diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aa03bfb9a54b9..16f8d4658dc20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3141,7 +3141,7 @@ def duplicated(self, subset=None, keep='first'): ------- duplicated : Series """ - from pandas.core.groupby import get_group_index + from pandas.core.sorting import get_group_index from pandas.hashtable import duplicated_int64, _SIZE_HINT_LIMIT def f(vals): @@ -3179,7 +3179,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, raise ValueError('Length of ascending (%d) != length of by (%d)' % (len(ascending), len(by))) if len(by) > 1: - from pandas.core.groupby import _lexsort_indexer + from pandas.core.sorting import lexsort_indexer def trans(v): if needs_i8_conversion(v): @@ -3193,11 +3193,11 @@ def trans(v): raise ValueError('Cannot sort by duplicate column %s' % str(x)) keys.append(trans(k)) - indexer = _lexsort_indexer(keys, orders=ascending, - na_position=na_position) + indexer = lexsort_indexer(keys, orders=ascending, + na_position=na_position) indexer = _ensure_platform_int(indexer) else: - from pandas.core.groupby import _nargsort + from pandas.core.sorting import nargsort by = by[0] k = self.xs(by, axis=other_axis).values @@ -3214,8 +3214,8 @@ def trans(v): if isinstance(ascending, (tuple, list)): ascending = ascending[0] - indexer = _nargsort(k, kind=kind, ascending=ascending, - na_position=na_position) + indexer = nargsort(k, kind=kind, ascending=ascending, + na_position=na_position) new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), @@ -3300,17 +3300,17 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, sort_remaining=sort_remaining) elif isinstance(labels, MultiIndex): - from pandas.core.groupby import _lexsort_indexer + from pandas.core.sorting import lexsort_indexer # make sure that the axis is lexsorted to start # if not we need to reconstruct to get the correct indexer if not labels.is_lexsorted(): labels = MultiIndex.from_tuples(labels.values) - indexer = _lexsort_indexer(labels.labels, orders=ascending, - na_position=na_position) + indexer = lexsort_indexer(labels.labels, orders=ascending, + na_position=na_position) else: - from pandas.core.groupby import _nargsort + from pandas.core.sorting import nargsort # GH11080 - Check monotonic-ness before sort an index # if monotonic (already sorted), return None or copy() according @@ -3322,8 +3322,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, else: return self.copy() - indexer = _nargsort(labels, kind=kind, ascending=ascending, - na_position=na_position) + indexer = nargsort(labels, kind=kind, ascending=ascending, + na_position=na_position) new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a228861270aea..23c835318b0e6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -7,7 +7,7 @@ import copy from pandas.compat import ( - zip, range, long, lzip, + zip, range, lzip, callable, map ) from pandas import compat @@ -47,6 +47,9 @@ from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series from pandas.core.panel import Panel +from pandas.core.sorting import (get_group_index_sorter, get_group_index, + compress_group_index, get_flattened_iterator, + decons_obs_group_ids, get_indexer_dict) from pandas.util.decorators import (cache_readonly, Substitution, Appender, make_signature, deprecate_kwarg) from pandas.formats.printing 
import pprint_thing @@ -59,7 +62,6 @@ from pandas.lib import Timestamp import pandas.tslib as tslib import pandas.algos as _algos -import pandas.hashtable as _hash _doc_template = """ @@ -729,7 +731,7 @@ def _cumcount_array(self, ascending=True): (though the default is sort=True) for groupby in general """ ids, _, ngroups = self.grouper.group_info - sorter = _get_group_index_sorter(ids, ngroups) + sorter = get_group_index_sorter(ids, ngroups) ids, count = ids[sorter], len(ids) if count == 0: @@ -1616,9 +1618,12 @@ def _get_group_keys(self): return self.levels[0] else: comp_ids, _, ngroups = self.group_info + # provide "flattened" iterator for multi-group setting - mapper = _KeyMapper(comp_ids, ngroups, self.labels, self.levels) - return [mapper.get_key(i) for i in range(ngroups)] + return get_flattened_iterator(comp_ids, + ngroups, + self.levels, + self.labels) def apply(self, f, data, axis=0): mutated = self.mutated @@ -1662,7 +1667,7 @@ def indices(self): label_list = [ping.labels for ping in self.groupings] keys = [_values_from_object(ping.group_index) for ping in self.groupings] - return _get_indices_dict(label_list, keys) + return get_indexer_dict(label_list, keys) @property def labels(self): @@ -1726,7 +1731,7 @@ def _get_compressed_labels(self): if len(all_labels) > 1: group_index = get_group_index(all_labels, self.shape, sort=True, xnull=True) - return _compress_group_index(group_index, sort=self.sort) + return compress_group_index(group_index, sort=self.sort) ping = self.groupings[0] return ping.labels, np.arange(len(ping.group_index)) @@ -2027,7 +2032,7 @@ def _aggregate_series_fast(self, obj, func): # avoids object / Series creation overhead dummy = obj._get_values(slice(None, 0)).to_dense() - indexer = _get_group_index_sorter(group_index, ngroups) + indexer = get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer, convert=False) group_index = algos.take_nd(group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, @@ -2424,7 +2429,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, a BaseGrouper. """ - group_axis = obj._get_axis(axis) # validate that the passed level is compatible with the passed @@ -4206,7 +4210,7 @@ def slabels(self): @cache_readonly def sort_idx(self): # Counting sort indexer - return _get_group_index_sorter(self.labels, self.ngroups) + return get_group_index_sorter(self.labels, self.ngroups) def __iter__(self): sdata = self._get_sorted_data() @@ -4302,355 +4306,3 @@ def get_splitter(data, *args, **kwargs): klass = NDFrameSplitter return klass(data, *args, **kwargs) - - -# ---------------------------------------------------------------------- -# Misc utilities - - -def get_group_index(labels, shape, sort, xnull): - """ - For the particular label_list, gets the offsets into the hypothetical list - representing the totally ordered cartesian product of all possible label - combinations, *as long as* this space fits within int64 bounds; - otherwise, though group indices identify unique combinations of - labels, they cannot be deconstructed. - - If `sort`, rank of returned ids preserve lexical ranks of labels. - i.e. returned id's can be used to do lexical sort on labels; - - If `xnull` nulls (-1 labels) are passed through. 
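The docstring above (removed here and carried verbatim into the new pandas/core/sorting.py later in this patch) describes a mixed-radix encoding: each row's labels across all keys are packed into a single int64, exactly like the digits of a multi-digit number. A minimal standalone sketch of that idea in plain numpy (illustrative values only, not the pandas internals):

```python
import numpy as np

# Two factorized key columns and the number of distinct values in each.
labels = [np.array([0, 1, 2, 1]), np.array([1, 0, 1, 0])]
shape = (3, 2)  # 3 uniques in the first key, 2 in the second

# Row-major positional encoding: id = lab0 * 2 + lab1.
ids = labels[0] * shape[1] + labels[1]
print(ids)  # [1 2 5 2] -- positions 1 and 3 both hold (1, 0) and share id 2
```

`get_group_index` is this same arithmetic generalized to any number of keys, with chunk-and-compress passes so the running product of strides never exceeds the int64 range.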
- - Parameters - ---------- - labels: sequence of arrays - Integers identifying levels at each location - shape: sequence of ints same length as labels - Number of unique levels at each location - sort: boolean - If the ranks of returned ids should match lexical ranks of labels - xnull: boolean - If true nulls are excluded. i.e. -1 values in the labels are - passed through - Returns - ------- - An array of type int64 where two elements are equal if their corresponding - labels are equal at all location. - """ - def _int64_cut_off(shape): - acc = long(1) - for i, mul in enumerate(shape): - acc *= long(mul) - if not acc < _INT64_MAX: - return i - return len(shape) - - def loop(labels, shape): - # how many levels can be done without overflow: - nlev = _int64_cut_off(shape) - - # compute flat ids for the first `nlev` levels - stride = np.prod(shape[1:nlev], dtype='i8') - out = stride * labels[0].astype('i8', subok=False, copy=False) - - for i in range(1, nlev): - if shape[i] == 0: - stride = 0 - else: - stride //= shape[i] - out += labels[i] * stride - - if xnull: # exclude nulls - mask = labels[0] == -1 - for lab in labels[1:nlev]: - mask |= lab == -1 - out[mask] = -1 - - if nlev == len(shape): # all levels done! - return out - - # compress what has been done so far in order to avoid overflow - # to retain lexical ranks, obs_ids should be sorted - comp_ids, obs_ids = _compress_group_index(out, sort=sort) - - labels = [comp_ids] + labels[nlev:] - shape = [len(obs_ids)] + shape[nlev:] - - return loop(labels, shape) - - def maybe_lift(lab, size): # pormote nan values - return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) - - labels = map(_ensure_int64, labels) - if not xnull: - labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) - - return loop(list(labels), list(shape)) - - -_INT64_MAX = np.iinfo(np.int64).max - - -def _int64_overflow_possible(shape): - the_prod = long(1) - for x in shape: - the_prod *= long(x) - - return the_prod >= _INT64_MAX - - -def decons_group_index(comp_labels, shape): - # reconstruct labels - if _int64_overflow_possible(shape): - # at some point group indices are factorized, - # and may not be deconstructed here! wrong path! - raise ValueError('cannot deconstruct factorized group indices!') - - label_list = [] - factor = 1 - y = 0 - x = comp_labels - for i in reversed(range(len(shape))): - labels = (x - y) % (factor * shape[i]) // factor - np.putmask(labels, comp_labels < 0, -1) - label_list.append(labels) - y = labels * factor - factor *= shape[i] - return label_list[::-1] - - -def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): - """ - reconstruct labels from observed group ids - - Parameters - ---------- - xnull: boolean, - if nulls are excluded; i.e. -1 labels are passed through - """ - from pandas.hashtable import unique_label_indices - - if not xnull: - lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8') - shape = np.asarray(shape, dtype='i8') + lift - - if not _int64_overflow_possible(shape): - # obs ids are deconstructable! take the fast route! 
- out = decons_group_index(obs_ids, shape) - return out if xnull or not lift.any() \ - else [x - y for x, y in zip(out, lift)] - - i = unique_label_indices(comp_ids) - i8copy = lambda a: a.astype('i8', subok=False, copy=True) - return [i8copy(lab[i]) for lab in labels] - - -def _indexer_from_factorized(labels, shape, compress=True): - ids = get_group_index(labels, shape, sort=True, xnull=False) - - if not compress: - ngroups = (ids.size and ids.max()) + 1 - else: - ids, obs = _compress_group_index(ids, sort=True) - ngroups = len(obs) - - return _get_group_index_sorter(ids, ngroups) - - -def _lexsort_indexer(keys, orders=None, na_position='last'): - labels = [] - shape = [] - if isinstance(orders, bool): - orders = [orders] * len(keys) - elif orders is None: - orders = [True] * len(keys) - - for key, order in zip(keys, orders): - - # we are already a Categorical - if is_categorical_dtype(key): - c = key - - # create the Categorical - else: - c = Categorical(key, ordered=True) - - if na_position not in ['last', 'first']: - raise ValueError('invalid na_position: {!r}'.format(na_position)) - - n = len(c.categories) - codes = c.codes.copy() - - mask = (c.codes == -1) - if order: # ascending - if na_position == 'last': - codes = np.where(mask, n, codes) - elif na_position == 'first': - codes += 1 - else: # not order means descending - if na_position == 'last': - codes = np.where(mask, n, n - codes - 1) - elif na_position == 'first': - codes = np.where(mask, 0, n - codes) - if mask.any(): - n += 1 - - shape.append(n) - labels.append(codes) - - return _indexer_from_factorized(labels, shape) - - -def _nargsort(items, kind='quicksort', ascending=True, na_position='last'): - """ - This is intended to be a drop-in replacement for np.argsort which - handles NaNs. It adds ascending and na_position parameters. - GH #6399, #5231 - """ - - # specially handle Categorical - if is_categorical_dtype(items): - return items.argsort(ascending=ascending) - - items = np.asanyarray(items) - idx = np.arange(len(items)) - mask = isnull(items) - non_nans = items[~mask] - non_nan_idx = idx[~mask] - nan_idx = np.nonzero(mask)[0] - if not ascending: - non_nans = non_nans[::-1] - non_nan_idx = non_nan_idx[::-1] - indexer = non_nan_idx[non_nans.argsort(kind=kind)] - if not ascending: - indexer = indexer[::-1] - # Finally, place the NaNs at the end or the beginning according to - # na_position - if na_position == 'last': - indexer = np.concatenate([indexer, nan_idx]) - elif na_position == 'first': - indexer = np.concatenate([nan_idx, indexer]) - else: - raise ValueError('invalid na_position: {!r}'.format(na_position)) - return indexer - - -class _KeyMapper(object): - - """ - Ease my suffering. 
Map compressed group id -> key tuple - """ - - def __init__(self, comp_ids, ngroups, labels, levels): - self.levels = levels - self.labels = labels - self.comp_ids = comp_ids.astype(np.int64) - - self.k = len(labels) - self.tables = [_hash.Int64HashTable(ngroups) for _ in range(self.k)] - - self._populate_tables() - - def _populate_tables(self): - for labs, table in zip(self.labels, self.tables): - table.map(self.comp_ids, labs.astype(np.int64)) - - def get_key(self, comp_id): - return tuple(level[table.get_item(comp_id)] - for table, level in zip(self.tables, self.levels)) - - -def _get_indices_dict(label_list, keys): - shape = list(map(len, keys)) - - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - ngroups = ((group_index.size and group_index.max()) + 1) \ - if _int64_overflow_possible(shape) \ - else np.prod(shape, dtype='i8') - - sorter = _get_group_index_sorter(group_index, ngroups) - - sorted_labels = [lab.take(sorter) for lab in label_list] - group_index = group_index.take(sorter) - - return lib.indices_fast(sorter, group_index, keys, sorted_labels) - - -# ---------------------------------------------------------------------- -# sorting levels...cleverly? - -def _get_group_index_sorter(group_index, ngroups): - """ - _algos.groupsort_indexer implements `counting sort` and it is at least - O(ngroups), where - ngroups = prod(shape) - shape = map(len, keys) - that is, linear in the number of combinations (cartesian product) of unique - values of groupby keys. This can be huge when doing multi-key groupby. - np.argsort(kind='mergesort') is O(count x log(count)) where count is the - length of the data-frame; - Both algorithms are `stable` sort and that is necessary for correctness of - groupby operations. e.g. consider: - df.groupby(key)[col].transform('first') - """ - count = len(group_index) - alpha = 0.0 # taking complexities literally; there may be - beta = 1.0 # some room for fine-tuning these parameters - do_groupsort = (count > 0 and ((alpha + beta * ngroups) < - (count * np.log(count)))) - if do_groupsort: - sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index), - ngroups) - return _ensure_platform_int(sorter) - else: - return group_index.argsort(kind='mergesort') - - -def _compress_group_index(group_index, sort=True): - """ - Group_index is offsets into cartesian product of all possible labels. This - space can be huge, so this function compresses it, by computing offsets - (comp_ids) into the list of unique labels (obs_group_ids). 
- """ - - size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) - table = _hash.Int64HashTable(size_hint) - - group_index = _ensure_int64(group_index) - - # note, group labels come out ascending (ie, 1,2,3 etc) - comp_ids, obs_group_ids = table.get_labels_groupby(group_index) - - if sort and len(obs_group_ids) > 0: - obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) - - return comp_ids, obs_group_ids - - -def _reorder_by_uniques(uniques, labels): - # sorter is index where elements ought to go - sorter = uniques.argsort() - - # reverse_indexer is where elements came from - reverse_indexer = np.empty(len(sorter), dtype=np.int64) - reverse_indexer.put(sorter, np.arange(len(sorter))) - - mask = labels < 0 - - # move labels to right locations (ie, unsort ascending labels) - labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) - np.putmask(labels, mask, -1) - - # sort observed ids - uniques = algos.take_nd(uniques, sorter, allow_fill=False) - - return uniques, labels - - -def numpy_groupby(data, labels, axis=0): - s = np.argsort(labels) - keys, inv = np.unique(labels, return_inverse=True) - i = inv.take(s) - groups_at = np.where(i != np.concatenate(([-1], i[:-1])))[0] - ordered_data = data.take(s, axis=axis) - group_sums = np.add.reduceat(ordered_data, groups_at, axis=axis) - - return group_sums diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index cebaf4e3fd89b..5fc0d590a6885 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -20,7 +20,8 @@ from pandas._sparse import IntIndex from pandas.core.categorical import Categorical, _factorize_from_iterable -from pandas.core.groupby import get_group_index, _compress_group_index +from pandas.core.sorting import (get_group_index, compress_group_index, + decons_obs_group_ids) import pandas.core.algorithms as algos import pandas.algos as _algos @@ -156,7 +157,7 @@ def get_result(self): # filter out missing levels if values.shape[1] > 0: - col_inds, obs_ids = _compress_group_index(self.sorted_labels[-1]) + col_inds, obs_ids = compress_group_index(self.sorted_labels[-1]) # rare case, level values not observed if len(obs_ids) < self.full_shape[1]: inds = (value_mask.sum(0) > 0).nonzero()[0] @@ -245,8 +246,6 @@ def get_new_index(self): def _unstack_multiple(data, clocs): - from pandas.core.groupby import decons_obs_group_ids - if len(clocs) == 0: return data @@ -268,7 +267,7 @@ def _unstack_multiple(data, clocs): shape = [len(x) for x in clevels] group_index = get_group_index(clabels, shape, sort=False, xnull=False) - comp_ids, obs_ids = _compress_group_index(group_index, sort=False) + comp_ids, obs_ids = compress_group_index(group_index, sort=False) recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, xnull=False) @@ -459,10 +458,8 @@ def _unstack_frame(obj, level, fill_value=None): def get_compressed_ids(labels, sizes): - from pandas.core.groupby import get_group_index - ids = get_group_index(labels, sizes, sort=True, xnull=False) - return _compress_group_index(ids, sort=True) + return compress_group_index(ids, sort=True) def stack(frame, level=-1, dropna=True): diff --git a/pandas/core/series.py b/pandas/core/series.py index e1eac8f66017e..da47ab5dfb003 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1786,12 +1786,12 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, new_index, indexer = index.sortlevel(level, ascending=ascending, sort_remaining=sort_remaining) elif isinstance(index, MultiIndex): - from pandas.core.groupby import 
_lexsort_indexer
-            indexer = _lexsort_indexer(index.labels, orders=ascending)
+            from pandas.core.sorting import lexsort_indexer
+            indexer = lexsort_indexer(index.labels, orders=ascending)
         else:
-            from pandas.core.groupby import _nargsort
-            indexer = _nargsort(index, kind=kind, ascending=ascending,
-                                na_position=na_position)
+            from pandas.core.sorting import nargsort
+            indexer = nargsort(index, kind=kind, ascending=ascending,
+                               na_position=na_position)
 
         indexer = _ensure_platform_int(indexer)
         new_index = index.take(indexer)
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
new file mode 100644
index 0000000000000..71314da7745c0
--- /dev/null
+++ b/pandas/core/sorting.py
@@ -0,0 +1,357 @@
+""" miscellaneous sorting / groupby utilities """
+
+import numpy as np
+from pandas.compat import long
+from pandas.core.categorical import Categorical
+from pandas.types.common import (_ensure_platform_int,
+                                 _ensure_int64,
+                                 is_categorical_dtype)
+from pandas.types.missing import isnull
+import pandas.core.algorithms as algos
+import pandas.algos as _algos
+import pandas.hashtable as _hash
+from pandas import lib
+
+
+_INT64_MAX = np.iinfo(np.int64).max
+
+
+def get_group_index(labels, shape, sort, xnull):
+    """
+    For the particular label_list, gets the offsets into the hypothetical list
+    representing the totally ordered cartesian product of all possible label
+    combinations, *as long as* this space fits within int64 bounds;
+    otherwise, though group indices identify unique combinations of
+    labels, they cannot be deconstructed.
+    - If `sort`, rank of returned ids preserve lexical ranks of labels.
+      i.e. returned id's can be used to do lexical sort on labels;
+    - If `xnull` nulls (-1 labels) are passed through.
+
+    Parameters
+    ----------
+    labels: sequence of arrays
+        Integers identifying levels at each location
+    shape: sequence of ints same length as labels
+        Number of unique levels at each location
+    sort: boolean
+        If the ranks of returned ids should match lexical ranks of labels
+    xnull: boolean
+        If true nulls are excluded. i.e. -1 values in the labels are
+        passed through
+    Returns
+    -------
+    An array of type int64 where two elements are equal if their corresponding
+    labels are equal at all locations.
+    """
+    def _int64_cut_off(shape):
+        acc = long(1)
+        for i, mul in enumerate(shape):
+            acc *= long(mul)
+            if not acc < _INT64_MAX:
+                return i
+        return len(shape)
+
+    def loop(labels, shape):
+        # how many levels can be done without overflow:
+        nlev = _int64_cut_off(shape)
+
+        # compute flat ids for the first `nlev` levels
+        stride = np.prod(shape[1:nlev], dtype='i8')
+        out = stride * labels[0].astype('i8', subok=False, copy=False)
+
+        for i in range(1, nlev):
+            if shape[i] == 0:
+                stride = 0
+            else:
+                stride //= shape[i]
+            out += labels[i] * stride
+
+        if xnull:  # exclude nulls
+            mask = labels[0] == -1
+            for lab in labels[1:nlev]:
+                mask |= lab == -1
+            out[mask] = -1
+
+        if nlev == len(shape):  # all levels done!
+            return out
+
+        # compress what has been done so far in order to avoid overflow
+        # to retain lexical ranks, obs_ids should be sorted
+        comp_ids, obs_ids = compress_group_index(out, sort=sort)
+
+        labels = [comp_ids] + labels[nlev:]
+        shape = [len(obs_ids)] + shape[nlev:]
+
+        return loop(labels, shape)
+
+    def maybe_lift(lab, size):  # promote nan values
+        return (lab + 1, size + 1) if (lab == -1).any() else (lab, size)
+
+    labels = map(_ensure_int64, labels)
+    if not xnull:
+        labels, shape = map(list, zip(*map(maybe_lift, labels, shape)))
+
+    return loop(list(labels), list(shape))
+
+
+def is_int64_overflow_possible(shape):
+    the_prod = long(1)
+    for x in shape:
+        the_prod *= long(x)
+
+    return the_prod >= _INT64_MAX
+
+
+def decons_group_index(comp_labels, shape):
+    # reconstruct labels
+    if is_int64_overflow_possible(shape):
+        # at some point group indices are factorized,
+        # and may not be deconstructed here! wrong path!
+        raise ValueError('cannot deconstruct factorized group indices!')
+
+    label_list = []
+    factor = 1
+    y = 0
+    x = comp_labels
+    for i in reversed(range(len(shape))):
+        labels = (x - y) % (factor * shape[i]) // factor
+        np.putmask(labels, comp_labels < 0, -1)
+        label_list.append(labels)
+        y = labels * factor
+        factor *= shape[i]
+    return label_list[::-1]


+def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
+    """
+    reconstruct labels from observed group ids
+
+    Parameters
+    ----------
+    xnull: boolean,
+        if nulls are excluded; i.e. -1 labels are passed through
+    """
+    from pandas.hashtable import unique_label_indices
+
+    if not xnull:
+        lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8')
+        shape = np.asarray(shape, dtype='i8') + lift
+
+    if not is_int64_overflow_possible(shape):
+        # obs ids are deconstructable! take the fast route!
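For intuition about the "fast route" taken in the call that follows: `decons_group_index` inverts the mixed-radix packing with plain div/mod arithmetic, peeling one radix off at a time in reverse level order. Continuing the hand-computed toy example from earlier (hypothetical values, not the pandas code itself):

```python
import numpy as np

ids = np.array([1, 2, 5, 2])   # packed ids from the earlier sketch
shape = (3, 2)

lab1 = ids % shape[1]    # [1 0 1 0] -- last key decoded first
lab0 = ids // shape[1]   # [0 1 2 1]
```

This only works while `prod(shape)` fits in int64; past that point the ids have been factorized, and `decons_obs_group_ids` instead falls back to indexing the original label arrays at one representative row per observed group.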
+ out = decons_group_index(obs_ids, shape) + return out if xnull or not lift.any() \ + else [x - y for x, y in zip(out, lift)] + + i = unique_label_indices(comp_ids) + i8copy = lambda a: a.astype('i8', subok=False, copy=True) + return [i8copy(lab[i]) for lab in labels] + + +def indexer_from_factorized(labels, shape, compress=True): + ids = get_group_index(labels, shape, sort=True, xnull=False) + + if not compress: + ngroups = (ids.size and ids.max()) + 1 + else: + ids, obs = compress_group_index(ids, sort=True) + ngroups = len(obs) + + return get_group_index_sorter(ids, ngroups) + + +def lexsort_indexer(keys, orders=None, na_position='last'): + labels = [] + shape = [] + if isinstance(orders, bool): + orders = [orders] * len(keys) + elif orders is None: + orders = [True] * len(keys) + + for key, order in zip(keys, orders): + + # we are already a Categorical + if is_categorical_dtype(key): + c = key + + # create the Categorical + else: + c = Categorical(key, ordered=True) + + if na_position not in ['last', 'first']: + raise ValueError('invalid na_position: {!r}'.format(na_position)) + + n = len(c.categories) + codes = c.codes.copy() + + mask = (c.codes == -1) + if order: # ascending + if na_position == 'last': + codes = np.where(mask, n, codes) + elif na_position == 'first': + codes += 1 + else: # not order means descending + if na_position == 'last': + codes = np.where(mask, n, n - codes - 1) + elif na_position == 'first': + codes = np.where(mask, 0, n - codes) + if mask.any(): + n += 1 + + shape.append(n) + labels.append(codes) + + return indexer_from_factorized(labels, shape) + + +def nargsort(items, kind='quicksort', ascending=True, na_position='last'): + """ + This is intended to be a drop-in replacement for np.argsort which + handles NaNs. It adds ascending and na_position parameters. + GH #6399, #5231 + """ + + # specially handle Categorical + if is_categorical_dtype(items): + return items.argsort(ascending=ascending) + + items = np.asanyarray(items) + idx = np.arange(len(items)) + mask = isnull(items) + non_nans = items[~mask] + non_nan_idx = idx[~mask] + nan_idx = np.nonzero(mask)[0] + if not ascending: + non_nans = non_nans[::-1] + non_nan_idx = non_nan_idx[::-1] + indexer = non_nan_idx[non_nans.argsort(kind=kind)] + if not ascending: + indexer = indexer[::-1] + # Finally, place the NaNs at the end or the beginning according to + # na_position + if na_position == 'last': + indexer = np.concatenate([indexer, nan_idx]) + elif na_position == 'first': + indexer = np.concatenate([nan_idx, indexer]) + else: + raise ValueError('invalid na_position: {!r}'.format(na_position)) + return indexer + + +class _KeyMapper(object): + + """ + Ease my suffering. 
Map compressed group id -> key tuple
+    """
+
+    def __init__(self, comp_ids, ngroups, levels, labels):
+        self.levels = levels
+        self.labels = labels
+        self.comp_ids = comp_ids.astype(np.int64)
+
+        self.k = len(labels)
+        self.tables = [_hash.Int64HashTable(ngroups) for _ in range(self.k)]
+
+        self._populate_tables()
+
+    def _populate_tables(self):
+        for labs, table in zip(self.labels, self.tables):
+            table.map(self.comp_ids, labs.astype(np.int64))
+
+    def get_key(self, comp_id):
+        return tuple(level[table.get_item(comp_id)]
+                     for table, level in zip(self.tables, self.levels))
+
+
+def get_flattened_iterator(comp_ids, ngroups, levels, labels):
+    # provide "flattened" iterator for multi-group setting
+    mapper = _KeyMapper(comp_ids, ngroups, levels, labels)
+    return [mapper.get_key(i) for i in range(ngroups)]
+
+
+def get_indexer_dict(label_list, keys):
+    """ return a dictionary of {labels} -> {indexers} """
+    shape = list(map(len, keys))
+
+    group_index = get_group_index(label_list, shape, sort=True, xnull=True)
+    ngroups = ((group_index.size and group_index.max()) + 1) \
+        if is_int64_overflow_possible(shape) \
+        else np.prod(shape, dtype='i8')
+
+    sorter = get_group_index_sorter(group_index, ngroups)
+
+    sorted_labels = [lab.take(sorter) for lab in label_list]
+    group_index = group_index.take(sorter)
+
+    return lib.indices_fast(sorter, group_index, keys, sorted_labels)
+
+
+# ----------------------------------------------------------------------
+# sorting levels...cleverly?
+
+def get_group_index_sorter(group_index, ngroups):
+    """
+    _algos.groupsort_indexer implements `counting sort` and it is at least
+    O(ngroups), where
+        ngroups = prod(shape)
+        shape = map(len, keys)
+    that is, linear in the number of combinations (cartesian product) of unique
+    values of groupby keys. This can be huge when doing multi-key groupby.
+    np.argsort(kind='mergesort') is O(count x log(count)) where count is the
+    length of the data-frame;
+    Both algorithms are `stable` sort and that is necessary for correctness of
+    groupby operations. e.g. consider:
+        df.groupby(key)[col].transform('first')
+    """
+    count = len(group_index)
+    alpha = 0.0  # taking complexities literally; there may be
+    beta = 1.0  # some room for fine-tuning these parameters
+    do_groupsort = (count > 0 and ((alpha + beta * ngroups) <
+                                   (count * np.log(count))))
+    if do_groupsort:
+        sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index),
+                                             ngroups)
+        return _ensure_platform_int(sorter)
+    else:
+        return group_index.argsort(kind='mergesort')
+
+
+def compress_group_index(group_index, sort=True):
+    """
+    Group_index is offsets into cartesian product of all possible labels. This
+    space can be huge, so this function compresses it, by computing offsets
+    (comp_ids) into the list of unique labels (obs_group_ids).
+ """ + + size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) + table = _hash.Int64HashTable(size_hint) + + group_index = _ensure_int64(group_index) + + # note, group labels come out ascending (ie, 1,2,3 etc) + comp_ids, obs_group_ids = table.get_labels_groupby(group_index) + + if sort and len(obs_group_ids) > 0: + obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) + + return comp_ids, obs_group_ids + + +def _reorder_by_uniques(uniques, labels): + # sorter is index where elements ought to go + sorter = uniques.argsort() + + # reverse_indexer is where elements came from + reverse_indexer = np.empty(len(sorter), dtype=np.int64) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + mask = labels < 0 + + # move labels to right locations (ie, unsort ascending labels) + labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) + np.putmask(labels, mask, -1) + + # sort observed ids + uniques = algos.take_nd(uniques, sorter, allow_fill=False) + + return uniques, labels diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 9ab07d87fd13b..653ba1fee5691 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -663,7 +663,7 @@ def is_unique(self): False: 'first'}) @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) def duplicated(self, keep='first'): - from pandas.core.groupby import get_group_index + from pandas.core.sorting import get_group_index from pandas.hashtable import duplicated_int64 shape = map(len, self.levels) @@ -1405,7 +1405,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): Indices of output values in original index """ - from pandas.core.groupby import _indexer_from_factorized + from pandas.core.sorting import indexer_from_factorized if isinstance(level, (compat.string_types, int)): level = [level] @@ -1417,8 +1417,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): if not len(level) == len(ascending): raise ValueError("level must have same length as ascending") - from pandas.core.groupby import _lexsort_indexer - indexer = _lexsort_indexer(self.labels, orders=ascending) + from pandas.core.sorting import lexsort_indexer + indexer = lexsort_indexer(self.labels, orders=ascending) # level ordering else: @@ -1436,8 +1436,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): else: sortorder = level[0] - indexer = _indexer_from_factorized(primary, primshp, - compress=False) + indexer = indexer_from_factorized(primary, primshp, + compress=False) if not ascending: indexer = indexer[::-1] diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 1640858802047..46ddb5a5318fb 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -616,24 +616,3 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): assert (result[k] == v) - - -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index - - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) - - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) - - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) - - shape = (10000, 10000) - label_list = 
[np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d625fa07d932c..3a6a9eaaa8e72 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1510,59 +1510,6 @@ def check_nunique(df, keys, as_index=True): check_nunique(frame, ['jim'], as_index=False) check_nunique(frame, ['jim', 'joe'], as_index=False) - def test_series_groupby_value_counts(self): - from itertools import product - np.random.seed(1234) - - def rebuild_index(df): - arr = list(map(df.index.get_level_values, range(df.index.nlevels))) - df.index = MultiIndex.from_arrays(arr, names=df.index.names) - return df - - def check_value_counts(df, keys, bins): - for isort, normalize, sort, ascending, dropna \ - in product((False, True), repeat=5): - - kwargs = dict(normalize=normalize, sort=sort, - ascending=ascending, dropna=dropna, bins=bins) - - gr = df.groupby(keys, sort=isort) - left = gr['3rd'].value_counts(**kwargs) - - gr = df.groupby(keys, sort=isort) - right = gr['3rd'].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1] + ['3rd'] - - # have to sort on index because of unstable sort on values - left, right = map(rebuild_index, (left, right)) # xref GH9212 - assert_series_equal(left.sort_index(), right.sort_index()) - - def loop(df): - bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) - keys = '1st', '2nd', ('1st', '2nd') - for k, b in product(keys, bins): - check_value_counts(df, k, b) - - days = date_range('2015-08-24', periods=10) - - for n, m in product((100, 1000), (5, 20)): - frame = DataFrame({ - '1st': np.random.choice( - list('abcd'), n), - '2nd': np.random.choice(days, n), - '3rd': np.random.randint(1, m + 1, n) - }) - - loop(frame) - - frame.loc[1::11, '1st'] = nan - frame.loc[3::17, '2nd'] = nan - frame.loc[7::19, '3rd'] = nan - frame.loc[8::19, '3rd'] = nan - frame.loc[9::19, '3rd'] = nan - - loop(frame) - def test_multiindex_passthru(self): # GH 7997 @@ -3071,22 +3018,6 @@ def test_panel_groupby(self): agged = grouped.mean() self.assert_index_equal(agged.minor_axis, Index([0, 1])) - def test_numpy_groupby(self): - from pandas.core.groupby import numpy_groupby - - data = np.random.randn(100, 100) - labels = np.random.randint(0, 10, size=100) - - df = DataFrame(data) - - result = df.groupby(labels).sum().values - expected = numpy_groupby(data, labels) - assert_almost_equal(result, expected) - - result = df.groupby(labels, axis=1).sum().values - expected = numpy_groupby(data, labels, axis=1) - assert_almost_equal(result, expected) - def test_groupby_2d_malformed(self): d = DataFrame(index=lrange(2)) d['group'] = ['g1', 'g2'] @@ -3112,85 +3043,6 @@ def test_int32_overflow(self): right = df.groupby(['D', 'C', 'B', 'A']).sum() self.assertEqual(len(left), len(right)) - def test_int64_overflow(self): - from pandas.core.groupby import _int64_overflow_possible - - B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) - A = np.arange(2500) - df = DataFrame({'A': A, - 'B': B, - 'C': A, - 'D': B, - 'E': A, - 'F': B, - 'G': A, - 'H': B, - 'values': np.random.randn(2500)}) - - lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) - rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A']) - - left = lg.sum()['values'] - right = rg.sum()['values'] - - exp_index, _ = left.index.sortlevel() - self.assert_index_equal(left.index, exp_index) - - exp_index, _ = right.index.sortlevel(0) - 
self.assert_index_equal(right.index, exp_index) - - tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' - ]].values)) - tups = com._asarray_tuplesafe(tups) - - expected = df.groupby(tups).sum()['values'] - - for k, v in compat.iteritems(expected): - self.assertEqual(left[k], right[k[::-1]]) - self.assertEqual(left[k], v) - self.assertEqual(len(left), len(right)) - - # GH9096 - values = range(55109) - data = pd.DataFrame.from_dict({'a': values, - 'b': values, - 'c': values, - 'd': values}) - grouped = data.groupby(['a', 'b', 'c', 'd']) - self.assertEqual(len(grouped), len(values)) - - arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) - i = np.random.choice(len(arr), len(arr) * 4) - arr = np.vstack((arr, arr[i])) # add sume duplicate rows - - i = np.random.permutation(len(arr)) - arr = arr[i] # shuffle rows - - df = DataFrame(arr, columns=list('abcde')) - df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 - gr = df.groupby(list('abcde')) - - # verify this is testing what it is supposed to test! - self.assertTrue(_int64_overflow_possible(gr.grouper.shape)) - - # mannually compute groupings - jim, joe = defaultdict(list), defaultdict(list) - for key, a, b in zip(map(tuple, arr), df['jim'], df['joe']): - jim[key].append(a) - joe[key].append(b) - - self.assertEqual(len(gr), len(jim)) - mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde')) - - def aggr(func): - f = lambda a: np.fromiter(map(func, a), dtype='f8') - arr = np.vstack((f(jim.values()), f(joe.values()))).T - res = DataFrame(arr, columns=['jim', 'joe'], index=mi) - return res.sort_index() - - assert_frame_equal(gr.mean(), aggr(np.mean)) - assert_frame_equal(gr.median(), aggr(np.median)) - def test_groupby_sort_multi(self): df = DataFrame({'a': ['foo', 'bar', 'baz'], 'b': [3, 2, 1], @@ -4451,24 +4303,3 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): assert (result[k] == v) - - -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index - - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) - - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) - - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) - - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) diff --git a/pandas/tests/groupby/test_misc.py b/pandas/tests/groupby/test_misc.py deleted file mode 100644 index 9395304385681..0000000000000 --- a/pandas/tests/groupby/test_misc.py +++ /dev/null @@ -1,101 +0,0 @@ -""" misc non-groupby routines, as they are defined in core/groupby.py """ - -import pytest -import numpy as np -from numpy import nan -from pandas.util import testing as tm -from pandas.core.groupby import _nargsort, _lexsort_indexer - - -class TestSorting(tm.TestCase): - - def test_lexsort_indexer(self): - keys = [[nan] * 5 + list(range(100)) + [nan] * 5] - # orders=True, na_position='last' - result = _lexsort_indexer(keys, orders=True, na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=True, na_position='first' - result = _lexsort_indexer(keys, orders=True, 
na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='last' - result = _lexsort_indexer(keys, orders=False, na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='first' - result = _lexsort_indexer(keys, orders=False, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - def test_nargsort(self): - # np.argsort(items) places NaNs last - items = [nan] * 5 + list(range(100)) + [nan] * 5 - # np.argsort(items2) may not place NaNs first - items2 = np.array(items, dtype='O') - - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - pytest.skip('requested sort not available for type') - - # mergesort is the most difficult to get right because we want it to be - # stable. - - # According to numpy/core/tests/test_multiarray, """The number of - # sorted items must be greater than ~50 to check the actual algorithm - # because quick and merge sort fall over to insertion sort for small - # arrays.""" - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 
110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py new file mode 100644 index 0000000000000..801d0da070112 --- /dev/null +++ b/pandas/tests/groupby/test_value_counts.py @@ -0,0 +1,60 @@ +import pytest + +from itertools import product +import numpy as np + +from pandas.util import testing as tm +from pandas import MultiIndex, DataFrame, Series, date_range + + +@pytest.mark.parametrize("n,m", product((100, 1000), (5, 20))) +def test_series_groupby_value_counts(n, m): + np.random.seed(1234) + + def rebuild_index(df): + arr = list(map(df.index.get_level_values, range(df.index.nlevels))) + df.index = MultiIndex.from_arrays(arr, names=df.index.names) + return df + + def check_value_counts(df, keys, bins): + for isort, normalize, sort, ascending, dropna \ + in product((False, True), repeat=5): + + kwargs = dict(normalize=normalize, sort=sort, + ascending=ascending, dropna=dropna, bins=bins) + + gr = df.groupby(keys, sort=isort) + left = gr['3rd'].value_counts(**kwargs) + + gr = df.groupby(keys, sort=isort) + right = gr['3rd'].apply(Series.value_counts, **kwargs) + right.index.names = right.index.names[:-1] + ['3rd'] + + # have to sort on index because of unstable sort on values + left, right = map(rebuild_index, (left, right)) # xref GH9212 + tm.assert_series_equal(left.sort_index(), right.sort_index()) + + def loop(df): + bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) + keys = '1st', '2nd', ('1st', '2nd') + for k, b in product(keys, bins): + check_value_counts(df, k, b) + + days = date_range('2015-08-24', periods=10) + + frame = DataFrame({ + '1st': np.random.choice( + list('abcd'), n), + '2nd': np.random.choice(days, n), + '3rd': np.random.randint(1, m + 1, n) + }) + + loop(frame) + + frame.loc[1::11, '1st'] = np.nan + frame.loc[3::17, '2nd'] = np.nan + frame.loc[7::19, '3rd'] = np.nan + frame.loc[8::19, '3rd'] = np.nan + frame.loc[9::19, '3rd'] = np.nan + + loop(frame) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py new file mode 100644 index 0000000000000..99361695b2371 --- /dev/null +++ b/pandas/tests/test_sorting.py @@ -0,0 +1,339 @@ +import pytest +from itertools import product +from collections import defaultdict + +import numpy as np +from numpy import nan +import pandas as pd +from pandas.core import common as com +from pandas import DataFrame, MultiIndex, merge, concat, Series, compat +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.sorting import (is_int64_overflow_possible, + decons_group_index, + get_group_index, + nargsort, + lexsort_indexer) + + +class TestSorting(tm.TestCase): + + def test_int64_overflow(self): + + B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) + A = np.arange(2500) + df = DataFrame({'A': A, + 'B': B, + 'C': A, + 'D': B, + 'E': A, + 'F': B, + 'G': A, + 'H': B, + 'values': np.random.randn(2500)}) + + lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) + rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A']) + + left = lg.sum()['values'] + right = rg.sum()['values'] + + exp_index, _ = left.index.sortlevel() + self.assert_index_equal(left.index, exp_index) + + exp_index, _ = right.index.sortlevel(0) + self.assert_index_equal(right.index, exp_index) + + tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' + ]].values)) + tups = com._asarray_tuplesafe(tups) + 
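The comparison this test is about to make relies on a general invariant: a multi-column groupby (which here exercises the int64-overflow path) must agree with grouping on the materialized key tuples. A hypothetical miniature of that cross-check, with made-up data:

```python
import pandas as pd

df = pd.DataFrame({'a': [1, 1, 2], 'b': [3, 4, 3], 'v': [1., 2., 3.]})
tups = pd.Series(list(zip(df['a'], df['b'])))  # one tuple key per row

# Multi-key path and tuple-key path must produce the same sums.
assert (df.groupby(['a', 'b'])['v'].sum().values ==
        df.groupby(tups)['v'].sum().values).all()
```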
+        expected = df.groupby(tups).sum()['values']
+
+        for k, v in compat.iteritems(expected):
+            self.assertEqual(left[k], right[k[::-1]])
+            self.assertEqual(left[k], v)
+        self.assertEqual(len(left), len(right))
+
+        # GH9096
+        values = range(55109)
+        data = pd.DataFrame.from_dict({'a': values,
+                                       'b': values,
+                                       'c': values,
+                                       'd': values})
+        grouped = data.groupby(['a', 'b', 'c', 'd'])
+        self.assertEqual(len(grouped), len(values))
+
+        arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5))
+        i = np.random.choice(len(arr), len(arr) * 4)
+        arr = np.vstack((arr, arr[i]))  # add some duplicate rows
+
+        i = np.random.permutation(len(arr))
+        arr = arr[i]  # shuffle rows
+
+        df = DataFrame(arr, columns=list('abcde'))
+        df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10
+        gr = df.groupby(list('abcde'))
+
+        # verify this is testing what it is supposed to test!
+        self.assertTrue(is_int64_overflow_possible(gr.grouper.shape))
+
+        # manually compute groupings
+        jim, joe = defaultdict(list), defaultdict(list)
+        for key, a, b in zip(map(tuple, arr), df['jim'], df['joe']):
+            jim[key].append(a)
+            joe[key].append(b)
+
+        self.assertEqual(len(gr), len(jim))
+        mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde'))
+
+        def aggr(func):
+            f = lambda a: np.fromiter(map(func, a), dtype='f8')
+            arr = np.vstack((f(jim.values()), f(joe.values()))).T
+            res = DataFrame(arr, columns=['jim', 'joe'], index=mi)
+            return res.sort_index()
+
+        assert_frame_equal(gr.mean(), aggr(np.mean))
+        assert_frame_equal(gr.median(), aggr(np.median))
+
+    def test_lexsort_indexer(self):
+        keys = [[nan] * 5 + list(range(100)) + [nan] * 5]
+        # orders=True, na_position='last'
+        result = lexsort_indexer(keys, orders=True, na_position='last')
+        exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110))
+        tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp))
+
+        # orders=True, na_position='first'
+        result = lexsort_indexer(keys, orders=True, na_position='first')
+        exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105))
+        tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp))
+
+        # orders=False, na_position='last'
+        result = lexsort_indexer(keys, orders=False, na_position='last')
+        exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110))
+        tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp))
+
+        # orders=False, na_position='first'
+        result = lexsort_indexer(keys, orders=False, na_position='first')
+        exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
+        tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp))
+
+    def test_nargsort(self):
+        # np.argsort(items) places NaNs last
+        items = [nan] * 5 + list(range(100)) + [nan] * 5
+        # np.argsort(items2) may not place NaNs first
+        items2 = np.array(items, dtype='O')
+
+        try:
+            # GH 2785; due to a regression in NumPy 1.6.2
+            np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i'))
+            np.argsort(items2, kind='mergesort')
+        except TypeError:
+            pytest.skip('requested sort not available for type')
+
+        # mergesort is the most difficult to get right because we want it to be
+        # stable.
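For orientation, the behavior these assertions pin down can be reproduced in a few lines of plain numpy; this is an illustrative re-derivation of `nargsort`'s NaN handling, not the pandas implementation itself:

```python
import numpy as np

a = np.array([np.nan, 3.0, 1.0, np.nan, 2.0])

# np.argsort sends NaNs to the end no matter what, so nargsort masks them
# out, argsorts the rest with a stable kind, and re-attaches the NaN
# positions wherever na_position asks for them.
mask = np.isnan(a)
order = np.argsort(a[~mask], kind='mergesort')
indexer = np.flatnonzero(~mask)[order]                   # [2 4 1]
print(np.concatenate([indexer, np.flatnonzero(mask)]))   # [2 4 1 0 3], 'last'
print(np.concatenate([np.flatnonzero(mask), indexer]))   # [0 3 2 4 1], 'first'
```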
+ + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm + # because quick and merge sort fall over to insertion sort for small + # arrays.""" + + # mergesort, ascending=True, na_position='last' + result = nargsort(items, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='last' + result = nargsort(items2, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items2, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items2, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items2, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + +class TestMerge(tm.TestCase): + + @pytest.mark.slow + def test_int64_overflow_issues(self): + + # #2690, combinatorial explosion + df1 = DataFrame(np.random.randn(1000, 7), + columns=list('ABCDEF') + ['G1']) + df2 = DataFrame(np.random.randn(1000, 7), + columns=list('ABCDEF') + ['G2']) + + # it works! 
+ result = merge(df1, df2, how='outer') + self.assertTrue(len(result) == 2000) + + low, high, n = -1 << 10, 1 << 10, 1 << 20 + left = DataFrame(np.random.randint(low, high, (n, 7)), + columns=list('ABCDEFG')) + left['left'] = left.sum(axis=1) + + # one-2-one match + i = np.random.permutation(len(left)) + right = left.iloc[i].copy() + right.columns = right.columns[:-1].tolist() + ['right'] + right.index = np.arange(len(right)) + right['right'] *= -1 + + out = merge(left, right, how='outer') + self.assertEqual(len(out), len(left)) + assert_series_equal(out['left'], - out['right'], check_names=False) + result = out.iloc[:, :-2].sum(axis=1) + assert_series_equal(out['left'], result, check_names=False) + self.assertTrue(result.name is None) + + out.sort_values(out.columns.tolist(), inplace=True) + out.index = np.arange(len(out)) + for how in ['left', 'right', 'outer', 'inner']: + assert_frame_equal(out, merge(left, right, how=how, sort=True)) + + # check that left merge w/ sort=False maintains left frame order + out = merge(left, right, how='left', sort=False) + assert_frame_equal(left, out[left.columns.tolist()]) + + out = merge(right, left, how='left', sort=False) + assert_frame_equal(right, out[right.columns.tolist()]) + + # one-2-many/none match + n = 1 << 11 + left = DataFrame(np.random.randint(low, high, (n, 7)).astype('int64'), + columns=list('ABCDEFG')) + + # confirm that this is checking what it is supposed to check + shape = left.apply(Series.nunique).values + self.assertTrue(is_int64_overflow_possible(shape)) + + # add duplicates to left frame + left = concat([left, left], ignore_index=True) + + right = DataFrame(np.random.randint(low, high, (n // 2, 7)) + .astype('int64'), + columns=list('ABCDEFG')) + + # add duplicates & overlap with left to the right frame + i = np.random.choice(len(left), n) + right = concat([right, right, left.iloc[i]], ignore_index=True) + + left['left'] = np.random.randn(len(left)) + right['right'] = np.random.randn(len(right)) + + # shuffle left & right frames + i = np.random.permutation(len(left)) + left = left.iloc[i].copy() + left.index = np.arange(len(left)) + + i = np.random.permutation(len(right)) + right = right.iloc[i].copy() + right.index = np.arange(len(right)) + + # manually compute outer merge + ldict, rdict = defaultdict(list), defaultdict(list) + + for idx, row in left.set_index(list('ABCDEFG')).iterrows(): + ldict[idx].append(row['left']) + + for idx, row in right.set_index(list('ABCDEFG')).iterrows(): + rdict[idx].append(row['right']) + + vals = [] + for k, lval in ldict.items(): + rval = rdict.get(k, [np.nan]) + for lv, rv in product(lval, rval): + vals.append(k + tuple([lv, rv])) + + for k, rval in rdict.items(): + if k not in ldict: + for rv in rval: + vals.append(k + tuple([np.nan, rv])) + + def align(df): + df = df.sort_values(df.columns.tolist()) + df.index = np.arange(len(df)) + return df + + def verify_order(df): + kcols = list('ABCDEFG') + assert_frame_equal(df[kcols].copy(), + df[kcols].sort_values(kcols, kind='mergesort')) + + out = DataFrame(vals, columns=list('ABCDEFG') + ['left', 'right']) + out = align(out) + + jmask = {'left': out['left'].notnull(), + 'right': out['right'].notnull(), + 'inner': out['left'].notnull() & out['right'].notnull(), + 'outer': np.ones(len(out), dtype='bool')} + + for how in 'left', 'right', 'outer', 'inner': + mask = jmask[how] + frame = align(out[mask].copy()) + self.assertTrue(mask.all() ^ mask.any() or how == 'outer') + + for sort in [False, True]: + res = merge(left, right, how=how, sort=sort) + if 
sort: + verify_order(res) + + # as in GH9092 dtypes break with outer/right join + assert_frame_equal(frame, align(res), + check_dtype=how not in ('right', 'outer')) + + +def test_decons(): + + def testit(label_list, shape): + group_index = get_group_index(label_list, shape, sort=True, xnull=True) + label_list2 = decons_group_index(group_index, shape) + + for a, b in zip(label_list, label_list2): + assert (np.array_equal(a, b)) + + shape = (4, 5, 6) + label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( + [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( + [5, 1, 0, 2, 3, 0, 5, 4], 100)] + testit(label_list, shape) + + shape = (10000, 10000) + label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] + testit(label_list, shape) diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index d66cd793ec0be..472d8674f9f8d 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -10,9 +10,7 @@ from pandas.compat import lrange, lzip from pandas.tools.concat import concat from pandas.tools.merge import merge, MergeError -from pandas.util.testing import (assert_frame_equal, - assert_series_equal, - slow) +from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm @@ -1092,137 +1090,6 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) - @slow - def test_int64_overflow_issues(self): - from itertools import product - from collections import defaultdict - from pandas.core.groupby import _int64_overflow_possible - - # #2690, combinatorial explosion - df1 = DataFrame(np.random.randn(1000, 7), - columns=list('ABCDEF') + ['G1']) - df2 = DataFrame(np.random.randn(1000, 7), - columns=list('ABCDEF') + ['G2']) - - # it works! 
- result = merge(df1, df2, how='outer') - self.assertTrue(len(result) == 2000) - - low, high, n = -1 << 10, 1 << 10, 1 << 20 - left = DataFrame(np.random.randint(low, high, (n, 7)), - columns=list('ABCDEFG')) - left['left'] = left.sum(axis=1) - - # one-2-one match - i = np.random.permutation(len(left)) - right = left.iloc[i].copy() - right.columns = right.columns[:-1].tolist() + ['right'] - right.index = np.arange(len(right)) - right['right'] *= -1 - - out = merge(left, right, how='outer') - self.assertEqual(len(out), len(left)) - assert_series_equal(out['left'], - out['right'], check_names=False) - result = out.iloc[:, :-2].sum(axis=1) - assert_series_equal(out['left'], result, check_names=False) - self.assertTrue(result.name is None) - - out.sort_values(out.columns.tolist(), inplace=True) - out.index = np.arange(len(out)) - for how in ['left', 'right', 'outer', 'inner']: - assert_frame_equal(out, merge(left, right, how=how, sort=True)) - - # check that left merge w/ sort=False maintains left frame order - out = merge(left, right, how='left', sort=False) - assert_frame_equal(left, out[left.columns.tolist()]) - - out = merge(right, left, how='left', sort=False) - assert_frame_equal(right, out[right.columns.tolist()]) - - # one-2-many/none match - n = 1 << 11 - left = DataFrame(np.random.randint(low, high, (n, 7)).astype('int64'), - columns=list('ABCDEFG')) - - # confirm that this is checking what it is supposed to check - shape = left.apply(Series.nunique).values - self.assertTrue(_int64_overflow_possible(shape)) - - # add duplicates to left frame - left = concat([left, left], ignore_index=True) - - right = DataFrame(np.random.randint(low, high, (n // 2, 7)) - .astype('int64'), - columns=list('ABCDEFG')) - - # add duplicates & overlap with left to the right frame - i = np.random.choice(len(left), n) - right = concat([right, right, left.iloc[i]], ignore_index=True) - - left['left'] = np.random.randn(len(left)) - right['right'] = np.random.randn(len(right)) - - # shuffle left & right frames - i = np.random.permutation(len(left)) - left = left.iloc[i].copy() - left.index = np.arange(len(left)) - - i = np.random.permutation(len(right)) - right = right.iloc[i].copy() - right.index = np.arange(len(right)) - - # manually compute outer merge - ldict, rdict = defaultdict(list), defaultdict(list) - - for idx, row in left.set_index(list('ABCDEFG')).iterrows(): - ldict[idx].append(row['left']) - - for idx, row in right.set_index(list('ABCDEFG')).iterrows(): - rdict[idx].append(row['right']) - - vals = [] - for k, lval in ldict.items(): - rval = rdict.get(k, [np.nan]) - for lv, rv in product(lval, rval): - vals.append(k + tuple([lv, rv])) - - for k, rval in rdict.items(): - if k not in ldict: - for rv in rval: - vals.append(k + tuple([np.nan, rv])) - - def align(df): - df = df.sort_values(df.columns.tolist()) - df.index = np.arange(len(df)) - return df - - def verify_order(df): - kcols = list('ABCDEFG') - assert_frame_equal(df[kcols].copy(), - df[kcols].sort_values(kcols, kind='mergesort')) - - out = DataFrame(vals, columns=list('ABCDEFG') + ['left', 'right']) - out = align(out) - - jmask = {'left': out['left'].notnull(), - 'right': out['right'].notnull(), - 'inner': out['left'].notnull() & out['right'].notnull(), - 'outer': np.ones(len(out), dtype='bool')} - - for how in 'left', 'right', 'outer', 'inner': - mask = jmask[how] - frame = align(out[mask].copy()) - self.assertTrue(mask.all() ^ mask.any() or how == 'outer') - - for sort in [False, True]: - res = merge(left, right, how=how, sort=sort) - if 
sort: - verify_order(res) - - # as in GH9092 dtypes break with outer/right join - assert_frame_equal(frame, align(res), - check_dtype=how not in ('right', 'outer')) - def test_join_multi_levels(self): # GH 3662 diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d938c2eeacbef..e82e702cb6e55 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -34,6 +34,7 @@ concatenate_block_managers) from pandas.util.decorators import Appender, Substitution +from pandas.core.sorting import is_int64_overflow_possible import pandas.core.algorithms as algos import pandas.core.common as com @@ -1397,10 +1398,9 @@ def _sort_labels(uniques, left, right): def _get_join_keys(llab, rlab, shape, sort): - from pandas.core.groupby import _int64_overflow_possible # how many levels can be done without overflow - pred = lambda i: not _int64_overflow_possible(shape[:i]) + pred = lambda i: not is_int64_overflow_possible(shape[:i]) nlev = next(filter(pred, range(len(shape), 0, -1))) # get keys for the first `nlev` levels
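The `pred`/`nlev` idiom in `_get_join_keys` searches downward for the largest prefix of join levels whose cartesian product still fits in int64, so keys can be packed a chunk at a time. A small standalone illustration of that search (the shapes are made up):

```python
import numpy as np

_INT64_MAX = np.iinfo(np.int64).max

def is_int64_overflow_possible(shape):
    the_prod = 1
    for x in shape:
        the_prod *= int(x)
    return the_prod >= _INT64_MAX

shape = [1 << 20, 1 << 20, 1 << 20, 1 << 20]  # four levels, 2**80 combinations

# Largest i such that shape[:i] packs into int64 without overflow.
pred = lambda i: not is_int64_overflow_possible(shape[:i])
nlev = next(filter(pred, range(len(shape), 0, -1)))
print(nlev)  # 3 -- 2**60 fits in int64, 2**80 does not
```

After packing the first `nlev` levels, the remaining levels are folded in against a compressed (re-factorized) key, mirroring the chunked `loop` inside `get_group_index`.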