diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a995fc10a6674..805cd213f0203 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2183,7 +2183,7 @@ def _get_consensus_name(self, other): return self._shallow_copy(name=name) return self - def union(self, other): + def union(self, other, sort=True): """ Form the union of two Index objects and sorts if possible. @@ -2241,27 +2241,31 @@ def union(self, other): allow_fill=False) result = _concat._concat_compat((self._values, other_diff)) - try: - self._values[0] < other_diff[0] - except TypeError as e: - warnings.warn("%s, sort order is undefined for " - "incomparable objects" % e, RuntimeWarning, - stacklevel=3) - else: - types = frozenset((self.inferred_type, - other.inferred_type)) - if not types & _unsortable_types: - result.sort() + if sort: + try: + self._values[0] < other_diff[0] + except TypeError as e: + warnings.warn("%s, sort order is undefined for " + "incomparable objects" % e, + RuntimeWarning, + stacklevel=3) + else: + types = frozenset((self.inferred_type, + other.inferred_type)) + if not types & _unsortable_types: + result.sort() else: result = self._values - try: - result = np.sort(result) - except TypeError as e: - warnings.warn("%s, sort order is undefined for " - "incomparable objects" % e, RuntimeWarning, - stacklevel=3) + if sort: + try: + result = np.sort(result) + except TypeError as e: + warnings.warn("%s, sort order is undefined for " + "incomparable objects" % e, + RuntimeWarning, + stacklevel=3) # for subclasses return self._wrap_union_result(other, result) @@ -2326,7 +2330,7 @@ def intersection(self, other): taken.name = None return taken - def difference(self, other): + def difference(self, other, sort=True): """ Return a new Index with elements from the index that are not in `other`. @@ -2366,14 +2370,15 @@ def difference(self, other): label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) the_diff = this.values.take(label_diff) - try: - the_diff = sorting.safe_sort(the_diff) - except TypeError: - pass + if sort: + try: + the_diff = sorting.safe_sort(the_diff) + except TypeError: + pass return this._shallow_copy(the_diff, name=result_name, freq=None) - def symmetric_difference(self, other, result_name=None): + def symmetric_difference(self, other, result_name=None, sort=True): """ Compute the symmetric difference of two Index objects. It's sorted if sorting is possible. @@ -2426,10 +2431,11 @@ def symmetric_difference(self, other, result_name=None): right_diff = other.values.take(right_indexer) the_diff = _concat._concat_compat([left_diff, right_diff]) - try: - the_diff = sorting.safe_sort(the_diff) - except TypeError: - pass + if sort: + try: + the_diff = sorting.safe_sort(the_diff) + except TypeError: + pass attribs = self._get_attributes_dict() attribs['name'] = result_name diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 307cda7f2d1cb..5835aee9a68b7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -766,6 +766,103 @@ def test_union(self): tm.assert_contains_all(self.strIndex, secondCat) tm.assert_contains_all(self.dateIndex, firstCat) + def test_union_sorting_false(self): + + # GH 17839 + dt_index = pd.DatetimeIndex(["20/12/2012", "15/05/2015", + "1/1/2011", "1/7/2017"]) + first = pd.Index([2, 0, 4, 3]) + second = pd.Index([1, 4]) + everything = pd.Index([2, 0, 4, 3, 1]) + union = first.union(second, sort=False) + assert tm.equalContents(union, everything) + + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.union(case, sort=False) + assert tm.equalContents(result, everything) + + # Corner cases + union = first.union(first, sort=False) + assert union is first + + union = first.union([], sort=False) + assert union is first + + union = Index([]).union(first, sort=False) + assert union is first + + # preserve names + first = Index(list('ba'), name='A') + second = Index(list('ba'), name='B') + union = first.union(second, sort=False) + expected = Index(list('ba'), name=None) + tm.assert_index_equal(union, expected) + + first = Index(list('ba'), name='A') + second = Index([], name='B') + union = first.union(second, sort=False) + expected = Index(list('ba'), name=None) + tm.assert_index_equal(union, expected) + + first = Index([], name='A') + second = Index(list('ba'), name='B') + union = first.union(second, sort=False) + expected = Index(list('ba'), name=None) + tm.assert_index_equal(union, expected) + + first = Index(list('ba')) + second = Index(list('ba'), name='B') + union = first.union(second, sort=False) + expected = Index(list('ba'), name='B') + tm.assert_index_equal(union, expected) + + first = Index([]) + second = Index(list('ba'), name='B') + union = first.union(second, sort=False) + expected = Index(list('ba'), name='B') + tm.assert_index_equal(union, expected) + + first = Index(list('ba')) + second = Index([], name='B') + union = first.union(second, sort=False) + expected = Index(list('ba'), name='B') + tm.assert_index_equal(union, expected) + + first = Index(list('ba'), name='A') + second = Index(list('ba')) + union = first.union(second, sort=False) + expected = Index(list('ba'), name='A') + tm.assert_index_equal(union, expected) + + first = Index(list('ba'), name='A') + second = Index([]) + union = first.union(second, sort=False) + expected = Index(list('ba'), name='A') + tm.assert_index_equal(union, expected) + + first = Index([], name='A') + second = Index(list('ba')) + union = first.union(second, sort=False) + expected = Index(list('ba'), name='A') + tm.assert_index_equal(union, expected) + + first = pd.Index([2, 0, 4, 3]) + with tm.assert_produces_warning(RuntimeWarning): + firstCat = first.union(dt_index, sort=False) + secondCat = first.union(first, sort=False) + + if dt_index.dtype == np.object_: + appended = np.append(first, dt_index) + else: + appended = np.append(first, dt_index.astype('O')) + + assert tm.equalContents(firstCat, appended) + assert tm.equalContents(secondCat, first) + tm.assert_contains_all(first, firstCat) + tm.assert_contains_all(first, secondCat) + tm.assert_contains_all(dt_index, firstCat) + def test_add(self): idx = self.strIndex expected = Index(self.strIndex.values * 2) @@ -896,6 +993,34 @@ def test_difference(self): assert len(result) == 0 assert result.name == first.name + def test_difference_sorting_false(self): + + # GH 17839 + first = pd.Index([2, 0, 4, 3]) + second = pd.Index([2, 1]) + answer = pd.Index([0, 4, 3]) + first.name = 'name' + # different names + result = first.difference(second, sort=False) + + assert tm.equalContents(result, answer) + assert result.name is None + + # same names + second.name = 'name' + result = first.difference(second, sort=False) + assert result.name == 'name' + + # with empty + result = first.difference([], sort=False) + assert tm.equalContents(result, first) + assert result.name == first.name + + # with everything + result = first.difference(first, sort=False) + assert len(result) == 0 + assert result.name == first.name + def test_symmetric_difference(self): # smoke idx1 = Index([1, 2, 3, 4], name='idx1') @@ -944,6 +1069,55 @@ def test_symmetric_difference(self): assert tm.equalContents(result, expected) assert result.name == 'new_name' + def test_symmetric_difference_sorting_false(self): + + # GH 17839 + # smoke + idx1 = Index([1, 2, 3, 4], name='idx1') + idx2 = Index([2, 3, 4, 5]) + result = idx1.symmetric_difference(idx2, sort=False) + expected = Index([1, 5]) + assert tm.equalContents(result, expected) + assert result.name is None + + # __xor__ syntax + expected = idx1 ^ idx2 + assert tm.equalContents(result, expected) + assert result.name is None + + # multiIndex + idx1 = MultiIndex.from_tuples(self.tuples) + idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) + result = idx1.symmetric_difference(idx2, sort=False) + expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)]) + assert tm.equalContents(result, expected) + + # nans: + idx1 = Index([1, np.nan, 2, 3]) + idx2 = Index([0, 1, np.nan]) + idx3 = Index([0, 1]) + + result = idx1.symmetric_difference(idx2, sort=False) + expected = Index([2.0, 3.0, 0.0]) + tm.assert_index_equal(result, expected) + + result = idx1.symmetric_difference(idx3, sort=False) + expected = Index([np.nan, 2.0, 3.0, 0.0]) + tm.assert_index_equal(result, expected) + + # other not an Index: + idx1 = Index([5, 3, 4, 2], name='idx1') + idx2 = np.array([3, 2, 4, 1]) + expected = Index([5, 1]) + result = idx1.symmetric_difference(idx2, sort=False) + assert tm.equalContents(result, expected) + assert result.name == 'idx1' + + result = idx1.symmetric_difference(idx2, result_name='new_name', + sort=False) + assert tm.equalContents(result, expected) + assert result.name == 'new_name' + def test_is_numeric(self): assert not self.dateIndex.is_numeric() assert not self.strIndex.is_numeric()