diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 20e8e252615d3..11e076f313540 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -3,12 +3,30 @@ import numpy as np import pytest -from pandas import DataFrame, Index, Series +from pandas import DataFrame, Index, IndexSlice, MultiIndex, Series, concat import pandas._testing as tm +import pandas.core.common as com from pandas.tseries.offsets import BDay +@pytest.fixture +def four_level_index_dataframe(): + arr = np.array( + [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + [-0.6662, -0.5243, -0.358, 0.89145, 2.5838], + ] + ) + index = MultiIndex( + levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]], + codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]], + names=["one", "two", "three", "four"], + ) + return DataFrame(arr, index=index, columns=list("ABCDE")) + + class TestXS: def test_xs(self, float_frame, datetime_frame): idx = float_frame.index[5] @@ -92,3 +110,190 @@ def test_xs_view(self): dm.xs(2)[:] = 10 assert (dm.xs(2) == 10).all() + + +class TestXSWithMultiIndex: + def test_xs_integer_key(self): + # see GH#2107 + dates = range(20111201, 20111205) + ids = list("abcde") + index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) + df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"]) + + result = df.xs(20111201, level="date") + expected = df.loc[20111201, :] + tm.assert_frame_equal(result, expected) + + def test_xs_level(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs("two", level="second") + expected = df[df.index.get_level_values(1) == "two"] + expected.index = Index(["foo", "bar", "baz", "qux"], name="first") + tm.assert_frame_equal(result, expected) + + def test_xs_level_eq_2(self): + arr = np.random.randn(3, 5) + index = MultiIndex( + levels=[["a", "p", "x"], ["b", "q", "y"], 
["c", "r", "z"]], + codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], + ) + df = DataFrame(arr, index=index) + expected = DataFrame(arr[1:2], index=[["a"], ["b"]]) + result = df.xs("c", level=2) + tm.assert_frame_equal(result, expected) + + def test_xs_setting_with_copy_error(self, multiindex_dataframe_random_data): + # this is a copy in 0.14 + df = multiindex_dataframe_random_data + result = df.xs("two", level="second") + + # setting this will give a SettingWithCopyError + # as we are trying to write a view + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + result[:] = 10 + + def test_xs_setting_with_copy_error_multiple(self, four_level_index_dataframe): + # this is a copy in 0.14 + df = four_level_index_dataframe + result = df.xs(("a", 4), level=["one", "four"]) + + # setting this will give a SettingWithCopyError + # as we are trying to write a view + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + result[:] = 10 + + @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])]) + def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data): + # see GH#13719 + frame = multiindex_dataframe_random_data + df = concat([frame] * 2) + assert df.index.is_unique is False + expected = concat([frame.xs("one", level="second")] * 2) + + result = df.xs(key, level=level) + tm.assert_frame_equal(result, expected) + + def test_xs_missing_values_in_index(self): + # see GH#6574 + # missing values in returned index should be preserved + acc = [ + ("a", "abcde", 1), + ("b", "bbcde", 2), + ("y", "yzcde", 25), + ("z", "xbcde", 24), + ("z", None, 26), + ("z", "zbcde", 25), + ("z", "ybcde", 26), + ] + df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"]) + expected = DataFrame( + {"cnt": [24, 26, 25, 26]}, + index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"), 
+ ) + + result = df.xs("z", level="a1") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "key, level, exp_arr, exp_index", + [ + ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")), + ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")), + ], + ) + def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index): + # see GH#2903 + arr = np.random.randn(4, 4) + index = MultiIndex( + levels=[["a", "b"], ["bar", "foo", "hello", "world"]], + codes=[[0, 0, 1, 1], [0, 1, 2, 3]], + names=["lvl0", "lvl1"], + ) + df = DataFrame(arr, columns=index) + result = df.xs(key, level=level, axis=1) + expected = DataFrame(exp_arr(arr), columns=exp_index) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "indexer", + [ + lambda df: df.xs(("a", 4), level=["one", "four"]), + lambda df: df.xs("a").xs(4, level="four"), + ], + ) + def test_xs_level_multiple(self, indexer, four_level_index_dataframe): + df = four_level_index_dataframe + expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]] + expected_index = MultiIndex( + levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"] + ) + expected = DataFrame( + expected_values, index=expected_index, columns=list("ABCDE") + ) + result = indexer(df) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")] + ) + def test_xs_level0(self, indexer, four_level_index_dataframe): + df = four_level_index_dataframe + expected_values = [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + ] + expected_index = MultiIndex( + levels=[["b", "q"], [10.0032, 20.0], [4, 5]], + codes=[[0, 1], [0, 1], [1, 0]], + names=["two", "three", "four"], + ) + expected = DataFrame( + expected_values, index=expected_index, columns=list("ABCDE") + ) + + result = indexer(df) + tm.assert_frame_equal(result, expected) + + def test_xs_values(self, 
multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs(("bar", "two")).values + expected = df.values[4] + tm.assert_almost_equal(result, expected) + + def test_xs_loc_equality(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs(("bar", "two")) + expected = df.loc[("bar", "two")] + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("klass", [DataFrame, Series]) + def test_xs_IndexSlice_argument_not_implemented(self, klass): + # GH#35301 + + index = MultiIndex( + levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + ) + + obj = DataFrame(np.random.randn(6, 4), index=index) + if klass is Series: + obj = obj[0] + + msg = ( + "Expected label or tuple of labels, got " + r"\(\('foo', 'qux', 0\), slice\(None, None, None\)\)" + ) + with pytest.raises(TypeError, match=msg): + obj.xs(IndexSlice[("foo", "qux", 0), :]) + + @pytest.mark.parametrize("klass", [DataFrame, Series]) + def test_xs_levels_raises(self, klass): + obj = DataFrame({"A": [1, 2, 3]}) + if klass is Series: + obj = obj["A"] + + msg = "Index must be a MultiIndex" + with pytest.raises(TypeError, match=msg): + obj.xs(0, level="as") diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 81be110fd11e5..57747f8274d85 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -662,6 +662,13 @@ def test_get_loc_past_lexsort_depth(self): assert result == slice(0, 1, None) + def test_multiindex_get_loc_list_raises(self): + # GH#35878 + idx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + msg = "unhashable type" + with pytest.raises(TypeError, match=msg): + idx.get_loc([]) + class TestWhere: def test_where(self): diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 
0e466b49f6597..2cb5b55f14596 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -590,3 +590,11 @@ def test_missing_key_raises_keyerror2(self): with pytest.raises(KeyError, match=r"\(0, 3\)"): ser.loc[0, 3] + + +def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data): + df = multiindex_year_month_day_dataframe_random_data + ser = df["A"] + result = ser[2000, 5] + expected = df.loc[2000, 5]["A"] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index ed1348efb5cba..a3b8d66c92024 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -1,5 +1,4 @@ import numpy as np -import pytest import pandas._libs.index as _index from pandas.errors import PerformanceWarning @@ -84,10 +83,3 @@ def test_nested_tuples_duplicates(self): df3 = df.copy(deep=True) df3.loc[[(dti[0], "a")], "c2"] = 1.0 tm.assert_frame_equal(df3, expected) - - def test_multiindex_get_loc_list_raises(self): - # https://github.com/pandas-dev/pandas/issues/35878 - idx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)]) - msg = "unhashable type" - with pytest.raises(TypeError, match=msg): - idx.get_loc([]) diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py deleted file mode 100644 index 91be1d913001b..0000000000000 --- a/pandas/tests/indexing/multiindex/test_xs.py +++ /dev/null @@ -1,280 +0,0 @@ -import numpy as np -import pytest - -from pandas import DataFrame, Index, IndexSlice, MultiIndex, Series, concat, date_range -import pandas._testing as tm -import pandas.core.common as com - - -@pytest.fixture -def four_level_index_dataframe(): - arr = np.array( - [ - [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], - [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], - [-0.6662, -0.5243, -0.358, 0.89145, 2.5838], - 
] - ) - index = MultiIndex( - levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]], - codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]], - names=["one", "two", "three", "four"], - ) - return DataFrame(arr, index=index, columns=list("ABCDE")) - - -@pytest.mark.parametrize( - "key, level, exp_arr, exp_index", - [ - ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")), - ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")), - ], -) -def test_xs_named_levels_axis_eq_1(key, level, exp_arr, exp_index): - # see gh-2903 - arr = np.random.randn(4, 4) - index = MultiIndex( - levels=[["a", "b"], ["bar", "foo", "hello", "world"]], - codes=[[0, 0, 1, 1], [0, 1, 2, 3]], - names=["lvl0", "lvl1"], - ) - df = DataFrame(arr, columns=index) - result = df.xs(key, level=level, axis=1) - expected = DataFrame(exp_arr(arr), columns=exp_index) - tm.assert_frame_equal(result, expected) - - -def test_xs_values(multiindex_dataframe_random_data): - df = multiindex_dataframe_random_data - result = df.xs(("bar", "two")).values - expected = df.values[4] - tm.assert_almost_equal(result, expected) - - -def test_xs_loc_equality(multiindex_dataframe_random_data): - df = multiindex_dataframe_random_data - result = df.xs(("bar", "two")) - expected = df.loc[("bar", "two")] - tm.assert_series_equal(result, expected) - - -def test_xs_missing_values_in_index(): - # see gh-6574 - # missing values in returned index should be preserved - acc = [ - ("a", "abcde", 1), - ("b", "bbcde", 2), - ("y", "yzcde", 25), - ("z", "xbcde", 24), - ("z", None, 26), - ("z", "zbcde", 25), - ("z", "ybcde", 26), - ] - df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"]) - expected = DataFrame( - {"cnt": [24, 26, 25, 26]}, - index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"), - ) - - result = df.xs("z", level="a1") - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])]) -def 
test_xs_with_duplicates(key, level, multiindex_dataframe_random_data): - # see gh-13719 - frame = multiindex_dataframe_random_data - df = concat([frame] * 2) - assert df.index.is_unique is False - expected = concat([frame.xs("one", level="second")] * 2) - - result = df.xs(key, level=level) - tm.assert_frame_equal(result, expected) - - -def test_xs_level(multiindex_dataframe_random_data): - df = multiindex_dataframe_random_data - result = df.xs("two", level="second") - expected = df[df.index.get_level_values(1) == "two"] - expected.index = Index(["foo", "bar", "baz", "qux"], name="first") - tm.assert_frame_equal(result, expected) - - -def test_xs_level_eq_2(): - arr = np.random.randn(3, 5) - index = MultiIndex( - levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]], - codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], - ) - df = DataFrame(arr, index=index) - expected = DataFrame(arr[1:2], index=[["a"], ["b"]]) - result = df.xs("c", level=2) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "indexer", - [ - lambda df: df.xs(("a", 4), level=["one", "four"]), - lambda df: df.xs("a").xs(4, level="four"), - ], -) -def test_xs_level_multiple(indexer, four_level_index_dataframe): - df = four_level_index_dataframe - expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]] - expected_index = MultiIndex( - levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"] - ) - expected = DataFrame(expected_values, index=expected_index, columns=list("ABCDE")) - result = indexer(df) - tm.assert_frame_equal(result, expected) - - -def test_xs_setting_with_copy_error(multiindex_dataframe_random_data): - # this is a copy in 0.14 - df = multiindex_dataframe_random_data - result = df.xs("two", level="second") - - # setting this will give a SettingWithCopyError - # as we are trying to write a view - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(com.SettingWithCopyError, match=msg): - result[:] = 10 - - -def 
test_xs_setting_with_copy_error_multiple(four_level_index_dataframe): - # this is a copy in 0.14 - df = four_level_index_dataframe - result = df.xs(("a", 4), level=["one", "four"]) - - # setting this will give a SettingWithCopyError - # as we are trying to write a view - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(com.SettingWithCopyError, match=msg): - result[:] = 10 - - -def test_xs_integer_key(): - # see gh-2107 - dates = range(20111201, 20111205) - ids = list("abcde") - index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) - df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"]) - - result = df.xs(20111201, level="date") - expected = df.loc[20111201, :] - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")] -) -def test_xs_level0(indexer, four_level_index_dataframe): - df = four_level_index_dataframe - expected_values = [ - [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], - [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], - ] - expected_index = MultiIndex( - levels=[["b", "q"], [10.0032, 20.0], [4, 5]], - codes=[[0, 1], [0, 1], [1, 0]], - names=["two", "three", "four"], - ) - expected = DataFrame(expected_values, index=expected_index, columns=list("ABCDE")) - - result = indexer(df) - tm.assert_frame_equal(result, expected) - - -def test_xs_level_series(multiindex_dataframe_random_data): - # this test is not explicitly testing .xs functionality - # TODO: move to another module or refactor - df = multiindex_dataframe_random_data - s = df["A"] - result = s[:, "two"] - expected = df.xs("two", level=1)["A"] - tm.assert_series_equal(result, expected) - - -def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data): - # this test is not explicitly testing .xs functionality - # TODO: move to another module or refactor - df = multiindex_year_month_day_dataframe_random_data - s = df["A"] - 
result = s[2000, 5] - expected = df.loc[2000, 5]["A"] - tm.assert_series_equal(result, expected) - - -def test_xs_level_series_slice_not_implemented( - multiindex_year_month_day_dataframe_random_data, -): - # this test is not explicitly testing .xs functionality - # TODO: move to another module or refactor - # not implementing this for now - df = multiindex_year_month_day_dataframe_random_data - s = df["A"] - - msg = r"\(2000, slice\(3, 4, None\)\)" - with pytest.raises(TypeError, match=msg): - s[2000, 3:4] - - -def test_xs_IndexSlice_argument_not_implemented(): - # GH 35301 - - index = MultiIndex( - levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]], - codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], - ) - - series = Series(np.random.randn(6), index=index) - frame = DataFrame(np.random.randn(6, 4), index=index) - - msg = ( - "Expected label or tuple of labels, got " - r"\(\('foo', 'qux', 0\), slice\(None, None, None\)\)" - ) - with pytest.raises(TypeError, match=msg): - frame.xs(IndexSlice[("foo", "qux", 0), :]) - with pytest.raises(TypeError, match=msg): - series.xs(IndexSlice[("foo", "qux", 0), :]) - - -def test_series_getitem_multiindex_xs(): - # GH6258 - dt = list(date_range("20130903", periods=3)) - idx = MultiIndex.from_product([list("AB"), dt]) - s = Series([1, 3, 4, 1, 3, 4], index=idx) - expected = Series([1, 1], index=list("AB")) - - result = s.xs("20130903", level=1) - tm.assert_series_equal(result, expected) - - -def test_series_getitem_multiindex_xs_by_label(): - # GH5684 - idx = MultiIndex.from_tuples( - [("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")] - ) - s = Series([1, 2, 3, 4], index=idx) - return_value = s.index.set_names(["L1", "L2"], inplace=True) - assert return_value is None - expected = Series([1, 3], index=["a", "b"]) - return_value = expected.index.set_names(["L1"], inplace=True) - assert return_value is None - - result = s.xs("one", level="L2") - tm.assert_series_equal(result, expected) - - -def 
test_xs_levels_raises(): - df = DataFrame({"A": [1, 2, 3]}) - - msg = "Index must be a MultiIndex" - with pytest.raises(TypeError, match=msg): - df.xs(0, level="as") - - s = df.A - with pytest.raises(TypeError, match=msg): - s.xs(0, level="as") diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 0da1f74d9cde6..9f6aab823c3ad 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -278,3 +278,15 @@ def test_getitem_ndim_deprecated(): s = Series([0, 1]) with tm.assert_produces_warning(FutureWarning): s[:, None] + + +def test_getitem_multilevel_scalar_slice_not_implemented( + multiindex_year_month_day_dataframe_random_data, +): + # not implementing this for now + df = multiindex_year_month_day_dataframe_random_data + ser = df["A"] + + msg = r"\(2000, slice\(3, 4, None\)\)" + with pytest.raises(TypeError, match=msg): + ser[2000, 3:4] diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index 43458ca2ebeb2..1a23b09bde816 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -1,13 +1,14 @@ import numpy as np -import pandas as pd +from pandas import MultiIndex, Series, date_range +import pandas._testing as tm def test_xs_datetimelike_wrapping(): # GH#31630 a case where we shouldn't wrap datetime64 in Timestamp - arr = pd.date_range("2016-01-01", periods=3)._data._data + arr = date_range("2016-01-01", periods=3)._data._data - ser = pd.Series(arr, dtype=object) + ser = Series(arr, dtype=object) for i in range(len(ser)): ser.iloc[i] = arr[i] assert ser.dtype == object @@ -15,3 +16,37 @@ def test_xs_datetimelike_wrapping(): result = ser.xs(0) assert isinstance(result, np.datetime64) + + +class TestXSWithMultiIndex: + def test_xs_level_series(self, multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + ser = df["A"] + expected = ser[:, "two"] + result = 
df.xs("two", level=1)["A"] + tm.assert_series_equal(result, expected) + + def test_series_getitem_multiindex_xs_by_label(self): + # GH#5684 + idx = MultiIndex.from_tuples( + [("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")] + ) + ser = Series([1, 2, 3, 4], index=idx) + return_value = ser.index.set_names(["L1", "L2"], inplace=True) + assert return_value is None + expected = Series([1, 3], index=["a", "b"]) + return_value = expected.index.set_names(["L1"], inplace=True) + assert return_value is None + + result = ser.xs("one", level="L2") + tm.assert_series_equal(result, expected) + + def test_series_getitem_multiindex_xs(self): + # GH#6258 + dt = list(date_range("20130903", periods=3)) + idx = MultiIndex.from_product([list("AB"), dt]) + ser = Series([1, 3, 4, 1, 3, 4], index=idx) + expected = Series([1, 1], index=list("AB")) + + result = ser.xs("20130903", level=1) + tm.assert_series_equal(result, expected)