diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 44deab25db695..121db0f86f7ae 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -170,7 +170,7 @@ Deprecations ~~~~~~~~~~~~ - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`) -- +- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6f44b5abf5b04..e32f597c9e378 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3137,8 +3137,18 @@ def is_int(v): pass if com.is_null_slice(key): + # It doesn't matter if we are positional or label based indexer = key elif is_positional: + if kind == "loc": + # GH#16121, GH#24612, GH#31810 + warnings.warn( + "Slicing a positional slice with .loc is not supported, " + "and will raise TypeError in a future version. " + "Use .loc with labels or .iloc with positions instead.", + FutureWarning, + stacklevel=6, + ) indexer = key else: indexer = self.slice_indexer(start, stop, step, kind=kind) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 03598b6bb5eca..e89b2c6f1fec0 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -40,8 +40,8 @@ def float_frame_with_na(): """ df = DataFrame(tm.getSeriesData()) # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan + df.iloc[5:10] = np.nan + df.iloc[15:20, -2:] = np.nan return df @@ -74,8 +74,8 @@ def bool_frame_with_na(): df = DataFrame(tm.getSeriesData()) > 0 df = df.astype(object) # set some NAs - df.loc[5:10] = np.nan - df.loc[15:20, -2:] = np.nan + df.iloc[5:10] = np.nan + df.iloc[15:20, -2:] = np.nan return df diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index b0cb988720c25..ade17860a99b7 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1209,7 +1209,7 @@ def test_setitem_frame_mixed(self, float_string_frame): piece = DataFrame( [[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"] ) - key = (slice(None, 2), ["A", "B"]) + key = (f.index[slice(None, 2)], ["A", "B"]) f.loc[key] = piece tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values) @@ -1220,7 +1220,7 @@ def test_setitem_frame_mixed(self, float_string_frame): index=list(f.index[0:2]) + ["foo", "bar"], columns=["A", "B"], ) - key = (slice(None, 2), ["A", "B"]) + key = (f.index[slice(None, 2)], ["A", "B"]) f.loc[key] = piece tm.assert_almost_equal( f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2] @@ -1230,7 +1230,7 @@ def test_setitem_frame_mixed(self, float_string_frame): f = float_string_frame.copy() piece = f.loc[f.index[:2], ["A"]] piece.index = f.index[-2:] - key = (slice(-2, None), ["A", "B"]) + key = (f.index[slice(-2, None)], ["A", "B"]) f.loc[key] = piece piece["B"] = np.nan tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) @@ -1238,7 +1238,7 @@ def test_setitem_frame_mixed(self, float_string_frame): # ndarray f = float_string_frame.copy() piece = float_string_frame.loc[f.index[:2], ["A", "B"]] - key = (slice(-2, None), ["A", "B"]) + key = (f.index[slice(-2, None)], ["A", "B"]) f.loc[key] = piece.values tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) @@ -1873,7 +1873,7 @@ def test_setitem_datetimelike_with_inference(self): df = DataFrame(index=date_range("20130101", periods=4)) df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]") df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]") - df.loc[:3, "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]") + df.loc[df.index[:3], "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]") df.loc[:, "D"] = np.array([4 * one_hour] * 4, dtype="m8[ns]") df.loc[df.index[:3], "E"] = np.array([5 * one_hour] * 3, dtype="m8[ns]") df["F"] = np.timedelta64("NaT") diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index e2b417972638e..91d920c706bb6 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -21,7 +21,7 @@ class TestFrameAsof: def test_basic(self, date_range_frame): df = date_range_frame N = 50 - df.loc[15:30, "A"] = np.nan + df.loc[df.index[15:30], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = df.asof(dates) @@ -41,7 +41,7 @@ def test_basic(self, date_range_frame): def test_subset(self, date_range_frame): N = 10 df = date_range_frame.iloc[:N].copy() - df.loc[4:8, "A"] = np.nan + df.loc[df.index[4:8], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") # with a subset of A should be the same @@ -149,7 +149,7 @@ def test_is_copy(self, date_range_frame): # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings df = date_range_frame N = 50 - df.loc[15:30, "A"] = np.nan + df.loc[df.index[15:30], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = df.asof(dates) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 61802956addeb..07e30d41c216d 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -913,8 +913,8 @@ def test_sum_bools(self): def test_idxmin(self, float_frame, int_frame): frame = float_frame - frame.loc[5:10] = np.nan - frame.loc[15:20, -2:] = np.nan + frame.iloc[5:10] = np.nan + frame.iloc[15:20, -2:] = np.nan for skipna in [True, False]: for axis in [0, 1]: for df in [frame, int_frame]: @@ -928,8 +928,8 @@ def test_idxmin(self, float_frame, int_frame): def test_idxmax(self, float_frame, int_frame): frame = float_frame - frame.loc[5:10] = np.nan - frame.loc[15:20, -2:] = np.nan + frame.iloc[5:10] = np.nan + frame.iloc[15:20, -2:] = np.nan for skipna in [True, False]: for axis in [0, 1]: for df in [frame, int_frame]: diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index fe6abef97acc4..11705cd77a325 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -339,7 +339,7 @@ def test_apply_yield_list(self, float_frame): tm.assert_frame_equal(result, float_frame) def test_apply_reduce_Series(self, float_frame): - float_frame.loc[::2, "A"] = np.nan + float_frame["A"].iloc[::2] = np.nan expected = float_frame.mean(1) result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index a5f5e6f36cd58..e67fef9efef6d 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -478,7 +478,7 @@ def test_convert_objects(self, float_string_frame): length = len(float_string_frame) float_string_frame["J"] = "1." float_string_frame["K"] = "1" - float_string_frame.loc[0:5, ["J", "K"]] = "garbled" + float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled" converted = float_string_frame._convert(datetime=True, numeric=True) assert converted["H"].dtype == "float64" assert converted["I"].dtype == "int64" diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index 486cbfb2761e0..1b7e70dd28c63 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -23,9 +23,9 @@ def test_cumsum_corner(self): result = dm.cumsum() # noqa def test_cumsum(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan # axis = 0 cumsum = datetime_frame.cumsum() @@ -46,9 +46,9 @@ def test_cumsum(self, datetime_frame): assert np.shape(cumsum_xs) == np.shape(datetime_frame) def test_cumprod(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan # axis = 0 cumprod = datetime_frame.cumprod() @@ -80,9 +80,9 @@ def test_cumprod(self, datetime_frame): strict=False, ) def test_cummin(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan # axis = 0 cummin = datetime_frame.cummin() @@ -108,9 +108,9 @@ def test_cummin(self, datetime_frame): strict=False, ) def test_cummax(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan + datetime_frame.iloc[5:10, 0] = np.nan + datetime_frame.iloc[10:15, 1] = np.nan + datetime_frame.iloc[15:, 2] = np.nan # axis = 0 cummax = datetime_frame.cummax() diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 86c9a98377f3f..cec2bd4b634c1 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -761,7 +761,7 @@ def create_cols(name): ) # add in some nans - df_float.loc[30:50, 1:3] = np.nan + df_float.iloc[30:50, 1:3] = np.nan # ## this is a bug in read_csv right now #### # df_dt.loc[30:50,1:3] = np.nan diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4d042af8d59b4..3073fe085de15 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -863,6 +863,7 @@ def test_loc_setitem_empty_append_raises(self): data = [1, 2] df = DataFrame(columns=["x", "y"]) + df.index = df.index.astype(np.int64) msg = ( r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " r"are in the \[index\]" @@ -975,3 +976,42 @@ def test_loc_mixed_int_float(): result = ser.loc[1] assert result == 0 + + +def test_loc_with_positional_slice_deprecation(): + # GH#31840 + ser = pd.Series(range(4), index=["A", "B", "C", "D"]) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + ser.loc[:3] = 2 + + expected = pd.Series([2, 2, 2, 3], index=["A", "B", "C", "D"]) + tm.assert_series_equal(ser, expected) + + +def test_loc_slice_disallows_positional(): + # GH#16121, GH#24612, GH#31810 + dti = pd.date_range("2016-01-01", periods=3) + df = pd.DataFrame(np.random.random((3, 2)), index=dti) + + ser = df[0] + + msg = ( + "cannot do slice indexing on DatetimeIndex with these " + r"indexers \[1\] of type int" + ) + + for obj in [df, ser]: + with pytest.raises(TypeError, match=msg): + obj.loc[1:3] + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # GH#31840 deprecated incorrect behavior + obj.loc[1:3] = 1 + + with pytest.raises(TypeError, match=msg): + df.loc[1:3, 1] + + with tm.assert_produces_warning(FutureWarning): + # GH#31840 deprecated incorrect behavior + df.loc[1:3, 1] = 2 diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index fd585a73f6ce6..fcc48daa59a40 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -342,7 +342,7 @@ def test_repr(self, setup_path): df["timestamp2"] = Timestamp("20010103") df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) - df.loc[3:6, ["obj1"]] = np.nan + df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) with catch_warnings(record=True): @@ -846,7 +846,7 @@ def test_put_mixed_type(self, setup_path): df["timestamp2"] = Timestamp("20010103") df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) - df.loc[3:6, ["obj1"]] = np.nan + df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) with ensure_clean_store(setup_path) as store: @@ -1372,11 +1372,11 @@ def check_col(key, name, size): _maybe_remove(store, "df") df = tm.makeTimeDataFrame() df["string"] = "foo" - df.loc[1:4, "string"] = np.nan + df.loc[df.index[1:4], "string"] = np.nan df["string2"] = "bar" - df.loc[4:8, "string2"] = np.nan + df.loc[df.index[4:8], "string2"] = np.nan df["string3"] = "bah" - df.loc[1:, "string3"] = np.nan + df.loc[df.index[1:], "string3"] = np.nan store.append("df", df) result = store.select("df") tm.assert_frame_equal(result, df) @@ -1492,8 +1492,8 @@ def test_append_with_data_columns(self, setup_path): # data column selection with a string data_column df_new = df.copy() df_new["string"] = "foo" - df_new.loc[1:4, "string"] = np.nan - df_new.loc[5:6, "string"] = "bar" + df_new.loc[df_new.index[1:4], "string"] = np.nan + df_new.loc[df_new.index[5:6], "string"] = "bar" _maybe_remove(store, "df") store.append("df", df_new, data_columns=["string"]) result = store.select("df", "string='foo'") @@ -1574,12 +1574,12 @@ def check_col(key, name, size): # doc example df_dc = df.copy() df_dc["string"] = "foo" - df_dc.loc[4:6, "string"] = np.nan - df_dc.loc[7:9, "string"] = "bar" + df_dc.loc[df_dc.index[4:6], "string"] = np.nan + df_dc.loc[df_dc.index[7:9], "string"] = "bar" df_dc["string2"] = "cool" df_dc["datetime"] = Timestamp("20010102") df_dc = df_dc._convert(datetime=True) - df_dc.loc[3:5, ["A", "B", "datetime"]] = np.nan + df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan _maybe_remove(store, "df_dc") store.append( @@ -1602,8 +1602,8 @@ def check_col(key, name, size): np.random.randn(8, 3), index=index, columns=["A", "B", "C"] ) df_dc["string"] = "foo" - df_dc.loc[4:6, "string"] = np.nan - df_dc.loc[7:9, "string"] = "bar" + df_dc.loc[df_dc.index[4:6], "string"] = np.nan + df_dc.loc[df_dc.index[7:9], "string"] = "bar" df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs() df_dc["string2"] = "cool" @@ -2024,7 +2024,7 @@ def test_table_mixed_dtypes(self, setup_path): df["timestamp2"] = Timestamp("20010103") df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) - df.loc[3:6, ["obj1"]] = np.nan + df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate()._convert(datetime=True) with ensure_clean_store(setup_path) as store: @@ -2200,7 +2200,7 @@ def test_invalid_terms(self, setup_path): df = tm.makeTimeDataFrame() df["string"] = "foo" - df.loc[0:4, "string"] = "bar" + df.loc[df.index[0:4], "string"] = "bar" store.put("df", df, format="table") @@ -3343,7 +3343,7 @@ def test_string_select(self, setup_path): # test string ==/!= df["x"] = "none" - df.loc[2:7, "x"] = "" + df.loc[df.index[2:7], "x"] = "" store.append("df", df, data_columns=["x"]) @@ -3365,7 +3365,7 @@ def test_string_select(self, setup_path): # int ==/!= df["int"] = 1 - df.loc[2:7, "int"] = 2 + df.loc[df.index[2:7], "int"] = 2 store.append("df3", df, data_columns=["int"]) @@ -3419,7 +3419,7 @@ def test_read_column(self, setup_path): # a data column with NaNs, result excludes the NaNs df3 = df.copy() df3["string"] = "foo" - df3.loc[4:6, "string"] = np.nan + df3.loc[df3.index[4:6], "string"] = np.nan store.append("df3", df3, data_columns=["string"]) result = store.select_column("df3", "string") tm.assert_almost_equal(result.values, df3["string"].values) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index fc9d4ec5290a5..b5d04fd499c08 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -293,7 +293,7 @@ def test_getitem_setitem_datetimeindex(): result = ts.copy() result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] + result.iloc[4:8] = ts.iloc[4:8] tm.assert_series_equal(result, ts) # also test partial date slicing @@ -349,7 +349,7 @@ def test_getitem_setitem_periodindex(): result = ts.copy() result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] + result.iloc[4:8] = ts.iloc[4:8] tm.assert_series_equal(result, ts) diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index b121efd202744..40fb605bf0ae1 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -12,7 +12,7 @@ def test_basic(self): N = 50 rng = date_range("1/1/1990", periods=N, freq="53s") ts = Series(np.random.randn(N), index=rng) - ts[15:30] = np.nan + ts.iloc[15:30] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = ts.asof(dates) @@ -37,8 +37,8 @@ def test_scalar(self): N = 30 rng = date_range("1/1/1990", periods=N, freq="53s") ts = Series(np.arange(N), index=rng) - ts[5:10] = np.NaN - ts[15:20] = np.NaN + ts.iloc[5:10] = np.NaN + ts.iloc[15:20] = np.NaN val1 = ts.asof(ts.index[7]) val2 = ts.asof(ts.index[19]) @@ -94,7 +94,7 @@ def test_periodindex(self): N = 50 rng = period_range("1/1/1990", periods=N, freq="H") ts = Series(np.random.randn(N), index=rng) - ts[15:30] = np.nan + ts.iloc[15:30] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="37min") result = ts.asof(dates) @@ -112,8 +112,8 @@ def test_periodindex(self): rs = result[mask] assert (rs == ts[lb]).all() - ts[5:10] = np.nan - ts[15:20] = np.nan + ts.iloc[5:10] = np.nan + ts.iloc[15:20] = np.nan val1 = ts.asof(ts.index[7]) val2 = ts.asof(ts.index[19]) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 10197766ce4a6..95d04c9a45d25 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -74,7 +74,7 @@ def test_add_series_with_period_index(self): result = ts + ts[::2] expected = ts + ts - expected[1::2] = np.nan + expected.iloc[1::2] = np.nan tm.assert_series_equal(result, expected) result = ts + _permute(ts[::2])