diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 4bd1d5fa56468..862f5b87785f5 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,6 +1,8 @@ from datetime import datetime import numpy as np +import pytest +import pytz from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -17,19 +19,16 @@ Timestamp, cut, date_range, - to_datetime, ) import pandas._testing as tm class TestDataFrameAlterAxes: - def test_convert_dti_to_series(self): - # don't cast a DatetimeIndex WITH a tz, leave as object - # GH 6032 - idx = DatetimeIndex( - to_datetime(["2013-1-1 13:00", "2013-1-2 14:00"]), name="B" - ).tz_localize("US/Pacific") - df = DataFrame(np.random.randn(2, 1), columns=["A"]) + @pytest.fixture + def idx_expected(self): + idx = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B").tz_localize( + "US/Pacific" + ) expected = Series( np.array( @@ -41,49 +40,76 @@ def test_convert_dti_to_series(self): ), name="B", ) + assert expected.dtype == idx.dtype + return idx, expected - # convert index to series - result = Series(idx) - tm.assert_series_equal(result, expected) - - # assign to frame - df["B"] = idx - result = df["B"] - tm.assert_series_equal(result, expected) - + def test_to_series_keep_tz_deprecated_true(self, idx_expected): # convert to series while keeping the timezone + idx, expected = idx_expected + msg = "stop passing 'keep_tz'" with tm.assert_produces_warning(FutureWarning) as m: result = idx.to_series(keep_tz=True, index=[0, 1]) + assert msg in str(m[0].message) + tm.assert_series_equal(result, expected) + + def test_to_series_keep_tz_deprecated_false(self, idx_expected): + idx, expected = idx_expected + + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=False, index=[0, 1]) + tm.assert_series_equal(result, expected.dt.tz_convert(None)) + msg = "do 'idx.tz_convert(None)' before calling" assert msg in str(m[0].message) 
+ def test_setitem_dt64series(self, idx_expected): # convert to utc + idx, expected = idx_expected + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + df["B"] = idx + with tm.assert_produces_warning(FutureWarning) as m: df["B"] = idx.to_series(keep_tz=False, index=[0, 1]) - result = df["B"] - comp = Series(DatetimeIndex(expected.values).tz_localize(None), name="B") - tm.assert_series_equal(result, comp) msg = "do 'idx.tz_convert(None)' before calling" assert msg in str(m[0].message) - result = idx.to_series(index=[0, 1]) + result = df["B"] + comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B") + tm.assert_series_equal(result, comp) + + def test_setitem_datetimeindex(self, idx_expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + idx, expected = idx_expected + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # assign to frame + df["B"] = idx + result = df["B"] tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning) as m: - result = idx.to_series(keep_tz=False, index=[0, 1]) - tm.assert_series_equal(result, expected.dt.tz_convert(None)) - msg = "do 'idx.tz_convert(None)' before calling" - assert msg in str(m[0].message) + def test_setitem_object_array_of_tzaware_datetimes(self, idx_expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + idx, expected = idx_expected + df = DataFrame(np.random.randn(2, 1), columns=["A"]) - # list of datetimes with a tz + # object array of datetimes with a tz df["B"] = idx.to_pydatetime() result = df["B"] tm.assert_series_equal(result, expected) + def test_constructor_from_tzaware_datetimeindex(self, idx_expected): + # don't cast a DatetimeIndex WITH a tz, leave as object + # GH 6032 + idx, expected = idx_expected + + # convert index to series + result = Series(idx) + tm.assert_series_equal(result, expected) + + def test_set_axis_setattr_index(self): # GH 6785 # set the index manually - import pytz df = DataFrame([{"ts": 
datetime(2014, 4, 1, tzinfo=pytz.utc), "foo": 1}]) expected = df.set_index("ts") @@ -102,6 +128,7 @@ def test_dti_set_index_reindex(self): df = df.reindex(idx2) tm.assert_index_equal(df.index, idx2) + def test_dti_set_index_reindex_with_tz(self): # GH 11314 # with tz index = date_range( @@ -130,16 +157,16 @@ class TestIntervalIndex: def test_setitem(self): df = DataFrame({"A": range(10)}) - s = cut(df.A, 5) - assert isinstance(s.cat.categories, IntervalIndex) + ser = cut(df["A"], 5) + assert isinstance(ser.cat.categories, IntervalIndex) # B & D end up as Categoricals # the remainer are converted to in-line objects # contining an IntervalIndex.values - df["B"] = s - df["C"] = np.array(s) - df["D"] = s.values - df["E"] = np.array(s.values) + df["B"] = ser + df["C"] = np.array(ser) + df["D"] = ser.values + df["E"] = np.array(ser.values) assert is_categorical_dtype(df["B"].dtype) assert is_interval_dtype(df["B"].cat.categories) @@ -152,17 +179,17 @@ def test_setitem(self): # they compare equal as Index # when converted to numpy objects c = lambda x: Index(np.array(x)) - tm.assert_index_equal(c(df.B), c(df.B), check_names=False) + tm.assert_index_equal(c(df.B), c(df.B)) tm.assert_index_equal(c(df.B), c(df.C), check_names=False) tm.assert_index_equal(c(df.B), c(df.D), check_names=False) - tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + tm.assert_index_equal(c(df.C), c(df.D), check_names=False) # B & D are the same Series - tm.assert_series_equal(df["B"], df["B"], check_names=False) + tm.assert_series_equal(df["B"], df["B"]) tm.assert_series_equal(df["B"], df["D"], check_names=False) # C & E are the same Series - tm.assert_series_equal(df["C"], df["C"], check_names=False) + tm.assert_series_equal(df["C"], df["C"]) tm.assert_series_equal(df["C"], df["E"], check_names=False) def test_set_reset_index(self): diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py index fb7a6e586c460..efabc666993ee 100644 --- 
a/pandas/tests/frame/test_logical_ops.py +++ b/pandas/tests/frame/test_logical_ops.py @@ -11,6 +11,42 @@ class TestDataFrameLogicalOperators: # &, |, ^ + @pytest.mark.parametrize( + "left, right, op, expected", + [ + ( + [True, False, np.nan], + [True, False, True], + operator.and_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.and_, + [True, False, False], + ), + ( + [True, False, np.nan], + [True, False, True], + operator.or_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.or_, + [True, False, True], + ), + ], + ) + def test_logical_operators_nans(self, left, right, op, expected, frame_or_series): + # GH#13896 + result = op(frame_or_series(left), frame_or_series(right)) + expected = frame_or_series(expected) + + tm.assert_equal(result, expected) + def test_logical_ops_empty_frame(self): # GH#5808 # empty frames, non-mixed dtype diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 1c54855ee7bce..109561a5acb23 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -6,13 +6,15 @@ import pandas._testing as tm +def check(result, expected=None): + if expected is not None: + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + class TestDataFrameNonuniqueIndexes: def test_column_dups_operations(self): - def check(result, expected=None): - if expected is not None: - tm.assert_frame_equal(result, expected) - result.dtypes - str(result) # assignment # GH 3687 @@ -308,13 +310,7 @@ def test_column_dups2(self): result = df.dropna(subset=["A", "C"], how="all") tm.assert_frame_equal(result, expected) - def test_column_dups_indexing(self): - def check(result, expected=None): - if expected is not None: - tm.assert_frame_equal(result, expected) - result.dtypes - str(result) - + def test_getitem_boolean_series_with_duplicate_columns(self): # boolean indexing 
# GH 4879 dups = ["A", "A", "C", "D"] @@ -327,22 +323,32 @@ def check(result, expected=None): result = df[df.C > 6] check(result, expected) + def test_getitem_boolean_frame_with_duplicate_columns(self): + dups = ["A", "A", "C", "D"] + # where df = DataFrame( np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" ) + # `df > 6` is a DataFrame with the same shape+alignment as df expected = df[df > 6] expected.columns = dups df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") result = df[df > 6] check(result, expected) + def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self): + # `df.A > 6` is a DataFrame with a different shape from df + dups = ["A", "A", "C", "D"] + # boolean with the duplicate raises df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") msg = "cannot reindex from a duplicate axis" with pytest.raises(ValueError, match=msg): df[df.A > 6] + def test_column_dups_indexing(self): + # dup aligning operations should work # GH 5185 df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 7fde448bb36dc..6a18810700205 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -383,20 +383,22 @@ def test_transpose(self): for df in [tm.makeTimeDataFrame()]: tm.assert_frame_equal(df.transpose().transpose(), df) - def test_numpy_transpose(self): - msg = "the 'axes' parameter is not supported" + def test_numpy_transpose(self, frame_or_series): - s = tm.makeFloatSeries() - tm.assert_series_equal(np.transpose(s), s) + obj = tm.makeTimeDataFrame() + if frame_or_series is Series: + obj = obj["A"] - with pytest.raises(ValueError, match=msg): - np.transpose(s, axes=1) + if frame_or_series is Series: + # 1D -> np.transpose is no-op + tm.assert_series_equal(np.transpose(obj), obj) - df = tm.makeTimeDataFrame() - tm.assert_frame_equal(np.transpose(np.transpose(df)), 
df) + # round-trip preserved + tm.assert_equal(np.transpose(np.transpose(obj)), obj) + msg = "the 'axes' parameter is not supported" with pytest.raises(ValueError, match=msg): - np.transpose(df, axes=1) + np.transpose(obj, axes=1) def test_take(self): indices = [1, 5, -2, 6, 3, -1] @@ -415,23 +417,24 @@ def test_take(self): ) tm.assert_frame_equal(out, expected) - def test_take_invalid_kwargs(self): + def test_take_invalid_kwargs(self, frame_or_series): indices = [-3, 2, 0, 1] - s = tm.makeFloatSeries() - df = tm.makeTimeDataFrame() - for obj in (s, df): - msg = r"take\(\) got an unexpected keyword argument 'foo'" - with pytest.raises(TypeError, match=msg): - obj.take(indices, foo=2) + obj = tm.makeTimeDataFrame() + if frame_or_series is Series: + obj = obj["A"] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + obj.take(indices, foo=2) - msg = "the 'out' parameter is not supported" - with pytest.raises(ValueError, match=msg): - obj.take(indices, out=indices) + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + obj.take(indices, out=indices) - msg = "the 'mode' parameter is not supported" - with pytest.raises(ValueError, match=msg): - obj.take(indices, mode="clip") + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + obj.take(indices, mode="clip") @pytest.mark.parametrize("is_copy", [True, False]) def test_depr_take_kwarg_is_copy(self, is_copy, frame_or_series): @@ -473,21 +476,19 @@ def test_axis_numbers_deprecated(self, frame_or_series): obj._AXIS_NUMBERS def test_flags_identity(self, frame_or_series): - s = Series([1, 2]) + obj = Series([1, 2]) if frame_or_series is DataFrame: - s = s.to_frame() + obj = obj.to_frame() - assert s.flags is s.flags - s2 = s.copy() - assert s2.flags is not s.flags + assert obj.flags is obj.flags + obj2 = obj.copy() + assert obj2.flags is not obj.flags - def test_slice_shift_deprecated(self): + 
def test_slice_shift_deprecated(self, frame_or_series): # GH 37601 - df = DataFrame({"A": [1, 2, 3, 4]}) - s = Series([1, 2, 3, 4]) - - with tm.assert_produces_warning(FutureWarning): - df["A"].slice_shift() + obj = DataFrame({"A": [1, 2, 3, 4]}) + if frame_or_series is DataFrame: + obj = obj["A"] with tm.assert_produces_warning(FutureWarning): - s.slice_shift() + obj.slice_shift() diff --git a/pandas/tests/generic/test_logical_ops.py b/pandas/tests/generic/test_logical_ops.py deleted file mode 100644 index 58185cbd9a40f..0000000000000 --- a/pandas/tests/generic/test_logical_ops.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Shareable tests for &, |, ^ -""" -import operator - -import numpy as np -import pytest - -from pandas import DataFrame, Series -import pandas._testing as tm - - -class TestLogicalOps: - @pytest.mark.parametrize( - "left, right, op, expected", - [ - ( - [True, False, np.nan], - [True, False, True], - operator.and_, - [True, False, False], - ), - ( - [True, False, True], - [True, False, np.nan], - operator.and_, - [True, False, False], - ), - ( - [True, False, np.nan], - [True, False, True], - operator.or_, - [True, False, False], - ), - ( - [True, False, True], - [True, False, np.nan], - operator.or_, - [True, False, True], - ), - ], - ) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_logical_operators_nans(self, left, right, op, expected, klass): - # GH#13896 - result = op(klass(left), klass(right)) - expected = klass(expected) - - tm.assert_equal(result, expected) diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py index a85d7ddc1ea53..a6aa45406305c 100644 --- a/pandas/tests/generic/test_to_xarray.py +++ b/pandas/tests/generic/test_to_xarray.py @@ -3,34 +3,35 @@ import pandas.util._test_decorators as td -import pandas as pd -from pandas import DataFrame, Series +from pandas import Categorical, DataFrame, MultiIndex, Series, date_range import pandas._testing as tm class 
TestDataFrameToXArray: - @td.skip_if_no("xarray", "0.10.0") - def test_to_xarray_index_types(self, index): - if isinstance(index, pd.MultiIndex): - pytest.skip("MultiIndex is tested separately") - if len(index) == 0: - pytest.skip("Test doesn't make sense for empty index") - - from xarray import Dataset - - df = DataFrame( + @pytest.fixture + def df(self): + return DataFrame( { "a": list("abc"), "b": list(range(1, 4)), "c": np.arange(3, 6).astype("u1"), "d": np.arange(4.0, 7.0, dtype="float64"), "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "f": Categorical(list("abc")), + "g": date_range("20130101", periods=3), + "h": date_range("20130101", periods=3, tz="US/Eastern"), } ) + @td.skip_if_no("xarray", "0.10.0") + def test_to_xarray_index_types(self, index, df): + if isinstance(index, MultiIndex): + pytest.skip("MultiIndex is tested separately") + if len(index) == 0: + pytest.skip("Test doesn't make sense for empty index") + + from xarray import Dataset + df.index = index[:3] df.index.name = "foo" df.columns.name = "bar" @@ -50,30 +51,21 @@ def test_to_xarray_index_types(self, index): tm.assert_frame_equal(result.to_dataframe(), expected) @td.skip_if_no("xarray", min_version="0.7.0") - def test_to_xarray(self): + def test_to_xarray_empty(self, df): from xarray import Dataset - df = DataFrame( - { - "a": list("abc"), - "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), - "d": np.arange(4.0, 7.0, dtype="float64"), - "e": [True, False, True], - "f": pd.Categorical(list("abc")), - "g": pd.date_range("20130101", periods=3), - "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), - } - ) - df.index.name = "foo" result = df[0:0].to_xarray() assert result.dims["foo"] == 0 assert isinstance(result, Dataset) + @td.skip_if_no("xarray", min_version="0.7.0") + def test_to_xarray_with_multiindex(self, df): + from xarray import Dataset + # 
available in 0.7.1 # MultiIndex - df.index = pd.MultiIndex.from_product([["a"], range(3)], names=["one", "two"]) + df.index = MultiIndex.from_product([["a"], range(3)], names=["one", "two"]) result = df.to_xarray() assert result.dims["one"] == 1 assert result.dims["two"] == 3 @@ -86,20 +78,20 @@ def test_to_xarray(self): expected = df.copy() expected["f"] = expected["f"].astype(object) expected.columns.name = None - tm.assert_frame_equal(result, expected, check_index_type=False) + tm.assert_frame_equal(result, expected) class TestSeriesToXArray: @td.skip_if_no("xarray", "0.10.0") def test_to_xarray_index_types(self, index): - if isinstance(index, pd.MultiIndex): + if isinstance(index, MultiIndex): pytest.skip("MultiIndex is tested separately") from xarray import DataArray - s = Series(range(len(index)), index=index, dtype="int64") - s.index.name = "foo" - result = s.to_xarray() + ser = Series(range(len(index)), index=index, dtype="int64") + ser.index.name = "foo" + result = ser.to_xarray() repr(result) assert len(result) == len(index) assert len(result.coords) == 1 @@ -107,27 +99,29 @@ def test_to_xarray_index_types(self, index): assert isinstance(result, DataArray) # idempotency - tm.assert_series_equal(result.to_series(), s, check_index_type=False) + tm.assert_series_equal(result.to_series(), ser) @td.skip_if_no("xarray", min_version="0.7.0") - def test_to_xarray(self): + def test_to_xarray_empty(self): from xarray import DataArray - s = Series([], dtype=object) - s.index.name = "foo" - result = s.to_xarray() + ser = Series([], dtype=object) + ser.index.name = "foo" + result = ser.to_xarray() assert len(result) == 0 assert len(result.coords) == 1 tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) assert isinstance(result, DataArray) - s = Series(range(6), dtype="int64") - s.index.name = "foo" - s.index = pd.MultiIndex.from_product( - [["a", "b"], range(3)], names=["one", "two"] - ) - result = s.to_xarray() + @td.skip_if_no("xarray", min_version="0.7.0") 
+ def test_to_xarray_with_multiindex(self): + from xarray import DataArray + + mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"]) + ser = Series(range(6), dtype="int64", index=mi) + result = ser.to_xarray() assert len(result) == 2 tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) assert isinstance(result, DataArray) - tm.assert_series_equal(result.to_series(), s) + res = result.to_series() + tm.assert_series_equal(res, ser) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index f5c3623fb9986..46c41efc09fdf 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -53,24 +53,16 @@ def test_astype_from_categorical(self): tm.assert_series_equal(res, exp) def test_astype_categorical_to_other(self): + cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() + ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) - value = np.random.RandomState(0).randint(0, 10000, 100) - df = DataFrame({"value": value}) - labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] - cat_labels = Categorical(labels, labels) - - df = df.sort_values(by=["value"], ascending=True) - df["value_group"] = pd.cut( - df.value, range(0, 10500, 500), right=False, labels=cat_labels - ) - - s = df["value_group"] - expected = s - tm.assert_series_equal(s.astype("category"), expected) - tm.assert_series_equal(s.astype(CategoricalDtype()), expected) + expected = ser + tm.assert_series_equal(ser.astype("category"), expected) + tm.assert_series_equal(ser.astype(CategoricalDtype()), expected) msg = r"could not convert string to float|invalid literal for float\(\)" with pytest.raises(ValueError, match=msg): - s.astype("float64") + ser.astype("float64") cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) @@ -84,25 +76,38 @@ def 
test_astype_categorical_to_other(self): def cmp(a, b): tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b))) - expected = Series(np.array(s.values), name="value_group") - cmp(s.astype("object"), expected) - cmp(s.astype(np.object_), expected) + expected = Series(np.array(ser.values), name="value_group") + cmp(ser.astype("object"), expected) + cmp(ser.astype(np.object_), expected) # array conversion - tm.assert_almost_equal(np.array(s), np.array(s.values)) + tm.assert_almost_equal(np.array(ser), np.array(ser.values)) - tm.assert_series_equal(s.astype("category"), s) - tm.assert_series_equal(s.astype(CategoricalDtype()), s) + tm.assert_series_equal(ser.astype("category"), ser) + tm.assert_series_equal(ser.astype(CategoricalDtype()), ser) - roundtrip_expected = s.cat.set_categories( - s.cat.categories.sort_values() + roundtrip_expected = ser.cat.set_categories( + ser.cat.categories.sort_values() ).cat.remove_unused_categories() - tm.assert_series_equal( - s.astype("object").astype("category"), roundtrip_expected - ) - tm.assert_series_equal( - s.astype("object").astype(CategoricalDtype()), roundtrip_expected + result = ser.astype("object").astype("category") + tm.assert_series_equal(result, roundtrip_expected) + result = ser.astype("object").astype(CategoricalDtype()) + tm.assert_series_equal(result, roundtrip_expected) + + def test_astype_categorical_invalid_conversions(self): + # invalid conversion (these are NOT a dtype) + cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() + ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) + + msg = ( + "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' " + "not understood" ) + with pytest.raises(TypeError, match=msg): + ser.astype(Categorical) + with pytest.raises(TypeError, match=msg): + ser.astype("object").astype(Categorical) def test_series_to_categorical(self): # see gh-16524: test conversion of Series to Categorical