diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py new file mode 100644 index 0000000000000..fac6a9139462f --- /dev/null +++ b/pandas/tests/frame/methods/test_append.py @@ -0,0 +1,179 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, Timestamp +import pandas.util.testing as tm + + +class TestDataFrameAppend: + def test_append_empty_list(self): + # GH 28769 + df = DataFrame() + result = df.append([]) + expected = df + tm.assert_frame_equal(result, expected) + assert result is not df + + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + result = df.append([]) + expected = df + tm.assert_frame_equal(result, expected) + assert result is not df # .append() should return a new object + + def test_append_series_dict(self): + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + series = df.loc[4] + msg = "Indexes have overlapping values" + with pytest.raises(ValueError, match=msg): + df.append(series, verify_integrity=True) + + series.name = None + msg = "Can only append a Series if ignore_index=True" + with pytest.raises(TypeError, match=msg): + df.append(series, verify_integrity=True) + + result = df.append(series[::-1], ignore_index=True) + expected = df.append( + DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True + ) + tm.assert_frame_equal(result, expected) + + # dict + result = df.append(series.to_dict(), ignore_index=True) + tm.assert_frame_equal(result, expected) + + result = df.append(series[::-1][:3], ignore_index=True) + expected = df.append( + DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True + ) + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + # can append when name set + row = df.loc[4] + row.name = 5 + result = df.append(row) + expected = df.append(df[-1:], ignore_index=True) + tm.assert_frame_equal(result, expected) + + def test_append_list_of_series_dicts(self): + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + dicts = [x.to_dict() for idx, x in df.iterrows()] + + result = df.append(dicts, ignore_index=True) + expected = df.append(df, ignore_index=True) + tm.assert_frame_equal(result, expected) + + # different columns + dicts = [ + {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, + {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, + ] + result = df.append(dicts, ignore_index=True, sort=True) + expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + def test_append_missing_cols(self): + # GH22252 + # exercise the conditional branch in append method where the data + # to be appended is a list and does not contain all columns that are in + # the target DataFrame + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + dicts = [{"foo": 9}, {"bar": 10}] + with tm.assert_produces_warning(None): + result = df.append(dicts, ignore_index=True, sort=True) + + expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + def test_append_empty_dataframe(self): + + # Empty df append empty df + df1 = DataFrame() + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Non-empty df append empty df + df1 = DataFrame(np.random.randn(5, 2)) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Empty df with columns append empty df + df1 = DataFrame(columns=["bar", "foo"]) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Non-Empty df with columns append empty df + df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + def test_append_dtypes(self): + + # GH 5754 + # row appends of different dtypes (so need to do by-item) + # can sometimes infer the correct type + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) + result = df1.append(df2) + expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) + result = df1.append(df2) + expected = DataFrame( + {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} + ) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) + result = df1.append(df2) + expected = DataFrame( + {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} + ) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": np.nan}, index=range(1)) + df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) + result = df1.append(df2) + expected = DataFrame( + {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} + ) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) + result = df1.append(df2) + expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] + ) + def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): + # GH 30238 + tz = tz_naive_fixture + df = pd.DataFrame([pd.Timestamp(timestamp, tz=tz)]) + result = df.append(df.iloc[0]).iloc[-1] + expected = pd.Series(pd.Timestamp(timestamp, tz=tz), name=0) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 04bc87a243a9b..209b4a800354d 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -62,32 +62,15 @@ def test_cov(self, float_frame, float_string_frame): class TestDataFrameCorr: # DataFrame.corr(), as opposed to DataFrame.corrwith - @staticmethod - def _check_method(frame, method="pearson"): - correls = frame.corr(method=method) - expected = frame["A"].corr(frame["C"], method=method) - tm.assert_almost_equal(correls["A"]["C"], expected) - - @td.skip_if_no_scipy - def test_corr_pearson(self, float_frame): - float_frame["A"][:5] = np.nan - float_frame["B"][5:10] = np.nan - - self._check_method(float_frame, "pearson") - + @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"]) @td.skip_if_no_scipy - def test_corr_kendall(self, float_frame): + def test_corr_scipy_method(self, float_frame, method): float_frame["A"][:5] = np.nan float_frame["B"][5:10] = np.nan - self._check_method(float_frame, "kendall") - - @td.skip_if_no_scipy - def test_corr_spearman(self, float_frame): - float_frame["A"][:5] = np.nan - float_frame["B"][5:10] = np.nan - - self._check_method(float_frame, "spearman") + correls = float_frame.corr(method=method) + expected = float_frame["A"].corr(float_frame["C"], method=method) + tm.assert_almost_equal(correls["A"]["C"], expected) # --------------------------------------------------------------------- diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index f01a030ad0e22..efb0c64a4f7ac 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -113,6 +113,15 @@ def test_rank2(self): exp = DataFrame({"a": [3.5, 1.0, 3.5, 5.0, 6.0, 7.0, 2.0]}) tm.assert_frame_equal(df.rank(), exp) + def test_rank_does_not_mutate(self): + # GH#18521 + # Check rank does not mutate DataFrame + df = DataFrame(np.random.randn(10, 3), dtype="float64") + expected = df.copy() + df.rank() + result = df + tm.assert_frame_equal(result, expected) + def test_rank_mixed_frame(self, float_string_frame): float_string_frame["datetime"] = datetime.now() float_string_frame["timedelta"] = timedelta(days=1, seconds=1) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index a705fc89a813d..68d49c05eaa37 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1248,15 +1248,6 @@ def test_matmul(self): # --------------------------------------------------------------------- # Unsorted - def test_series_nat_conversion(self): - # GH 18521 - # Check rank does not mutate DataFrame - df = DataFrame(np.random.randn(10, 3), dtype="float64") - expected = df.copy() - df.rank() - result = df - tm.assert_frame_equal(result, expected) - def test_series_broadcasting(self): # smoke test for numpy warnings # GH 16378, GH 16306 diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index ebc4438366001..bfb691a8e75d3 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -128,177 +128,6 @@ def test_concat_tuple_keys(self): ) tm.assert_frame_equal(results, expected) - def test_append_empty_list(self): - # GH 28769 - df = DataFrame() - result = df.append([]) - expected = df - tm.assert_frame_equal(result, expected) - assert result is not df - - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - result = df.append([]) - expected = df - tm.assert_frame_equal(result, expected) - assert result is not df # .append() should return a new object - - def test_append_series_dict(self): - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - - series = df.loc[4] - msg = "Indexes have overlapping values" - with pytest.raises(ValueError, match=msg): - df.append(series, verify_integrity=True) - - series.name = None - msg = "Can only append a Series if ignore_index=True" - with pytest.raises(TypeError, match=msg): - df.append(series, verify_integrity=True) - - result = df.append(series[::-1], ignore_index=True) - expected = df.append( - DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True - ) - tm.assert_frame_equal(result, expected) - - # dict - result = df.append(series.to_dict(), ignore_index=True) - tm.assert_frame_equal(result, expected) - - result = df.append(series[::-1][:3], ignore_index=True) - expected = df.append( - DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True - ) - tm.assert_frame_equal(result, expected.loc[:, result.columns]) - - # can append when name set - row = df.loc[4] - row.name = 5 - result = df.append(row) - expected = df.append(df[-1:], ignore_index=True) - tm.assert_frame_equal(result, expected) - - def test_append_list_of_series_dicts(self): - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - - dicts = [x.to_dict() for idx, x in df.iterrows()] - - result = df.append(dicts, ignore_index=True) - expected = df.append(df, ignore_index=True) - tm.assert_frame_equal(result, expected) - - # different columns - dicts = [ - {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, - {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, - ] - result = df.append(dicts, ignore_index=True, sort=True) - expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) - tm.assert_frame_equal(result, expected) - - def test_append_missing_cols(self): - # GH22252 - # exercise the conditional branch in append method where the data - # to be appended is a list and does not contain all columns that are in - # the target DataFrame - df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) - - dicts = [{"foo": 9}, {"bar": 10}] - with tm.assert_produces_warning(None): - result = df.append(dicts, ignore_index=True, sort=True) - - expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) - tm.assert_frame_equal(result, expected) - - def test_append_empty_dataframe(self): - - # Empty df append empty df - df1 = DataFrame() - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Non-empty df append empty df - df1 = DataFrame(np.random.randn(5, 2)) - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Empty df with columns append empty df - df1 = DataFrame(columns=["bar", "foo"]) - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - # Non-Empty df with columns append empty df - df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - def test_append_dtypes(self): - - # GH 5754 - # row appends of different dtypes (so need to do by-item) - # can sometimes infer the correct type - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) - df2 = DataFrame() - result = df1.append(df2) - expected = df1.copy() - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) - result = df1.append(df2) - expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) - result = df1.append(df2) - expected = DataFrame( - {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} - ) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) - result = df1.append(df2) - expected = DataFrame( - {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} - ) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": np.nan}, index=range(1)) - df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) - result = df1.append(df2) - expected = DataFrame( - {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} - ) - tm.assert_frame_equal(result, expected) - - df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) - df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) - result = df1.append(df2) - expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] - ) - def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): - # GH 30238 - tz = tz_naive_fixture - df = pd.DataFrame([pd.Timestamp(timestamp, tz=tz)]) - result = df.append(df.iloc[0]).iloc[-1] - expected = pd.Series(pd.Timestamp(timestamp, tz=tz), name=0) - tm.assert_series_equal(result, expected) - def test_update(self): df = DataFrame( [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py new file mode 100644 index 0000000000000..ec357786f18fb --- /dev/null +++ b/pandas/tests/series/methods/test_append.py @@ -0,0 +1,158 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, DatetimeIndex, Series, date_range +import pandas.util.testing as tm + + +class TestSeriesAppend: + def test_append(self, datetime_series, string_series, object_series): + appended_series = string_series.append(object_series) + for idx, value in appended_series.items(): + if idx in string_series.index: + assert value == string_series[idx] + elif idx in object_series.index: + assert value == object_series[idx] + else: + raise AssertionError("orphaned index!") + + msg = "Indexes have overlapping values:" + with pytest.raises(ValueError, match=msg): + datetime_series.append(datetime_series, verify_integrity=True) + + def test_append_many(self, datetime_series): + pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] + + result = pieces[0].append(pieces[1:]) + tm.assert_series_equal(result, datetime_series) + + def test_append_duplicates(self): + # GH 13677 + s1 = pd.Series([1, 2, 3]) + s2 = pd.Series([4, 5, 6]) + exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(s1.append(s2), exp) + tm.assert_series_equal(pd.concat([s1, s2]), exp) + + # the result must have RangeIndex + exp = pd.Series([1, 2, 3, 4, 5, 6]) + tm.assert_series_equal( + s1.append(s2, ignore_index=True), exp, check_index_type=True + ) + tm.assert_series_equal( + pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True + ) + + msg = "Indexes have overlapping values:" + with pytest.raises(ValueError, match=msg): + s1.append(s2, verify_integrity=True) + with pytest.raises(ValueError, match=msg): + pd.concat([s1, s2], verify_integrity=True) + + def test_append_tuples(self): + # GH 28410 + s = pd.Series([1, 2, 3]) + list_input = [s, s] + tuple_input = (s, s) + + expected = s.append(list_input) + result = s.append(tuple_input) + + tm.assert_series_equal(expected, result) + + +class TestSeriesAppendWithDatetimeIndex: + def test_append(self): + rng = date_range("5/8/2012 1:45", periods=10, freq="5T") + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + + result = ts.append(ts) + result_df = df.append(df) + ex_index = DatetimeIndex(np.tile(rng.values, 2)) + tm.assert_index_equal(result.index, ex_index) + tm.assert_index_equal(result_df.index, ex_index) + + appended = rng.append(rng) + tm.assert_index_equal(appended, ex_index) + + appended = rng.append([rng, rng]) + ex_index = DatetimeIndex(np.tile(rng.values, 3)) + tm.assert_index_equal(appended, ex_index) + + # different index names + rng1 = rng.copy() + rng2 = rng.copy() + rng1.name = "foo" + rng2.name = "bar" + assert rng1.append(rng1).name == "foo" + assert rng1.append(rng2).name is None + + def test_append_tz(self): + # see gh-2938 + rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern") + rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern") + rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern") + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_tz_explicit_pytz(self): + # see gh-2938 + from pytz import timezone as timezone + + rng = date_range( + "5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern") + ) + rng2 = date_range( + "5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern") + ) + rng3 = date_range( + "5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern") + ) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_tz_dateutil(self): + # see gh-2938 + rng = date_range( + "5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern" + ) + rng2 = date_range( + "5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern" + ) + rng3 = date_range( + "5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern" + ) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index c00113a7c47ff..03f12ea13fdaa 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -184,23 +184,6 @@ def test_is_monotonic(self): assert s.is_monotonic is False assert s.is_monotonic_decreasing is True - def test_apply_categorical(self): - values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) - s = pd.Series(values, name="XX", index=list("abcdefg")) - result = s.apply(lambda x: x.lower()) - - # should be categorical dtype when the number of categories are - # the same - values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) - exp = pd.Series(values, name="XX", index=list("abcdefg")) - tm.assert_series_equal(result, exp) - tm.assert_categorical_equal(result.values, exp.values) - - result = s.apply(lambda x: "A") - exp = pd.Series(["A"] * 7, name="XX", index=list("abcdefg")) - tm.assert_series_equal(result, exp) - assert result.dtype == np.object - def test_unstack(self): index = MultiIndex( diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 334c6994eb540..30b8b5c7c8545 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -162,6 +162,23 @@ def test_apply_dict_depr(self): with pytest.raises(SpecificationError, match=msg): tsdf.A.agg({"foo": ["sum", "mean"]}) + def test_apply_categorical(self): + values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + ser = pd.Series(values, name="XX", index=list("abcdefg")) + result = ser.apply(lambda x: x.lower()) + + # should be categorical dtype when the number of categories are + # the same + values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) + exp = pd.Series(values, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + tm.assert_categorical_equal(result.values, exp.values) + + result = ser.apply(lambda x: "A") + exp = pd.Series(["A"] * 7, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + assert result.dtype == np.object + @pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]]) def test_apply_categorical_with_nan_values(self, series): # GH 20714 bug fixed in: GH 24275 diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index c6f4ce364f328..238a413af7a31 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -4,65 +4,11 @@ import pytest import pandas as pd -from pandas import DataFrame, DatetimeIndex, Series, date_range +from pandas import DataFrame, Series import pandas.util.testing as tm class TestSeriesCombine: - def test_append(self, datetime_series, string_series, object_series): - appendedSeries = string_series.append(object_series) - for idx, value in appendedSeries.items(): - if idx in string_series.index: - assert value == string_series[idx] - elif idx in object_series.index: - assert value == object_series[idx] - else: - raise AssertionError("orphaned index!") - - msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg): - datetime_series.append(datetime_series, verify_integrity=True) - - def test_append_many(self, datetime_series): - pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] - - result = pieces[0].append(pieces[1:]) - tm.assert_series_equal(result, datetime_series) - - def test_append_duplicates(self): - # GH 13677 - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([4, 5, 6]) - exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) - tm.assert_series_equal(s1.append(s2), exp) - tm.assert_series_equal(pd.concat([s1, s2]), exp) - - # the result must have RangeIndex - exp = pd.Series([1, 2, 3, 4, 5, 6]) - tm.assert_series_equal( - s1.append(s2, ignore_index=True), exp, check_index_type=True - ) - tm.assert_series_equal( - pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True - ) - - msg = "Indexes have overlapping values:" - with pytest.raises(ValueError, match=msg): - s1.append(s2, verify_integrity=True) - with pytest.raises(ValueError, match=msg): - pd.concat([s1, s2], verify_integrity=True) - - def test_append_tuples(self): - # GH 28410 - s = pd.Series([1, 2, 3]) - list_input = [s, s] - tuple_input = (s, s) - - expected = s.append(list_input) - result = s.append(tuple_input) - - tm.assert_series_equal(expected, result) - def test_combine_scalar(self): # GH 21248 # Note - combine() with another Series is tested elsewhere because @@ -319,99 +265,3 @@ def test_combine_first_dt64(self): rs = s0.combine_first(s1) xp = Series([datetime(2010, 1, 1), "2011"]) tm.assert_series_equal(rs, xp) - - -class TestTimeseries: - def test_append_concat(self): - rng = date_range("5/8/2012 1:45", periods=10, freq="5T") - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - - result = ts.append(ts) - result_df = df.append(df) - ex_index = DatetimeIndex(np.tile(rng.values, 2)) - tm.assert_index_equal(result.index, ex_index) - tm.assert_index_equal(result_df.index, ex_index) - - appended = rng.append(rng) - tm.assert_index_equal(appended, ex_index) - - appended = rng.append([rng, rng]) - ex_index = DatetimeIndex(np.tile(rng.values, 3)) - tm.assert_index_equal(appended, ex_index) - - # different index names - rng1 = rng.copy() - rng2 = rng.copy() - rng1.name = "foo" - rng2.name = "bar" - assert rng1.append(rng1).name == "foo" - assert rng1.append(rng2).name is None - - def test_append_concat_tz(self): - # see gh-2938 - rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern") - rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern") - rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern") - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_concat_tz_explicit_pytz(self): - # see gh-2938 - from pytz import timezone as timezone - - rng = date_range( - "5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern") - ) - rng2 = date_range( - "5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern") - ) - rng3 = date_range( - "5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern") - ) - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_concat_tz_dateutil(self): - # see gh-2938 - rng = date_range( - "5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern" - ) - rng2 = date_range( - "5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern" - ) - rng3 = date_range( - "5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern" - ) - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3)