Skip to content

Commit ec83091

Browse files
[ArrayManager] TST: get tests running for /tests/frame
1 parent 19c7f75 commit ec83091

9 files changed

+75
-15
lines changed

pandas/conftest.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -382,11 +382,14 @@ def __len__(self):
382382
# Indices
383383
# ----------------------------------------------------------------
384384
@pytest.fixture
385-
def multiindex_year_month_day_dataframe_random_data():
385+
def multiindex_year_month_day_dataframe_random_data(using_array_manager):
386386
"""
387387
DataFrame with 3 level MultiIndex (year, month, day) covering
388388
first 100 business days from 2000-01-01 with random data
389389
"""
390+
if using_array_manager:
391+
# TODO(ArrayManager) groupby
392+
pytest.skip("Not yet implemented for ArrayManager")
390393
tdf = tm.makeTimeDataFrame(100)
391394
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
392395
# use Int64Index, to make sure things work

pandas/core/internals/array_manager.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,13 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
704704

705705
value = extract_array(value, extract_numpy=True)
706706
if value.ndim == 2:
707-
value = value[0, :]
707+
if value.shape[0] == 1:
708+
value = value[0, :]
709+
else:
710+
raise ValueError(
711+
f"expected 1D array, got array with shape {value.shape}"
712+
)
713+
708714
# TODO self.arrays can be empty
709715
# assert len(value) == len(self.arrays[0])
710716

pandas/tests/frame/test_arithmetic.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import pytest
88
import pytz
99

10+
import pandas.util._test_decorators as td
11+
1012
import pandas as pd
1113
from pandas import DataFrame, MultiIndex, Series
1214
import pandas._testing as tm
@@ -641,6 +643,7 @@ def test_df_add_2d_array_collike_broadcasts(self):
641643
result = collike + df
642644
tm.assert_frame_equal(result, expected)
643645

646+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes
644647
def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
645648
# GH#23000
646649
opname = all_arithmetic_operators
@@ -662,6 +665,7 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
662665
result = getattr(df, opname)(rowlike)
663666
tm.assert_frame_equal(result, expected)
664667

668+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes
665669
def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators):
666670
# GH#23000
667671
opname = all_arithmetic_operators
@@ -1306,7 +1310,7 @@ def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne)
13061310

13071311
def test_comparison_protected_from_errstate(self):
13081312
missing_df = tm.makeDataFrame()
1309-
missing_df.iloc[0]["A"] = np.nan
1313+
missing_df.loc[missing_df.index[0], "A"] = np.nan
13101314
with np.errstate(invalid="ignore"):
13111315
expected = missing_df.values < 0
13121316
with np.errstate(invalid="raise"):

pandas/tests/frame/test_block_internals.py

+6
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pytest
77

88
from pandas.errors import PerformanceWarning
9+
import pandas.util._test_decorators as td
910

1011
import pandas as pd
1112
from pandas import (
@@ -24,6 +25,11 @@
2425
# structure
2526

2627

28+
# TODO(ArrayManager) check which of those tests need to be rewritten to test the
29+
# equivalent for ArrayManager
30+
pytestmark = td.skip_array_manager_invalid_test
31+
32+
2733
class TestDataFrameBlockInternals:
2834
def test_setitem_invalidates_datetime_index_freq(self):
2935
# GH#24096 altering a datetime64tz column inplace invalidates the

pandas/tests/frame/test_constructors.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import pytz
1212

1313
from pandas.compat import np_version_under1p19
14+
import pandas.util._test_decorators as td
1415

1516
from pandas.core.dtypes.common import is_integer_dtype
1617
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
@@ -130,7 +131,10 @@ def test_constructor_cast_failure(self):
130131
df["foo"] = np.ones((4, 2)).tolist()
131132

132133
# this is not ok
133-
msg = "Wrong number of items passed 2, placement implies 1"
134+
msg = (
135+
"Wrong number of items passed 2, placement implies 1"
136+
"|expected 1D array, got array"
137+
)
134138
with pytest.raises(ValueError, match=msg):
135139
df["test"] = np.ones((4, 2))
136140

@@ -145,12 +149,15 @@ def test_constructor_dtype_copy(self):
145149
new_df["col1"] = 200.0
146150
assert orig_df["col1"][0] == 1.0
147151

148-
def test_constructor_dtype_nocast_view(self):
152+
def test_constructor_dtype_nocast_view_dataframe(self):
149153
df = DataFrame([[1, 2]])
150154
should_be_view = DataFrame(df, dtype=df[0].dtype)
151155
should_be_view[0][0] = 99
152156
assert df.values[0, 0] == 99
153157

158+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array?
159+
def test_constructor_dtype_nocast_view_2d_array(self):
160+
df = DataFrame([[1, 2]])
154161
should_be_view = DataFrame(df.values, dtype=df[0].dtype)
155162
should_be_view[0][0] = 97
156163
assert df.values[0, 0] == 97
@@ -1902,6 +1909,7 @@ def test_constructor_frame_copy(self, float_frame):
19021909
assert (cop["A"] == 5).all()
19031910
assert not (float_frame["A"] == 5).all()
19041911

1912+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array?
19051913
def test_constructor_ndarray_copy(self, float_frame):
19061914
df = DataFrame(float_frame.values)
19071915

pandas/tests/frame/test_nonunique_indexes.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
import pandas.util._test_decorators as td
5+
46
import pandas as pd
57
from pandas import DataFrame, MultiIndex, Series, date_range
68
import pandas._testing as tm
@@ -14,6 +16,9 @@ def check(result, expected=None):
1416

1517

1618
class TestDataFrameNonuniqueIndexes:
19+
20+
# TODO(ArrayManager) iset with multiple elements not yet implemented
21+
@td.skip_array_manager_not_yet_implemented
1722
def test_column_dups_operations(self):
1823

1924
# assignment
@@ -337,6 +342,8 @@ def test_getitem_boolean_frame_with_duplicate_columns(self):
337342
result = df[df > 6]
338343
check(result, expected)
339344

345+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) duplicate indices
346+
# fix error message
340347
def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self):
341348
# `df.A > 6` is a DataFrame with a different shape from df
342349
dups = ["A", "A", "C", "D"]
@@ -406,7 +413,7 @@ def test_column_dups_indexing(self):
406413
result = z.loc[["a", "c", "a"]]
407414
check(result, expected)
408415

409-
def test_columns_with_dups(self):
416+
def test_columns_with_dups(self, using_array_manager):
410417
# GH 3468 related
411418

412419
# basic
@@ -457,8 +464,9 @@ def test_columns_with_dups(self):
457464
)
458465
df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1)
459466

460-
assert len(df._mgr.blknos) == len(df.columns)
461-
assert len(df._mgr.blklocs) == len(df.columns)
467+
if not using_array_manager:
468+
assert len(df._mgr.blknos) == len(df.columns)
469+
assert len(df._mgr.blklocs) == len(df.columns)
462470

463471
# testing iloc
464472
for i in range(len(df.columns)):

pandas/tests/frame/test_repr_info.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,16 @@
2323

2424

2525
class TestDataFrameReprInfoEtc:
26-
def test_repr_bytes_61_lines(self):
26+
def test_repr_bytes_61_lines(self, using_array_manager):
2727
# GH#12857
2828
lets = list("ACDEFGHIJKLMNOP")
2929
slen = 50
3030
nseqs = 1000
3131
words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)]
3232
df = DataFrame(words).astype("U1")
33-
assert (df.dtypes == object).all()
33+
# TODO(ArrayManager) astype("U1") actually gives this dtype instead of object
34+
if not using_array_manager:
35+
assert (df.dtypes == object).all()
3436

3537
# smoke tests; at one point this raised with 61 but not 60
3638
repr(df)

pandas/tests/frame/test_stack_unstack.py

+27-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import numpy as np
66
import pytest
77

8+
import pandas.util._test_decorators as td
9+
810
import pandas as pd
911
from pandas import DataFrame, Index, MultiIndex, Period, Series, Timedelta, date_range
1012
import pandas._testing as tm
@@ -52,12 +54,13 @@ def test_stack_mixed_level(self):
5254
expected = expected[["a", "b"]]
5355
tm.assert_frame_equal(result, expected)
5456

55-
def test_unstack_not_consolidated(self):
57+
def test_unstack_not_consolidated(self, using_array_manager):
5658
# GH#34708
5759
df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]})
5860
df2 = df[["x"]]
5961
df2["y"] = df["y"]
60-
assert len(df2._mgr.blocks) == 2
62+
if not using_array_manager:
63+
assert len(df2._mgr.blocks) == 2
6164

6265
res = df2.unstack()
6366
expected = df.unstack()
@@ -110,6 +113,8 @@ def test_unstack_fill(self):
110113
expected = unstacked["w"]
111114
tm.assert_frame_equal(result, expected)
112115

116+
# TODO(ArrayManager) iset with multiple elements not yet implemented
117+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) iset
113118
def test_unstack_fill_frame(self):
114119

115120
# From a dataframe
@@ -739,7 +744,8 @@ def test_unstack_multi_level_rows_and_cols(self):
739744
expected = df.unstack(["i3"]).unstack(["i2"])
740745
tm.assert_frame_equal(result, expected)
741746

742-
def test_unstack_nan_index(self): # GH7466
747+
def test_unstack_nan_index1(self):
748+
# GH7466
743749
def cast(val):
744750
val_str = "" if val != val else val
745751
return f"{val_str:1}"
@@ -825,6 +831,7 @@ def verify(df):
825831
for col in ["4th", "5th"]:
826832
verify(udf[col])
827833

834+
def test_unstack_nan_index2(self):
828835
# GH7403
829836
df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)})
830837
df.iloc[3, 1] = np.NaN
@@ -867,6 +874,7 @@ def verify(df):
867874
right = DataFrame(vals, columns=cols, index=idx)
868875
tm.assert_frame_equal(left, right)
869876

877+
def test_unstack_nan_index3(self, using_array_manager):
870878
# GH7401
871879
df = DataFrame(
872880
{
@@ -888,8 +896,13 @@ def verify(df):
888896
)
889897

890898
right = DataFrame(vals, columns=cols, index=idx)
899+
if using_array_manager:
900+
# with ArrayManager preserve dtype where possible
901+
cols = right.columns[[1, 2, 3, 5]]
902+
right[cols] = right[cols].astype("int64")
891903
tm.assert_frame_equal(left, right)
892904

905+
def test_unstack_nan_index4(self):
893906
# GH4862
894907
vals = [
895908
["Hg", np.nan, np.nan, 680585148],
@@ -930,6 +943,8 @@ def verify(df):
930943
left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"])
931944
tm.assert_frame_equal(left.unstack(), right)
932945

946+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) MultiIndex bug
947+
def test_unstack_nan_index5(self):
933948
# GH9497 - multiple unstack with nulls
934949
df = DataFrame(
935950
{
@@ -1447,6 +1462,7 @@ def test_stack_mixed_dtype(self, multiindex_dataframe_random_data):
14471462
assert result.name is None
14481463
assert stacked["bar"].dtype == np.float_
14491464

1465+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby
14501466
def test_unstack_bug(self):
14511467
df = DataFrame(
14521468
{
@@ -1683,6 +1699,7 @@ def test_unstack_period_frame(self):
16831699

16841700
tm.assert_frame_equal(result3, expected)
16851701

1702+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby
16861703
def test_stack_multiple_bug(self):
16871704
# bug when some uniques are not present in the data GH#3170
16881705
id_col = ([1] * 3) + ([2] * 3)
@@ -1881,7 +1898,7 @@ def test_unstack_group_index_overflow(self):
18811898
result = s.unstack(4)
18821899
assert result.shape == (500, 2)
18831900

1884-
def test_unstack_with_missing_int_cast_to_float(self):
1901+
def test_unstack_with_missing_int_cast_to_float(self, using_array_manager):
18851902
# https://github.com/pandas-dev/pandas/issues/37115
18861903
df = DataFrame(
18871904
{
@@ -1893,7 +1910,8 @@ def test_unstack_with_missing_int_cast_to_float(self):
18931910

18941911
# add another int column to get 2 blocks
18951912
df["is_"] = 1
1896-
assert len(df._mgr.blocks) == 2
1913+
if not using_array_manager:
1914+
assert len(df._mgr.blocks) == 2
18971915

18981916
result = df.unstack("b")
18991917
result[("is_", "ca")] = result[("is_", "ca")].fillna(0)
@@ -1906,6 +1924,10 @@ def test_unstack_with_missing_int_cast_to_float(self):
19061924
names=[None, "b"],
19071925
),
19081926
)
1927+
if using_array_manager:
1928+
# with ArrayManager preserve dtype where possible
1929+
expected[("v", "cb")] = expected[("v", "cb")].astype("int64")
1930+
expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64")
19091931
tm.assert_frame_equal(result, expected)
19101932

19111933
def test_unstack_with_level_has_nan(self):

pandas/tests/frame/test_subclass.py

+1
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,7 @@ def test_idxmax_preserves_subclass(self):
697697
result = df.idxmax()
698698
assert isinstance(result, tm.SubclassedSeries)
699699

700+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) equals
700701
def test_equals_subclass(self):
701702
# https://github.com/pandas-dev/pandas/pull/34402
702703
# allow subclass in both directions

0 commit comments

Comments
 (0)