Skip to content

Commit 48f3a49

Browse files
authored
PERF: DataFrame.corr avoid copy (#51421)
* PERF: DataFrame.corr avoid copy
* PERF: DataFrame.corr avoid copy
* typo fix numeric_only
* Avoid copy in to_numpy
* fix test
* mypy fixup
1 parent eaacf83 commit 48f3a49

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

pandas/core/internals/managers.py

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1687,17 +1687,29 @@ def as_array(
16871687
-------
16881688
arr : ndarray
16891689
"""
1690+
passed_nan = lib.is_float(na_value) and isna(na_value)
1691+
16901692
# TODO(CoW) handle case where resulting array is a view
16911693
if len(self.blocks) == 0:
16921694
arr = np.empty(self.shape, dtype=float)
16931695
return arr.transpose()
16941696

1695-
# We want to copy when na_value is provided to avoid
1696-
# mutating the original object
1697-
copy = copy or na_value is not lib.no_default
1698-
16991697
if self.is_single_block:
17001698
blk = self.blocks[0]
1699+
1700+
if na_value is not lib.no_default:
1701+
# We want to copy when na_value is provided to avoid
1702+
# mutating the original object
1703+
if (
1704+
isinstance(blk.dtype, np.dtype)
1705+
and blk.dtype.kind == "f"
1706+
and passed_nan
1707+
):
1708+
# We are already numpy-float and na_value=np.nan
1709+
pass
1710+
else:
1711+
copy = True
1712+
17011713
if blk.is_extension:
17021714
# Avoid implicit conversion of extension blocks to object
17031715

@@ -1710,7 +1722,8 @@ def as_array(
17101722
else:
17111723
arr = np.asarray(blk.get_values())
17121724
if dtype:
1713-
arr = arr.astype(dtype, copy=False)
1725+
arr = arr.astype(dtype, copy=copy)
1726+
copy = False
17141727

17151728
if copy:
17161729
arr = arr.copy()
@@ -1722,7 +1735,11 @@ def as_array(
17221735
# The underlying data was copied within _interleave, so no need
17231736
# to further copy if copy=True or setting na_value
17241737

1725-
if na_value is not lib.no_default:
1738+
if na_value is lib.no_default:
1739+
pass
1740+
elif arr.dtype.kind == "f" and passed_nan:
1741+
pass
1742+
else:
17261743
arr[isna(arr)] = na_value
17271744

17281745
return arr.transpose()

pandas/tests/frame/methods/test_to_numpy.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,10 @@ def test_to_numpy_copy(self):
3030
assert df.to_numpy(copy=False).base is arr
3131
assert df.to_numpy(copy=True).base is not arr
3232

33+
# we still don't want a copy when na_value=np.nan is passed,
34+
# and that can be respected because we are already numpy-float
35+
assert df.to_numpy(copy=False, na_value=np.nan).base is arr
36+
3337
def test_to_numpy_mixed_dtype_to_str(self):
3438
# https://github.com/pandas-dev/pandas/issues/35455
3539
df = DataFrame([[Timestamp("2020-01-01 00:00:00"), 100.0]])

0 commit comments

Comments
 (0)