Skip to content

Commit daff5ea

Browse files
committed
PERF: improved clip performance
closes pandas-dev#15400
1 parent d92f06a commit daff5ea

File tree

3 files changed

+35
-1
lines changed

3 files changed

+35
-1
lines changed

asv_bench/benchmarks/series_methods.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ def setup(self):
111111
def time_series_dropna_int64(self):
112112
self.s.dropna()
113113

114+
114115
class series_dropna_datetime(object):
115116
goal_time = 0.2
116117

@@ -120,3 +121,13 @@ def setup(self):
120121

121122
def time_series_dropna_datetime(self):
122123
self.s.dropna()
124+
125+
126+
class series_clip(object):
127+
goal_time = 0.2
128+
129+
def setup(self):
130+
self.s = pd.Series(np.random.randn(50))
131+
132+
def time_series_dropna_datetime(self):
133+
self.s.clip(0, 1)

pandas/core/generic.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
_ensure_int64,
1515
needs_i8_conversion,
1616
is_scalar,
17+
is_number,
1718
is_integer, is_bool,
1819
is_bool_dtype,
1920
is_numeric_dtype,
@@ -4122,26 +4123,29 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
41224123
Examples
41234124
--------
41244125
>>> df
4125-
0 1
4126+
0 1
41264127
0 0.335232 -1.256177
41274128
1 -1.367855 0.746646
41284129
2 0.027753 -1.176076
41294130
3 0.230930 -0.679613
41304131
4 1.261967 0.570967
4132+
41314133
>>> df.clip(-1.0, 0.5)
41324134
0 1
41334135
0 0.335232 -1.000000
41344136
1 -1.000000 0.500000
41354137
2 0.027753 -1.000000
41364138
3 0.230930 -0.679613
41374139
4 0.500000 0.500000
4140+
41384141
>>> t
41394142
0 -0.3
41404143
1 -0.2
41414144
2 -0.1
41424145
3 0.0
41434146
4 0.1
41444147
dtype: float64
4148+
41454149
>>> df.clip(t, t + 1, axis=0)
41464150
0 1
41474151
0 0.335232 -0.300000
@@ -4160,6 +4164,24 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs):
41604164
if is_scalar(lower) and is_scalar(upper):
41614165
lower, upper = min(lower, upper), max(lower, upper)
41624166

4167+
# fast-path for scalars
4168+
if ((lower is None or (is_scalar(lower) and is_number(lower))) and
4169+
(upper is None or (is_scalar(upper) and is_number(upper)))):
4170+
4171+
if ((lower is not None and np.any(isnull(lower))) or
4172+
(upper is not None and np.any(isnull(upper)))):
4173+
raise ValueError("Cannot use an NA value as a clip threshold")
4174+
4175+
mask = isnull(self)
4176+
result = self.values
4177+
if upper is not None:
4178+
result = np.where(result >= upper, upper, result)
4179+
if lower is not None:
4180+
result = np.where(result <= lower, lower, result)
4181+
result[mask] = np.nan
4182+
return self._constructor(
4183+
result, **self._construct_axes_dict()).__finalize__(self)
4184+
41634185
result = self
41644186
if lower is not None:
41654187
result = result.clip_lower(lower, axis)

pandas/tests/series/test_analytics.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,7 @@ def test_clip_against_series(self):
10111011

10121012
lower = Series([1.0, 2.0, 3.0])
10131013
upper = Series([1.5, 2.5, 3.5])
1014+
10141015
assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
10151016
assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
10161017

0 commit comments

Comments
 (0)