Skip to content

Commit 4ecba51

Browse files
authored
REF/TYP: extract function/type args in describe.py (#39165)
1 parent 25110a9 commit 4ecba51

File tree

1 file changed

+49
-29
lines changed

1 file changed

+49
-29
lines changed

pandas/core/describe.py

+49-29
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
Method NDFrame.describe() delegates actual execution to function describe_ndframe().
55
"""
66

7-
from typing import TYPE_CHECKING, List, Optional, Sequence, Union
7+
from typing import TYPE_CHECKING, List, Optional, Sequence, Union, cast
88
import warnings
99

1010
import numpy as np
@@ -62,32 +62,14 @@ def describe_ndframe(
6262
if obj.ndim == 2 and obj.columns.size == 0:
6363
raise ValueError("Cannot describe a DataFrame without columns")
6464

65-
if percentiles is not None:
66-
# explicit conversion of `percentiles` to list
67-
percentiles = list(percentiles)
68-
69-
# get them all to be in [0, 1]
70-
validate_percentile(percentiles)
71-
72-
# median should always be included
73-
if 0.5 not in percentiles:
74-
percentiles.append(0.5)
75-
percentiles = np.asarray(percentiles)
76-
else:
77-
percentiles = np.array([0.25, 0.5, 0.75])
78-
79-
# sort and check for duplicates
80-
unique_pcts = np.unique(percentiles)
81-
assert percentiles is not None
82-
if len(unique_pcts) < len(percentiles):
83-
raise ValueError("percentiles cannot contain duplicates")
84-
percentiles = unique_pcts
65+
percentiles = _refine_percentiles(percentiles)
8566

8667
if obj.ndim == 1:
68+
series = cast("Series", obj)
8769
# Incompatible return value type
8870
# (got "Series", expected "FrameOrSeries") [return-value]
8971
return describe_1d(
90-
obj,
72+
series,
9173
percentiles,
9274
datetime_is_numeric,
9375
is_series=True,
@@ -125,14 +107,14 @@ def describe_ndframe(
125107
return d
126108

127109

128-
def describe_numeric_1d(series, percentiles) -> "Series":
110+
def describe_numeric_1d(series: "Series", percentiles: Sequence[float]) -> "Series":
129111
"""Describe series containing numerical data.
130112
131113
Parameters
132114
----------
133115
series : Series
134116
Series to be described.
135-
percentiles : list-like of numbers, optional
117+
percentiles : list-like of numbers
136118
The percentiles to include in the output.
137119
"""
138120
from pandas import Series
@@ -148,7 +130,7 @@ def describe_numeric_1d(series, percentiles) -> "Series":
148130
return Series(d, index=stat_index, name=series.name)
149131

150132

151-
def describe_categorical_1d(data, is_series) -> "Series":
133+
def describe_categorical_1d(data: "Series", is_series: bool) -> "Series":
152134
"""Describe series containing categorical data.
153135
154136
Parameters
@@ -210,14 +192,14 @@ def describe_categorical_1d(data, is_series) -> "Series":
210192
return Series(result, index=names, name=data.name, dtype=dtype)
211193

212194

213-
def describe_timestamp_1d(data, percentiles) -> "Series":
195+
def describe_timestamp_1d(data: "Series", percentiles: Sequence[float]) -> "Series":
214196
"""Describe series containing datetime64 dtype.
215197
216198
Parameters
217199
----------
218200
data : Series
219201
Series to be described.
220-
percentiles : list-like of numbers, optional
202+
percentiles : list-like of numbers
221203
The percentiles to include in the output.
222204
"""
223205
# GH-30164
@@ -234,14 +216,20 @@ def describe_timestamp_1d(data, percentiles) -> "Series":
234216
return Series(d, index=stat_index, name=data.name)
235217

236218

237-
def describe_1d(data, percentiles, datetime_is_numeric, *, is_series) -> "Series":
219+
def describe_1d(
220+
data: "Series",
221+
percentiles: Sequence[float],
222+
datetime_is_numeric: bool,
223+
*,
224+
is_series: bool,
225+
) -> "Series":
238226
"""Describe series.
239227
240228
Parameters
241229
----------
242230
data : Series
243231
Series to be described.
244-
percentiles : list-like of numbers, optional
232+
percentiles : list-like of numbers
245233
The percentiles to include in the output.
246234
datetime_is_numeric : bool, default False
247235
Whether to treat datetime dtypes as numeric.
@@ -263,3 +251,35 @@ def describe_1d(data, percentiles, datetime_is_numeric, *, is_series) -> "Series
263251
return describe_numeric_1d(data, percentiles)
264252
else:
265253
return describe_categorical_1d(data, is_series)
254+
255+
256+
def _refine_percentiles(percentiles: Optional[Sequence[float]]) -> Sequence[float]:
    """Ensure that percentiles are unique and sorted.

    Parameters
    ----------
    percentiles : list-like of numbers, optional
        The percentiles to include in the output. When None, the quartiles
        ``[0.25, 0.5, 0.75]`` are used.

    Returns
    -------
    numpy.ndarray
        The requested percentiles in ascending order. The median (0.5) is
        always present, even if the caller did not ask for it.

    Raises
    ------
    ValueError
        If the same percentile is requested more than once. Range checking
        is delegated to ``validate_percentile``, which is expected to reject
        values outside [0, 1].
    """
    if percentiles is None:
        return np.array([0.25, 0.5, 0.75])

    # Work on a private list copy: the caller's object is never mutated and
    # any list-like input (tuple, ndarray, ...) gains ``append``.
    requested = list(percentiles)

    # get them all to be in [0, 1]
    validate_percentile(requested)

    # median should always be included
    if 0.5 not in requested:
        requested.append(0.5)

    # np.unique sorts and de-duplicates in one pass; a shorter result means
    # the caller supplied at least one repeated percentile.
    unique_pcts = np.unique(requested)
    if len(unique_pcts) < len(requested):
        raise ValueError("percentiles cannot contain duplicates")

    return unique_pcts

0 commit comments

Comments
 (0)