Skip to content

Commit 2f57671

Browse files
committed
ENH: show percentiles in timestamp describe (#30164)
1 parent f873fb9 commit 2f57671

File tree

3 files changed, with 33 additions and 64 deletions.

pandas/core/generic.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
from pandas._config import config
3232

33-
from pandas._libs import Timestamp, iNaT, lib, properties
33+
from pandas._libs import NaT, Timestamp, iNaT, lib, properties
3434
from pandas._typing import (
3535
Axis,
3636
Dtype,
@@ -9626,26 +9626,8 @@ def describe_categorical_1d(data):
96269626
dtype = None
96279627
if result[1] > 0:
96289628
top, freq = objcounts.index[0], objcounts.iloc[0]
9629-
9630-
if is_datetime64_any_dtype(data):
9631-
tz = data.dt.tz
9632-
asint = data.dropna().values.view("i8")
9633-
top = Timestamp(top)
9634-
if top.tzinfo is not None and tz is not None:
9635-
# Don't tz_localize(None) if key is already tz-aware
9636-
top = top.tz_convert(tz)
9637-
else:
9638-
top = top.tz_localize(tz)
9639-
names += ["top", "freq", "first", "last"]
9640-
result += [
9641-
top,
9642-
freq,
9643-
Timestamp(asint.min(), tz=tz),
9644-
Timestamp(asint.max(), tz=tz),
9645-
]
9646-
else:
9647-
names += ["top", "freq"]
9648-
result += [top, freq]
9629+
names += ["top", "freq"]
9630+
result += [top, freq]
96499631

96509632
# If the DataFrame is empty, set 'top' and 'freq' to None
96519633
# to maintain output shape consistency
@@ -9656,11 +9638,30 @@ def describe_categorical_1d(data):
96569638

96579639
return pd.Series(result, index=names, name=data.name, dtype=dtype)
96589640

9641+
def describe_timestamp_1d(data):
9642+
# GH-30164
9643+
tz = data.dt.tz
9644+
asint = data.dropna().values.view("i8")
9645+
is_empty = asint.shape[0] == 0
9646+
stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"]
9647+
d = (
9648+
[
9649+
asint.shape[0],
9650+
Timestamp(asint.mean(), tz=tz) if not is_empty else NaT,
9651+
Timestamp(asint.min(), tz=tz) if not is_empty else NaT,
9652+
]
9653+
+ data.quantile(percentiles).tolist()
9654+
+ [Timestamp(asint.max(), tz=tz) if not is_empty else NaT]
9655+
)
9656+
return pd.Series(d, index=stat_index, name=data.name)
9657+
96599658
def describe_1d(data):
96609659
if is_bool_dtype(data):
96619660
return describe_categorical_1d(data)
96629661
elif is_numeric_dtype(data):
96639662
return describe_numeric_1d(data)
9663+
elif is_datetime64_any_dtype(data):
9664+
return describe_timestamp_1d(data)
96649665
elif is_timedelta64_dtype(data):
96659666
return describe_numeric_1d(data)
96669667
else:

pandas/tests/frame/methods/test_describe.py

Lines changed: 6 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -253,52 +253,19 @@ def test_describe_tz_values(self, tz_naive_fixture):
253253

254254
expected = DataFrame(
255255
{
256-
"s1": [
257-
5,
258-
np.nan,
259-
np.nan,
260-
np.nan,
261-
np.nan,
262-
np.nan,
263-
2,
264-
1.581139,
265-
0,
266-
1,
267-
2,
268-
3,
269-
4,
270-
],
256+
"s1": [5, 2, 0, 1, 2, 3, 4, 1.581139],
271257
"s2": [
272258
5,
273-
5,
274-
s2.value_counts().index[0],
275-
1,
259+
Timestamp(2018, 1, 3).tz_localize(tz),
276260
start.tz_localize(tz),
261+
s2[1],
262+
s2[2],
263+
s2[3],
277264
end.tz_localize(tz),
278265
np.nan,
279-
np.nan,
280-
np.nan,
281-
np.nan,
282-
np.nan,
283-
np.nan,
284-
np.nan,
285266
],
286267
},
287-
index=[
288-
"count",
289-
"unique",
290-
"top",
291-
"freq",
292-
"first",
293-
"last",
294-
"mean",
295-
"std",
296-
"min",
297-
"25%",
298-
"50%",
299-
"75%",
300-
"max",
301-
],
268+
index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"],
302269
)
303270
result = df.describe(include="all")
304271
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_describe.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,14 @@ def test_describe_with_tz(self, tz_naive_fixture):
5757
expected = Series(
5858
[
5959
5,
60-
5,
61-
s.value_counts().index[0],
62-
1,
60+
Timestamp(2018, 1, 3).tz_localize(tz),
6361
start.tz_localize(tz),
62+
s[1],
63+
s[2],
64+
s[3],
6465
end.tz_localize(tz),
6566
],
6667
name=name,
67-
index=["count", "unique", "top", "freq", "first", "last"],
68+
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
6869
)
6970
tm.assert_series_equal(result, expected)

Comments (0): there are no comments on this commit.