Skip to content

Commit 836175b

Browse files
committed
Merge remote-tracking branch 'origin/master' into perf/masked_cummin/max
2 parents 02768ec + e69df38 commit 836175b

26 files changed

+515
-363
lines changed

pandas/_libs/groupby.pyi

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
from typing import Literal
2+
3+
import numpy as np
4+
5+
def group_median_float64(
6+
out: np.ndarray, # ndarray[float64_t, ndim=2]
7+
counts: np.ndarray, # ndarray[int64_t]
8+
values: np.ndarray, # ndarray[float64_t, ndim=2]
9+
labels: np.ndarray, # ndarray[int64_t]
10+
min_count: int = ..., # Py_ssize_t
11+
) -> None: ...
12+
13+
def group_cumprod_float64(
14+
out: np.ndarray, # float64_t[:, ::1]
15+
values: np.ndarray, # const float64_t[:, :]
16+
labels: np.ndarray, # const int64_t[:]
17+
ngroups: int,
18+
is_datetimelike: bool,
19+
skipna: bool = ...,
20+
) -> None: ...
21+
22+
def group_cumsum(
23+
out: np.ndarray, # numeric[:, ::1]
24+
values: np.ndarray, # ndarray[numeric, ndim=2]
25+
labels: np.ndarray, # const int64_t[:]
26+
ngroups: int,
27+
is_datetimelike: bool,
28+
skipna: bool = ...,
29+
) -> None: ...
30+
31+
32+
def group_shift_indexer(
33+
out: np.ndarray, # int64_t[::1]
34+
labels: np.ndarray, # const int64_t[:]
35+
ngroups: int,
36+
periods: int,
37+
) -> None: ...
38+
39+
40+
def group_fillna_indexer(
41+
out: np.ndarray, # ndarray[int64_t]
42+
labels: np.ndarray, # ndarray[int64_t]
43+
mask: np.ndarray, # ndarray[uint8_t]
44+
direction: Literal["ffill", "bfill"],
45+
limit: int, # int64_t
46+
dropna: bool,
47+
) -> None: ...
48+
49+
50+
def group_any_all(
51+
out: np.ndarray, # uint8_t[::1]
52+
values: np.ndarray, # const uint8_t[::1]
53+
labels: np.ndarray, # const int64_t[:]
54+
mask: np.ndarray, # const uint8_t[::1]
55+
val_test: Literal["any", "all"],
56+
skipna: bool,
57+
) -> None: ...
58+
59+
def group_add(
60+
out: np.ndarray, # complexfloating_t[:, ::1]
61+
counts: np.ndarray, # int64_t[::1]
62+
values: np.ndarray, # ndarray[complexfloating_t, ndim=2]
63+
labels: np.ndarray, # const intp_t[:]
64+
min_count: int = ...
65+
) -> None: ...
66+
67+
def group_prod(
68+
out: np.ndarray, # floating[:, ::1]
69+
counts: np.ndarray, # int64_t[::1]
70+
values: np.ndarray, # ndarray[floating, ndim=2]
71+
labels: np.ndarray, # const intp_t[:]
72+
min_count: int = ...
73+
) -> None: ...
74+
75+
def group_var(
76+
out: np.ndarray, # floating[:, ::1]
77+
counts: np.ndarray, # int64_t[::1]
78+
values: np.ndarray, # ndarray[floating, ndim=2]
79+
labels: np.ndarray, # const intp_t[:]
80+
min_count: int = ..., # Py_ssize_t
81+
ddof: int = ..., # int64_t
82+
) -> None: ...
83+
84+
def group_mean(
85+
out: np.ndarray, # floating[:, ::1]
86+
counts: np.ndarray, # int64_t[::1]
87+
values: np.ndarray, # ndarray[floating, ndim=2]
88+
labels: np.ndarray, # const intp_t[:]
89+
min_count: int = ...
90+
) -> None: ...
91+
92+
def group_ohlc(
93+
out: np.ndarray, # floating[:, ::1]
94+
counts: np.ndarray, # int64_t[::1]
95+
values: np.ndarray, # ndarray[floating, ndim=2]
96+
labels: np.ndarray, # const intp_t[:]
97+
min_count: int = ...
98+
) -> None: ...
99+
100+
def group_quantile(
101+
out: np.ndarray, # ndarray[float64_t]
102+
values: np.ndarray, # ndarray[numeric, ndim=1]
103+
labels: np.ndarray, # ndarray[int64_t]
104+
mask: np.ndarray, # ndarray[uint8_t]
105+
q: float, # float64_t
106+
interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
107+
) -> None: ...
108+
109+
def group_last(
110+
out: np.ndarray, # rank_t[:, ::1]
111+
counts: np.ndarray, # int64_t[::1]
112+
values: np.ndarray, # ndarray[rank_t, ndim=2]
113+
labels: np.ndarray, # const int64_t[:]
114+
min_count: int = ..., # Py_ssize_t
115+
) -> None: ...
116+
117+
def group_nth(
118+
out: np.ndarray, # rank_t[:, ::1]
119+
counts: np.ndarray, # int64_t[::1]
120+
values: np.ndarray, # ndarray[rank_t, ndim=2]
121+
labels: np.ndarray, # const int64_t[:]
122+
min_count: int = ..., # int64_t
123+
rank: int = ..., # int64_t
124+
) -> None: ...
125+
126+
def group_rank(
127+
out: np.ndarray, # float64_t[:, ::1]
128+
values: np.ndarray, # ndarray[rank_t, ndim=2]
129+
labels: np.ndarray, # const int64_t[:]
130+
ngroups: int,
131+
is_datetimelike: bool,
132+
ties_method: Literal["average", "min", "max", "first", "dense"] = ...,
133+
ascending: bool = ...,
134+
pct: bool = ...,
135+
na_option: Literal["keep", "top", "bottom"] = ...,
136+
) -> None: ...
137+
138+
def group_max(
139+
out: np.ndarray, # groupby_t[:, ::1]
140+
counts: np.ndarray, # int64_t[::1]
141+
values: np.ndarray, # ndarray[groupby_t, ndim=2]
142+
labels: np.ndarray, # const int64_t[:]
143+
min_count: int = ...,
144+
) -> None: ...
145+
146+
def group_min(
147+
out: np.ndarray, # groupby_t[:, ::1]
148+
counts: np.ndarray, # int64_t[::1]
149+
values: np.ndarray, # ndarray[groupby_t, ndim=2]
150+
labels: np.ndarray, # const int64_t[:]
151+
min_count: int = ...,
152+
) -> None: ...
153+
154+
def group_cummin(
155+
out: np.ndarray, # groupby_t[:, ::1]
156+
values: np.ndarray, # ndarray[groupby_t, ndim=2]
157+
labels: np.ndarray, # const int64_t[:]
158+
ngroups: int,
159+
is_datetimelike: bool,
160+
) -> None: ...
161+
162+
def group_cummax(
163+
out: np.ndarray, # groupby_t[:, ::1]
164+
values: np.ndarray, # ndarray[groupby_t, ndim=2]
165+
labels: np.ndarray, # const int64_t[:]
166+
ngroups: int,
167+
is_datetimelike: bool,
168+
) -> None: ...

pandas/_libs/groupby.pyx

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,9 @@ def group_cumprod_float64(float64_t[:, ::1] out,
154154
155155
Parameters
156156
----------
157-
out : float64 array
157+
out : np.ndarray[np.float64, ndim=2]
158158
Array to store cumprod in.
159-
values : float64 array
159+
values : np.ndarray[np.float64, ndim=2]
160160
Values to take cumprod of.
161161
labels : np.ndarray[np.intp]
162162
Labels to group by.
@@ -211,9 +211,9 @@ def group_cumsum(numeric[:, ::1] out,
211211
212212
Parameters
213213
----------
214-
out : array
214+
out : np.ndarray[ndim=2]
215215
Array to store cumsum in.
216-
values : array
216+
values : np.ndarray[ndim=2]
217217
Values to take cumsum of.
218218
labels : np.ndarray[np.intp]
219219
Labels to group by.
@@ -329,12 +329,15 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
329329
330330
Parameters
331331
----------
332-
out : array of int64_t values which this method will write its results to
333-
Missing values will be written to with a value of -1
332+
out : np.ndarray[np.int64]
333+
Values into which this method will write its results.
334334
labels : np.ndarray[np.intp]
335335
Array containing unique label for each group, with its ordering
336336
matching up to the corresponding record in `values`.
337-
mask : array of int64_t values where a 1 indicates a missing value
337+
values : np.ndarray[np.uint8]
338+
Containing the truth value of each element.
339+
mask : np.ndarray[np.uint8]
340+
Indicating whether a value is na or not.
338341
direction : {'ffill', 'bfill'}
339342
Direction for fill to be applied (forwards or backwards, respectively)
340343
limit : Consecutive values to fill before stopping, or -1 for no limit
@@ -396,12 +399,15 @@ def group_any_all(uint8_t[::1] out,
396399
397400
Parameters
398401
----------
399-
out : array of values which this method will write its results to
402+
out : np.ndarray[np.uint8]
403+
Values into which this method will write its results.
400404
labels : np.ndarray[np.intp]
401405
Array containing unique label for each group, with its
402406
ordering matching up to the corresponding record in `values`
403-
values : array containing the truth value of each element
404-
mask : array indicating whether a value is na or not
407+
values : np.ndarray[np.uint8]
408+
Containing the truth value of each element.
409+
mask : np.ndarray[np.uint8]
410+
Indicating whether a value is na or not.
405411
val_test : {'any', 'all'}
406412
String object dictating whether to use any or all truth testing
407413
skipna : bool
@@ -721,14 +727,17 @@ def group_quantile(ndarray[float64_t] out,
721727
722728
Parameters
723729
----------
724-
out : ndarray
730+
out : np.ndarray[np.float64]
725731
Array of aggregated values that will be written to.
732+
values : np.ndarray
733+
Array containing the values to apply the function against.
726734
labels : ndarray[np.intp]
727735
Array containing the unique group labels.
728736
values : ndarray
729737
Array containing the values to apply the function against.
730738
q : float
731739
The quantile value to search for.
740+
interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'}
732741
733742
Notes
734743
-----
@@ -1048,8 +1057,9 @@ def group_rank(float64_t[:, ::1] out,
10481057
10491058
Parameters
10501059
----------
1051-
out : array of float64_t values which this method will write its results to
1052-
values : array of rank_t values to be ranked
1060+
out : np.ndarray[np.float64, ndim=2]
1061+
Values to which this method will write its results.
1062+
values : np.ndarray of rank_t values to be ranked
10531063
labels : np.ndarray[np.intp]
10541064
Array containing unique label for each group, with its ordering
10551065
matching up to the corresponding record in `values`
@@ -1058,8 +1068,7 @@ def group_rank(float64_t[:, ::1] out,
10581068
groupby functions.
10591069
is_datetimelike : bool
10601070
True if `values` contains datetime-like entries.
1061-
ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
1062-
'average'
1071+
ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
10631072
* average: average rank of group
10641073
* min: lowest rank in group
10651074
* max: highest rank in group
@@ -1120,9 +1129,9 @@ cdef group_min_max(groupby_t[:, ::1] out,
11201129
11211130
Parameters
11221131
----------
1123-
out : array
1132+
out : np.ndarray[groupby_t, ndim=2]
11241133
Array to store result in.
1125-
counts : int64 array
1134+
counts : np.ndarray[int64]
11261135
Input as a zeroed array, populated by group sizes during algorithm
11271136
values : array
11281137
Values to find column-wise min/max of.
@@ -1242,9 +1251,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
12421251
12431252
Parameters
12441253
----------
1245-
out : array
1254+
out : np.ndarray[groupby_t, ndim=2]
12461255
Array to store cummin/max in.
1247-
values : array
1256+
values : np.ndarray[groupby_t, ndim=2]
12481257
Values to take cummin/max of.
12491258
mask : array[uint8_t] or None
12501259
If not None, indices represent missing values,
@@ -1325,10 +1334,10 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
13251334
@cython.wraparound(False)
13261335
def group_cummin(groupby_t[:, ::1] out,
13271336
ndarray[groupby_t, ndim=2] values,
1328-
uint8_t[:, ::1] mask,
13291337
const intp_t[:] labels,
13301338
int ngroups,
1331-
bint is_datetimelike):
1339+
bint is_datetimelike,
1340+
uint8_t[:, ::1] mask=None):
13321341
"""See group_cummin_max.__doc__"""
13331342
group_cummin_max(
13341343
out,
@@ -1345,10 +1354,10 @@ def group_cummin(groupby_t[:, ::1] out,
13451354
@cython.wraparound(False)
13461355
def group_cummax(groupby_t[:, ::1] out,
13471356
ndarray[groupby_t, ndim=2] values,
1348-
uint8_t[:, ::1] mask,
13491357
const intp_t[:] labels,
13501358
int ngroups,
1351-
bint is_datetimelike):
1359+
bint is_datetimelike,
1360+
uint8_t[:, ::1] mask=None):
13521361
"""See group_cummin_max.__doc__"""
13531362
group_cummin_max(
13541363
out,

pandas/_libs/internals.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):
385385
386386
Returns
387387
-------
388-
iter : iterator of (int, slice or array)
388+
list[tuple[int, slice | np.ndarray]]
389389
"""
390390
# There's blkno in this function's name because it's used in block &
391391
# blockno handling.

pandas/_libs/lib.pyx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -916,7 +916,7 @@ def indices_fast(ndarray[intp_t] index, const int64_t[:] labels, list keys,
916916
"""
917917
Parameters
918918
----------
919-
index : ndarray
919+
index : ndarray[intp]
920920
labels : ndarray[int64]
921921
keys : list
922922
sorted_labels : list[ndarray[int64]]
@@ -2440,6 +2440,9 @@ class NoDefault(Enum):
24402440
# 2) because mypy does not understand singletons
24412441
no_default = "NO_DEFAULT"
24422442

2443+
def __repr__(self) -> str:
2444+
return "<no_default>"
2445+
24432446

24442447
# Note: no_default is exported to the public API in pandas.api.extensions
24452448
no_default = NoDefault.no_default # Sentinel indicating the default value.

pandas/_libs/reshape.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
5252
stride : int
5353
length : int
5454
width : int
55-
new_values : typed ndarray
55+
new_values : np.ndarray[bool]
5656
result array
5757
new_mask : np.ndarray[bool]
5858
result mask

pandas/_libs/tslibs/conversion.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ def datetime_to_datetime64(ndarray[object] values):
316316
317317
Returns
318318
-------
319-
result : ndarray[int64_t]
319+
result : ndarray[datetime64ns]
320320
inferred_tz : tzinfo or None
321321
"""
322322
cdef:

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,10 @@ def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="rais
315315
"""
316316
Convert an ndarray to an array of timedeltas. If errors == 'coerce',
317317
coerce non-convertible objects to NaT. Otherwise, raise.
318+
319+
Returns
320+
-------
321+
np.ndarray[timedelta64ns]
318322
"""
319323

320324
cdef:

pandas/core/algorithms.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@ def duplicated(values: ArrayLike, keep: Union[str, bool] = "first") -> np.ndarra
943943
944944
Returns
945945
-------
946-
duplicated : ndarray
946+
duplicated : ndarray[bool]
947947
"""
948948
values, _ = _ensure_data(values)
949949
ndtype = values.dtype.name
@@ -1631,7 +1631,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
16311631
number of periods
16321632
axis : {0, 1}
16331633
axis to shift on
1634-
stacklevel : int
1634+
stacklevel : int, default 3
16351635
The stacklevel for the lost dtype warning.
16361636
16371637
Returns

0 commit comments

Comments
 (0)