pandas-dev
diff --git a/‎pandas/_libs/groupby.pyi
Lines changed: 168 additions & 0 deletions b/‎pandas/_libs/groupby.pyi
Lines changed: 168 additions & 0 deletions
diff --git a/‎pandas/_libs/groupby.pyx
Lines changed: 32 additions & 23 deletions b/‎pandas/_libs/groupby.pyx
Lines changed: 32 additions & 23 deletions
diff --git a/‎pandas/_libs/internals.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/internals.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/lib.pyx
Lines changed: 4 additions & 1 deletion b/‎pandas/_libs/lib.pyx
Lines changed: 4 additions & 1 deletion
diff --git a/‎pandas/_libs/reshape.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/reshape.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/tslibs/conversion.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/tslibs/conversion.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/tslibs/timedeltas.pyx
Lines changed: 4 additions & 0 deletions b/‎pandas/_libs/tslibs/timedeltas.pyx
Lines changed: 4 additions & 0 deletions
diff --git a/‎pandas/core/algorithms.py
Lines changed: 2 additions & 2 deletions b/‎pandas/core/algorithms.py
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,168 @@
+from typing import Literal
+
+import numpy as np
+
+def group_median_float64(
+    out: np.ndarray,       # ndarray[float64_t, ndim=2]
+    counts: np.ndarray,    # ndarray[int64_t]
+    values: np.ndarray,    # ndarray[float64_t, ndim=2]
+    labels: np.ndarray,    # ndarray[int64_t]
+    min_count: int = ...,  # Py_ssize_t
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+def group_cumprod_float64(
+    out: np.ndarray,     # float64_t[:, ::1]; array to store cumprod in
+    values: np.ndarray,  # const float64_t[:, :]; values to take cumprod of
+    labels: np.ndarray,  # const intp_t[:]; labels to group by
+    ngroups: int,
+    is_datetimelike: bool,
+    skipna: bool = ...,
+) -> None: ...
+
+def group_cumsum(
+    out: np.ndarray,     # numeric[:, ::1]; array to store cumsum in
+    values: np.ndarray,  # ndarray[numeric, ndim=2]; values to take cumsum of
+    labels: np.ndarray,  # const intp_t[:]; labels to group by
+    ngroups: int,
+    is_datetimelike: bool,
+    skipna: bool = ...,
+) -> None: ...
+
+
+def group_shift_indexer(
+    out: np.ndarray,     # int64_t[::1]
+    labels: np.ndarray,  # const int64_t[:]
+    ngroups: int,
+    periods: int,
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+
+def group_fillna_indexer(
+    out: np.ndarray,     # ndarray[int64_t]; results are written into this array
+    labels: np.ndarray,  # ndarray[intp_t]; group label for each record
+    mask: np.ndarray,    # ndarray[uint8_t]
+    direction: Literal["ffill", "bfill"],  # fill forwards or backwards
+    limit: int,          # int64_t; consecutive values to fill, -1 for no limit
+    dropna: bool,
+) -> None: ...
+
+
+def group_any_all(
+    out: np.ndarray,     # uint8_t[::1]; results are written into this array
+    values: np.ndarray,  # const uint8_t[::1]; truth value of each element
+    labels: np.ndarray,  # const intp_t[:]; group label for each record
+    mask: np.ndarray,    # const uint8_t[::1]; whether each value is na
+    val_test: Literal["any", "all"],  # which truth test to apply
+    skipna: bool,
+) -> None: ...
+
+def group_add(
+    out: np.ndarray,  # complexfloating_t[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[complexfloating_t, ndim=2]
+    labels: np.ndarray,  # const intp_t[:]
+    min_count: int = ...,
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+def group_prod(
+    out: np.ndarray,  # floating[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[floating, ndim=2]
+    labels: np.ndarray,  # const intp_t[:]
+    min_count: int = ...,
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+def group_var(
+    out: np.ndarray,       # floating[:, ::1]
+    counts: np.ndarray,    # int64_t[::1]
+    values: np.ndarray,    # ndarray[floating, ndim=2]
+    labels: np.ndarray,    # const intp_t[:]
+    min_count: int = ...,  # Py_ssize_t
+    ddof: int = ...,       # int64_t
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+def group_mean(
+    out: np.ndarray,  # floating[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[floating, ndim=2]
+    labels: np.ndarray,  # const intp_t[:]
+    min_count: int = ...,
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+def group_ohlc(
+    out: np.ndarray,  # floating[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[floating, ndim=2]
+    labels: np.ndarray,  # const intp_t[:]
+    min_count: int = ...,
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+def group_quantile(
+    out: np.ndarray,     # ndarray[float64_t]; aggregated values are written here
+    values: np.ndarray,  # ndarray[numeric, ndim=1]; input values
+    labels: np.ndarray,  # ndarray[intp_t]; the group labels
+    mask: np.ndarray,    # ndarray[uint8_t]
+    q: float,            # float64_t; the quantile value to search for
+    interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
+) -> None: ...
+
+def group_last(
+    out: np.ndarray,       # rank_t[:, ::1]
+    counts: np.ndarray,    # int64_t[::1]
+    values: np.ndarray,    # ndarray[rank_t, ndim=2]
+    labels: np.ndarray,    # const int64_t[:]
+    min_count: int = ...,  # Py_ssize_t
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+def group_nth(
+    out: np.ndarray,        # rank_t[:, ::1]
+    counts: np.ndarray,     # int64_t[::1]
+    values: np.ndarray,     # ndarray[rank_t, ndim=2]
+    labels: np.ndarray,     # const int64_t[:]
+    min_count: int = ...,   # int64_t
+    rank: int = ...,        # int64_t
+) -> None: ...  # NOTE(review): presumably fills `out` in place — confirm in groupby.pyx
+
+def group_rank(
+    out: np.ndarray,     # float64_t[:, ::1]; ranks are written into this array
+    values: np.ndarray,  # ndarray[rank_t, ndim=2]; values to be ranked
+    labels: np.ndarray,  # const intp_t[:]; group label for each record in values
+    ngroups: int,
+    is_datetimelike: bool,  # True if values contains datetime-like entries
+    ties_method: Literal["average", "min", "max", "first", "dense"] = ...,
+    ascending: bool = ...,
+    pct: bool = ...,
+    na_option: Literal["keep", "top", "bottom"] = ...,
+) -> None: ...
+
+def group_max(
+    out: np.ndarray,     # groupby_t[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    min_count: int = ...,
+) -> None: ...  # NOTE(review): presumably backed by group_min_max in groupby.pyx — confirm
+
+def group_min(
+    out: np.ndarray,     # groupby_t[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    min_count: int = ...,
+) -> None: ...  # NOTE(review): presumably backed by group_min_max in groupby.pyx — confirm
+
+def group_cummin(
+    out: np.ndarray,      # groupby_t[:, ::1]; array to store the cummin in
+    values: np.ndarray,   # ndarray[groupby_t, ndim=2]; values to take cummin of
+    labels: np.ndarray,   # const intp_t[:]
+    ngroups: int,
+    is_datetimelike: bool,
+    mask: np.ndarray | None = ...,  # uint8_t[:, ::1]; if not None, marks missing values
+) -> None: ...
+
+def group_cummax(
+    out: np.ndarray,      # groupby_t[:, ::1]; array to store the cummax in
+    values: np.ndarray,   # ndarray[groupby_t, ndim=2]; values to take cummax of
+    labels: np.ndarray,   # const intp_t[:]
+    ngroups: int,
+    is_datetimelike: bool,
+    mask: np.ndarray | None = ...,  # uint8_t[:, ::1]; if not None, marks missing values
+) -> None: ...
@@ -154,9 +154,9 @@ def group_cumprod_float64(float64_t[:, ::1] out,
 
     Parameters
     ----------
-    out : float64 array
+    out : np.ndarray[np.float64, ndim=2]
         Array to store cumprod in.
-    values : float64 array
+    values : np.ndarray[np.float64, ndim=2]
         Values to take cumprod of.
     labels : np.ndarray[np.intp]
         Labels to group by.
@@ -211,9 +211,9 @@ def group_cumsum(numeric[:, ::1] out,
 
     Parameters
     ----------
-    out : array
+    out : np.ndarray[ndim=2]
         Array to store cumsum in.
-    values : array
+    values : np.ndarray[ndim=2]
         Values to take cumsum of.
     labels : np.ndarray[np.intp]
         Labels to group by.
@@ -329,12 +329,15 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
 
     Parameters
     ----------
-    out : array of int64_t values which this method will write its results to
-        Missing values will be written to with a value of -1
+    out : np.ndarray[np.int64]
+        Values into which this method will write its results.
+        Missing values will be written to with a value of -1.
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`.
-    mask : array of int64_t values where a 1 indicates a missing value
+    mask : np.ndarray[np.uint8]
+        Indicating whether a value is na or not
+        (a 1 indicates a missing value).
     direction : {'ffill', 'bfill'}
         Direction for fill to be applied (forwards or backwards, respectively)
     limit : Consecutive values to fill before stopping, or -1 for no limit
@@ -396,12 +399,15 @@ def group_any_all(uint8_t[::1] out,
 
     Parameters
     ----------
-    out : array of values which this method will write its results to
+    out : np.ndarray[np.uint8]
+        Values into which this method will write its results.
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its
         ordering matching up to the corresponding record in `values`
-    values : array containing the truth value of each element
-    mask : array indicating whether a value is na or not
+    values : np.ndarray[np.uint8]
+        Containing the truth value of each element.
+    mask : np.ndarray[np.uint8]
+        Indicating whether a value is na or not.
     val_test : {'any', 'all'}
         String object dictating whether to use any or all truth testing
     skipna : bool
@@ -721,14 +727,17 @@ def group_quantile(ndarray[float64_t] out,
 
     Parameters
     ----------
-    out : ndarray
+    out : np.ndarray[np.float64]
         Array of aggregated values that will be written to.
+    values : np.ndarray
+        Array containing the values to apply the function against.
     labels : ndarray[np.intp]
         Array containing the unique group labels.
-    values : ndarray
-        Array containing the values to apply the function against.
+    mask : np.ndarray[np.uint8]
+        Indicating whether a value is na or not.
     q : float
         The quantile value to search for.
+    interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'}
 
     Notes
     -----
@@ -1048,8 +1057,9 @@ def group_rank(float64_t[:, ::1] out,
 
     Parameters
     ----------
-    out : array of float64_t values which this method will write its results to
-    values : array of rank_t values to be ranked
+    out : np.ndarray[np.float64, ndim=2]
+        Values to which this method will write its results.
+    values : np.ndarray of rank_t values to be ranked
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`
@@ -1058,8 +1068,7 @@ def group_rank(float64_t[:, ::1] out,
         groupby functions.
     is_datetimelike : bool
         True if `values` contains datetime-like entries.
-    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
-        'average'
+    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
         * average: average rank of group
         * min: lowest rank in group
         * max: highest rank in group
@@ -1120,9 +1129,9 @@ cdef group_min_max(groupby_t[:, ::1] out,
 
     Parameters
     ----------
-    out : array
+    out : np.ndarray[groupby_t, ndim=2]
         Array to store result in.
-    counts : int64 array
+    counts : np.ndarray[int64]
         Input as a zeroed array, populated by group sizes during algorithm
     values : array
         Values to find column-wise min/max of.
@@ -1242,9 +1251,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
 
     Parameters
     ----------
-    out : array
+    out : np.ndarray[groupby_t, ndim=2]
         Array to store cummin/max in.
-    values : array
+    values : np.ndarray[groupby_t, ndim=2]
         Values to take cummin/max of.
     mask : array[uint8_t] or None
         If not None, indices represent missing values,
@@ -1325,10 +1334,10 @@ cdef group_cummin_max(groupby_t[:, ::1] out,
 @cython.wraparound(False)
 def group_cummin(groupby_t[:, ::1] out,
                  ndarray[groupby_t, ndim=2] values,
-                 uint8_t[:, ::1] mask,
                  const intp_t[:] labels,
                  int ngroups,
-                 bint is_datetimelike):
+                 bint is_datetimelike,
+                 uint8_t[:, ::1] mask=None):
     """See group_cummin_max.__doc__"""
     group_cummin_max(
         out,
@@ -1345,10 +1354,10 @@ def group_cummin(groupby_t[:, ::1] out,
 @cython.wraparound(False)
 def group_cummax(groupby_t[:, ::1] out,
                  ndarray[groupby_t, ndim=2] values,
-                 uint8_t[:, ::1] mask,
                  const intp_t[:] labels,
                  int ngroups,
-                 bint is_datetimelike):
+                 bint is_datetimelike,
+                 uint8_t[:, ::1] mask=None):
     """See group_cummin_max.__doc__"""
     group_cummin_max(
         out,
 
@@ -385,7 +385,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):
 
     Returns
     -------
-    iter : iterator of (int, slice or array)
+    list[tuple[int, slice | np.ndarray]]
     """
     # There's blkno in this function's name because it's used in block &
     # blockno handling.
 
@@ -916,7 +916,7 @@ def indices_fast(ndarray[intp_t] index, const int64_t[:] labels, list keys,
     """
     Parameters
     ----------
-    index : ndarray
+    index : ndarray[intp]
     labels : ndarray[int64]
     keys : list
     sorted_labels : list[ndarray[int64]]
@@ -2440,6 +2440,9 @@ class NoDefault(Enum):
     # 2) because mypy does not understand singletons
     no_default = "NO_DEFAULT"
 
+    def __repr__(self) -> str:
+        return "<no_default>"  # short, readable repr for the no_default sentinel
+
 
 # Note: no_default is exported to the public API in pandas.api.extensions
 no_default = NoDefault.no_default  # Sentinel indicating the default value.
 
@@ -52,7 +52,7 @@ def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
     stride : int
     length : int
     width : int
-    new_values : typed ndarray
+    new_values : np.ndarray[reshape_t, ndim=2]
         result array
     new_mask : np.ndarray[bool]
         result mask
 
@@ -316,7 +316,7 @@ def datetime_to_datetime64(ndarray[object] values):
 
     Returns
     -------
-    result : ndarray[int64_t]
+    result : ndarray[datetime64ns]
     inferred_tz : tzinfo or None
     """
     cdef:
 
@@ -315,6 +315,10 @@ def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="rais
     """
     Convert an ndarray to an array of timedeltas. If errors == 'coerce',
     coerce non-convertible objects to NaT. Otherwise, raise.
+
+    Returns
+    -------
+    np.ndarray[timedelta64ns]
     """
 
     cdef:
 
@@ -943,7 +943,7 @@ def duplicated(values: ArrayLike, keep: Union[str, bool] = "first") -> np.ndarra
 
     Returns
     -------
-    duplicated : ndarray
+    duplicated : ndarray[bool]
     """
     values, _ = _ensure_data(values)
     ndtype = values.dtype.name
@@ -1631,7 +1631,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
         number of periods
     axis : {0, 1}
         axis to shift on
-    stacklevel : int
+    stacklevel : int, default 3
         The stacklevel for the lost dtype warning.
 
     Returns