pandas-dev
diff --git a/‎.gitignore
Lines changed: 2 additions & 0 deletions b/‎.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/‎doc/source/development/contributing_codebase.rst
Lines changed: 36 additions & 6 deletions b/‎doc/source/development/contributing_codebase.rst
Lines changed: 36 additions & 6 deletions
diff --git a/‎doc/source/development/developer.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/development/developer.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/reference/groupby.rst
Lines changed: 1 addition & 0 deletions b/‎doc/source/reference/groupby.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/user_guide/io.rst
Lines changed: 11 additions & 1 deletion b/‎doc/source/user_guide/io.rst
Lines changed: 11 additions & 1 deletion
diff --git a/‎doc/source/user_guide/timeseries.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/user_guide/timeseries.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v1.4.0.rst
Lines changed: 12 additions & 3 deletions b/‎doc/source/whatsnew/v1.4.0.rst
Lines changed: 12 additions & 3 deletions
diff --git a/‎pandas/__init__.py
Lines changed: 5 additions & 3 deletions b/‎pandas/__init__.py
Lines changed: 5 additions & 3 deletions
diff --git a/‎pandas/_libs/sparse_op_helper.pxi.in
Lines changed: 5 additions & 0 deletions b/‎pandas/_libs/sparse_op_helper.pxi.in
Lines changed: 5 additions & 0 deletions
diff --git a/‎pandas/_libs/tslibs/offsets.pyx
Lines changed: 11 additions & 5 deletions b/‎pandas/_libs/tslibs/offsets.pyx
Lines changed: 11 additions & 5 deletions
diff --git a/‎pandas/_libs/tslibs/period.pyx
Lines changed: 4 additions & 1 deletion b/‎pandas/_libs/tslibs/period.pyx
Lines changed: 4 additions & 1 deletion
@@ -50,6 +50,8 @@ dist
 *.egg-info
 .eggs
 .pypirc
+# type checkers
+pandas/py.typed
 
 # tox testing tool
 .tox
 
@@ -303,7 +303,7 @@ pandas strongly encourages the use of :pep:`484` style type hints. New developme
 Style guidelines
 ~~~~~~~~~~~~~~~~
 
-Types imports should follow the ``from typing import ...`` convention. So rather than
+Type imports should follow the ``from typing import ...`` convention. Some types do not need to be imported: since :pep:`585`, some builtin constructs, such as ``list`` and ``tuple``, can be used directly for type annotations. So rather than
 
 .. code-block:: python
 
@@ -315,21 +315,31 @@ You should write
 
 .. code-block:: python
 
-   from typing import List, Optional, Union
+   primes: list[int] = []
 
-   primes: List[int] = []
+``Optional`` should be avoided in favor of the shorter ``| None``, so instead of
 
-``Optional`` should be used where applicable, so instead of
+.. code-block:: python
+
+   from typing import Union
+
+   maybe_primes: list[Union[int, None]] = []
+
+or
 
 .. code-block:: python
 
-   maybe_primes: List[Union[int, None]] = []
+   from typing import Optional
+
+   maybe_primes: list[Optional[int]] = []
 
 You should write
 
 .. code-block:: python
 
-   maybe_primes: List[Optional[int]] = []
+   from __future__ import annotations  # noqa: F404
+
+   maybe_primes: list[int | None] = []
 
 In some cases in the code base classes may define class variables that shadow builtins. This causes an issue as described in `Mypy 1775 <https://github.com/python/mypy/issues/1775#issuecomment-310969854>`_. The defensive solution here is to create an unambiguous alias of the builtin and use that without your annotation. For example, if you come across a definition like
 
@@ -410,6 +420,26 @@ A recent version of ``numpy`` (>=1.21.0) is required for type validation.
 
 .. _contributing.ci:
 
+Testing type hints in code using pandas
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. warning::
+
+    * Pandas is not yet a py.typed library (:pep:`561`)!
+      The primary purpose of locally declaring pandas as a py.typed library is to test and
+      improve the pandas-builtin type annotations.
+
+Until pandas becomes a py.typed library, it is possible to easily experiment with the type
+annotations shipped with pandas by creating an empty file named "py.typed" in the pandas
+installation folder:
+
+.. code-block:: none
+
+   python -c "import pandas; import pathlib; (pathlib.Path(pandas.__path__[0]) / 'py.typed').touch()"
+
+The existence of the py.typed file signals to type checkers that pandas is already a py.typed
+library. This makes type checkers aware of the type annotations shipped with pandas.
+
 Testing with continuous integration
 -----------------------------------
 
 
@@ -180,7 +180,7 @@ As an example of fully-formed metadata:
          'numpy_type': 'int64',
          'metadata': None}
     ],
-    'pandas_version': '0.20.0',
+    'pandas_version': '1.4.0',
     'creator': {
       'library': 'pyarrow',
       'version': '0.13.0'
 
@@ -122,6 +122,7 @@ application to columns of a specific data type.
    DataFrameGroupBy.skew
    DataFrameGroupBy.take
    DataFrameGroupBy.tshift
+   DataFrameGroupBy.value_counts
 
 The following methods are available only for ``SeriesGroupBy`` objects.
 
 
@@ -1903,6 +1903,7 @@ with optional parameters:
      ``index``; dict like {index -> {column -> value}}
      ``columns``; dict like {column -> {index -> value}}
      ``values``; just the values array
+     ``table``; adhering to the JSON `Table Schema`_
 
 * ``date_format`` : string, type of date conversion, 'epoch' for timestamp, 'iso' for ISO8601.
 * ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10.
@@ -2477,7 +2478,6 @@ A few notes on the generated table schema:
     * For ``MultiIndex``, ``mi.names`` is used. If any level has no name,
       then ``level_<i>`` is used.
 
-
 ``read_json`` also accepts ``orient='table'`` as an argument. This allows for
 the preservation of metadata such as dtypes and index names in a
 round-trippable manner.
@@ -2519,8 +2519,18 @@ indicate missing values and the subsequent read cannot distinguish the intent.
 
    os.remove("test.json")
 
+When using ``orient='table'`` along with user-defined ``ExtensionArray``,
+the generated schema will contain an additional ``extDtype`` key in the respective
+``fields`` element. This extra key is not standard but does enable JSON roundtrips
+for extension types (e.g. ``read_json(df.to_json(orient="table"), orient="table")``).
+
+The ``extDtype`` key carries the name of the extension. If you have properly registered
+the ``ExtensionDtype``, pandas will use said name to perform a lookup into the registry
+and re-convert the serialized data into your custom dtype.
+
 .. _Table Schema: https://specs.frictionlessdata.io/table-schema/
 
+
 HTML
 ----
 
 
@@ -2424,7 +2424,7 @@ you can use the ``tz_convert`` method.
 
     For ``pytz`` time zones, it is incorrect to pass a time zone object directly into
     the ``datetime.datetime`` constructor
-    (e.g., ``datetime.datetime(2011, 1, 1, tz=pytz.timezone('US/Eastern'))``.
+    (e.g., ``datetime.datetime(2011, 1, 1, tzinfo=pytz.timezone('US/Eastern'))``).
     Instead, the datetime needs to be localized using the ``localize`` method
     on the ``pytz`` time zone object.
 
 
@@ -217,9 +217,10 @@ Other enhancements
 - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
 - :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is ``None`` (:issue:`43563`, :issue:`32835`, :issue:`25549`)
 - :meth:`DataFrame.dropna` now accepts a single label as ``subset`` along with array-like (:issue:`41021`)
+- Added :meth:`DataFrameGroupBy.value_counts` (:issue:`43564`)
 - :class:`ExcelWriter` argument ``if_sheet_exists="overlay"`` option added (:issue:`40231`)
 - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`)
-- :meth:`.GroupBy.mean`, :meth:`.GroupBy.std`, and :meth:`.GroupBy.var` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`)
+- :meth:`.GroupBy.mean`, :meth:`.GroupBy.std`, :meth:`.GroupBy.var`, and :meth:`.GroupBy.sum` now support `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`, :issue:`44939`)
 - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`)
 - :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`)
 - New option ``display.max_dir_items`` customizes the number of columns added to :meth:`Dataframe.__dir__` and suggested for tab completion (:issue:`37996`)
@@ -231,6 +232,7 @@ Other enhancements
 - :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`)
 - :meth:`read_json` can now parse unsigned long long integers (:issue:`26068`)
 - :meth:`DataFrame.take` now raises a ``TypeError`` when passed a scalar for the indexer (:issue:`42875`)
+- :class:`ExtensionDtype` and :class:`ExtensionArray` are now (de)serialized when exporting a :class:`DataFrame` with :meth:`DataFrame.to_json` using ``orient='table'`` (:issue:`20612`, :issue:`44705`).
 -
 
 
@@ -454,6 +456,7 @@ Other API changes
 - :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than 'target'); in the past these would be silently ignored if the index was not unique (:issue:`42310`)
 - Change in the position of the ``min_rows`` argument in :meth:`DataFrame.to_string` due to change in the docstring (:issue:`44304`)
 - Reduction operations for :class:`DataFrame` or :class:`Series` now raising a ``ValueError`` when ``None`` is passed for ``skipna`` (:issue:`44178`)
+- :func:`read_csv` and :func:`read_html` no longer raise an error when one of the header rows consists only of ``Unnamed:`` columns (:issue:`13054`)
 - Changed the ``name`` attribute of several holidays in
   ``USFederalHolidayCalendar`` to match `official federal holiday
   names <https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/>`_
@@ -529,7 +532,7 @@ Other Deprecations
 - Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`)
 - Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
 - Deprecated casting behavior when setting timezone-aware value(s) into a timezone-aware :class:`Series` or :class:`DataFrame` column when the timezones do not match. Previously this cast to object dtype. In a future version, the values being inserted will be converted to the series or column's existing timezone (:issue:`37605`)
-- Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where` :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`)
+- Deprecated casting behavior when passing an item with mismatched-timezone to :meth:`DatetimeIndex.insert`, :meth:`DatetimeIndex.putmask`, :meth:`DatetimeIndex.where`, :meth:`DatetimeIndex.fillna`, :meth:`Series.mask`, :meth:`Series.where`, :meth:`Series.fillna`, :meth:`Series.shift`, :meth:`Series.replace`, :meth:`Series.reindex` (and :class:`DataFrame` column analogues). In the past this has cast to object dtype. In a future version, these will cast the passed item to the index or series's timezone (:issue:`37605`, :issue:`44940`)
 - Deprecated the 'errors' keyword argument in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, and meth:`DataFrame.mask`; in a future version the argument will be removed (:issue:`44294`)
 - Deprecated the ``prefix`` keyword argument in :func:`read_csv` and :func:`read_table`, in a future version the argument will be removed (:issue:`43396`)
 - Deprecated :meth:`PeriodIndex.astype` to ``datetime64[ns]`` or ``DatetimeTZDtype``, use ``obj.to_timestamp(how).tz_localize(dtype.tz)`` instead (:issue:`44398`)
@@ -540,6 +543,7 @@ Other Deprecations
 - Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`)
 - A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`)
 - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
+- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -627,6 +631,7 @@ Datetimelike
 - Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`)
 - Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`)
 - Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`)
+- Bug in :class:`DateOffset` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`)
 -
 
 Timedelta
@@ -760,6 +765,7 @@ I/O
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 - Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`)
 - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
+- Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`)
 - Bug in :func:`read_csv` silently ignoring errors when failling to create a memory-mapped file (:issue:`44766`)
 - Bug in :func:`read_csv` when passing a ``tempfile.SpooledTemporaryFile`` opened in binary mode (:issue:`44748`)
 -
@@ -769,6 +775,7 @@ Period
 - Bug in adding a :class:`Period` object to a ``np.timedelta64`` object incorrectly raising ``TypeError`` (:issue:`44182`)
 - Bug in :meth:`PeriodIndex.to_timestamp` when the index has ``freq="B"`` inferring ``freq="D"`` for its result instead of ``freq="B"`` (:issue:`44105`)
 - Bug in :class:`Period` constructor incorrectly allowing ``np.timedelta64("NaT")`` (:issue:`44507`)
+- Bug in :meth:`PeriodIndex.to_timestamp` giving incorrect values for indexes with non-contiguous data (:issue:`44100`)
 -
 
 Plotting
@@ -794,6 +801,7 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.mean` failing with ``complex`` dtype (:issue:`43701`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`)
+- Bug in :meth:`GroupBy.mean` raising ``KeyError`` when column was selected at least twice (:issue:`44924`)
 - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`)
 - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`)
@@ -824,6 +832,7 @@ Sparse
 - Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`)
 - Bug in :meth:`DataFrame.sparse.to_coo` silently converting non-zero fill values to zero (:issue:`24817`)
 - Bug in :class:`SparseArray` comparison methods with an array-like operand of mismatched length raising ``AssertionError`` or unclear ``ValueError`` depending on the input (:issue:`43863`)
+- Bug in :class:`SparseArray` arithmetic methods ``floordiv`` and ``mod`` where the behavior when dividing by zero did not match the non-sparse :class:`Series` behavior (:issue:`38172`)
 -
 
 ExtensionArray
@@ -837,7 +846,7 @@ ExtensionArray
 - Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`44715`)
 - Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
 - Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
--
+- Fixed bug in :meth:`Series.replace` with ``FloatDtype``, ``string[python]``, or ``string[pyarrow]`` dtype not being preserved when possible (:issue:`33484`)
 
 Styler
 ^^^^^^
 
@@ -23,13 +23,15 @@
 
 try:
     from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
-except ImportError as e:  # pragma: no cover
-    module = e.name
+except ImportError as err:  # pragma: no cover
+    module = err.name
     raise ImportError(
         f"C extension: {module} not built. If you want to import "
         "pandas from the source directory, you may need to run "
         "'python setup.py build_ext --force' to build the C extensions first."
-    ) from e
+    ) from err
+else:
+    del _tslib, _lib, _hashtable
 
 from pandas._config import (
     get_option,
 
@@ -42,6 +42,11 @@ cdef inline sparse_t __mod__(sparse_t a, sparse_t b):
 cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b):
     if b == 0:
         if sparse_t is float64_t:
+            # Match non-sparse Series behavior implemented in mask_zero_div_zero
+            if a > 0:
+                return INF
+            elif a < 0:
+                return -INF
             return NaN
         else:
             return 0
 
@@ -186,8 +186,9 @@ def apply_wraps(func):
         if self.normalize:
             result = result.normalize()
 
-        # nanosecond may be deleted depending on offset process
-        if not self.normalize and nano != 0:
+        # If the offset object does not have a nanoseconds component,
+        # the result's nanosecond component may be lost.
+        if not self.normalize and nano != 0 and not hasattr(self, "nanoseconds"):
             if result.nanosecond != nano:
                 if result.tz is not None:
                     # convert to UTC
@@ -333,7 +334,7 @@ cdef _determine_offset(kwds):
             # sub-daily offset - use timedelta (tz-aware)
             offset = timedelta(**kwds_no_nanos)
     else:
-        offset = timedelta(1)
+        offset = timedelta(0)
     return offset, use_relativedelta
 
 
@@ -1068,12 +1069,17 @@ cdef class RelativeDeltaOffset(BaseOffset):
                 # perform calculation in UTC
                 other = other.replace(tzinfo=None)
 
+            if hasattr(self, "nanoseconds"):
+                td_nano = Timedelta(nanoseconds=self.nanoseconds)
+            else:
+                td_nano = Timedelta(0)
+
             if self.n > 0:
                 for i in range(self.n):
-                    other = other + self._offset
+                    other = other + self._offset + td_nano
             else:
                 for i in range(-self.n):
-                    other = other - self._offset
+                    other = other - self._offset - td_nano
 
             if tzinfo is not None and self._use_relativedelta:
                 # bring tz back from UTC calculation
 
@@ -1088,6 +1088,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end):
     """
     cdef:
         Py_ssize_t n = len(arr)
+        Py_ssize_t increment = arr.strides[0] // 8
         ndarray[int64_t] result = np.empty(n, dtype=np.int64)
 
     _period_asfreq(
@@ -1097,6 +1098,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end):
         freq1,
         freq2,
         end,
+        increment,
     )
     return result
 
@@ -1110,6 +1112,7 @@ cdef void _period_asfreq(
     int freq1,
     int freq2,
     bint end,
+    Py_ssize_t increment=1,
 ):
     """See period_asfreq.__doc__"""
     cdef:
@@ -1127,7 +1130,7 @@ cdef void _period_asfreq(
     get_asfreq_info(freq1, freq2, end, &af_info)
 
     for i in range(length):
-        val = ordinals[i]
+        val = ordinals[i * increment]
         if val != NPY_NAT:
             val = func(val, &af_info)
         out[i] = val