
Commit 891ab4e

Merge branch 'main' into share-datetime-parsing-format-paths
2 parents: 8b07c46 + 9918c84

92 files changed: +896 −727 lines


.github/workflows/macos-windows.yml

Lines changed: 1 addition & 0 deletions

@@ -16,6 +16,7 @@ env:
   PANDAS_CI: 1
   PYTEST_TARGET: pandas
   PATTERN: "not slow and not db and not network and not single_cpu"
+  TEST_ARGS: "-W error:::pandas"


 permissions:

.github/workflows/ubuntu.yml

Lines changed: 4 additions & 1 deletion

@@ -38,6 +38,7 @@ jobs:
           - name: "Minimum Versions"
             env_file: actions-38-minimum_versions.yaml
             pattern: "not slow and not network and not single_cpu"
+            test_args: ""
           - name: "Locale: it_IT"
             env_file: actions-38.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -62,10 +63,12 @@ jobs:
             env_file: actions-310.yaml
             pattern: "not slow and not network and not single_cpu"
             pandas_copy_on_write: "1"
+            test_args: ""
           - name: "Data Manager"
             env_file: actions-38.yaml
             pattern: "not slow and not network and not single_cpu"
             pandas_data_manager: "array"
+            test_args: ""
           - name: "Pypy"
             env_file: actions-pypy-38.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -93,7 +96,7 @@ jobs:
       LC_ALL: ${{ matrix.lc_all || '' }}
       PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }}
       PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
-      TEST_ARGS: ${{ matrix.test_args || '' }}
+      TEST_ARGS: ${{ matrix.test_args || '-W error:::pandas' }}
       PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
       PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
       IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }}
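The new `TEST_ARGS` default hands pytest the standard Python warning-filter spec `-W error:::pandas` (fields are `action:message:category:module`), so warnings originating from pandas fail the build. A minimal sketch of the same escalation done programmatically (the helper name is mine; the module-regex field is omitted and a blanket filter used instead):

```python
import warnings

def run_with_warnings_as_errors(func):
    # Escalate all warnings to errors for the duration of the call,
    # mirroring what a "-W error" style filter does process-wide.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        try:
            func()
            return "no warning"
        except Warning as exc:
            return f"escalated: {exc}"
```

In CI the flag covers the whole pytest run; this context-manager form is only useful for spot-checking a single call.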

.pre-commit-config.yaml

Lines changed: 10 additions & 0 deletions

@@ -333,3 +333,13 @@ repos:
         additional_dependencies:
         - autotyping==22.9.0
         - libcst==0.4.7
+    - id: check-test-naming
+      name: check that test names start with 'test'
+      entry: python -m scripts.check_test_naming
+      types: [python]
+      files: ^pandas/tests
+      language: python
+      exclude: |
+          (?x)
+          ^pandas/tests/generic/test_generic.py  # GH50380
+          |^pandas/tests/io/json/test_readlines.py  # GH50378

doc/source/reference/indexing.rst

Lines changed: 1 addition & 0 deletions

@@ -298,6 +298,7 @@ MultiIndex components
    MultiIndex.swaplevel
    MultiIndex.reorder_levels
    MultiIndex.remove_unused_levels
+   MultiIndex.drop

 MultiIndex selecting
 ~~~~~~~~~~~~~~~~~~~~

doc/source/user_guide/io.rst

Lines changed: 5 additions & 4 deletions

@@ -275,6 +275,9 @@ parse_dates : boolean or list of ints or names or list of lists or dict, default
 infer_datetime_format : boolean, default ``False``
     If ``True`` and parse_dates is enabled for a column, attempt to infer the
     datetime format to speed up the processing.
+
+    .. deprecated:: 2.0.0
+        A strict version of this argument is now the default, passing it has no effect.
 keep_date_col : boolean, default ``False``
     If ``True`` and parse_dates specifies combining multiple columns then keep the
     original columns.
@@ -916,12 +919,10 @@ an exception is raised, the next one is tried:

 Note that performance-wise, you should try these methods of parsing dates in order:

-1. Try to infer the format using ``infer_datetime_format=True`` (see section below).
-
-2. If you know the format, use ``pd.to_datetime()``:
+1. If you know the format, use ``pd.to_datetime()``:
    ``date_parser=lambda x: pd.to_datetime(x, format=...)``.

-3. If you have a really non-standard format, use a custom ``date_parser`` function.
+2. If you have a really non-standard format, use a custom ``date_parser`` function.
    For optimal performance, this should be vectorized, i.e., it should accept arrays
    as arguments.
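The reordered advice boils down to: supply the format yourself whenever you know it, rather than letting the parser infer it per element. A stdlib sketch of that principle (``datetime.strptime`` plays the role of ``pd.to_datetime(x, format=...)`` here, so no pandas install is assumed):

```python
from datetime import datetime

def parse_known_format(values, fmt="%d/%m/%Y"):
    # An explicit format string is strict and fast, mirroring
    # date_parser=lambda x: pd.to_datetime(x, format="%d/%m/%Y")
    return [datetime.strptime(v, fmt) for v in values]
```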

doc/source/whatsnew/v2.0.0.rst

Lines changed: 13 additions & 8 deletions

@@ -28,10 +28,10 @@ The available extras, found in the :ref:`installation guide<install.dependencies
 ``[all, performance, computation, timezone, fss, aws, gcp, excel, parquet, feather, hdf5, spss, postgresql, mysql,
 sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (:issue:`39164`).

-.. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_nullable_backend:
+.. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_dtype_backend:

-Configuration option, ``mode.nullable_backend``, to return pyarrow-backed dtypes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)

@@ -41,7 +41,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
 * :func:`read_sql_query`
 * :func:`read_sql_table`

-Additionally a new global configuration, ``mode.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
+Additionally a new global configuration, ``mode.dtype_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
 to select the nullable dtypes implementation.

 * :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
@@ -50,12 +50,12 @@ to select the nullable dtypes implementation.
 * :func:`read_orc`


-And the following methods will also utilize the ``mode.nullable_backend`` option.
+And the following methods will also utilize the ``mode.dtype_backend`` option.

 * :meth:`DataFrame.convert_dtypes`
 * :meth:`Series.convert_dtypes`

-By default, ``mode.nullable_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
+By default, ``mode.dtype_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
 be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (:issue:`48957`, :issue:`49997`).

 .. ipython:: python
@@ -65,12 +65,12 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
        1,2.5,True,a,,,,,
        3,4.5,False,b,6,7.5,True,a,
    """)
-    with pd.option_context("mode.nullable_backend", "pandas"):
+    with pd.option_context("mode.dtype_backend", "pandas"):
         df = pd.read_csv(data, use_nullable_dtypes=True)
     df.dtypes

     data.seek(0)
-    with pd.option_context("mode.nullable_backend", "pyarrow"):
+    with pd.option_context("mode.dtype_backend", "pyarrow"):
         df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
     df_pyarrow.dtypes

@@ -717,13 +717,17 @@ Removal of prior version deprecations/changes
 - Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
 - Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
 - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
+- Changed behavior in setting values with ``df.loc[:, foo] = bar`` or ``df.iloc[:, foo] = bar``, these now always attempt to set values inplace before falling back to casting (:issue:`45333`)
 - Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
 - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
 - Using the method :meth:`DataFrameGroupBy.transform` with a callable that returns DataFrames will align to the input's index (:issue:`47244`)
 - When providing a list of columns of length one to :meth:`DataFrame.groupby`, the keys that are returned by iterating over the resulting :class:`DataFrameGroupBy` object will now be tuples of length one (:issue:`47761`)
 - Removed deprecated methods :meth:`ExcelWriter.write_cells`, :meth:`ExcelWriter.save`, :meth:`ExcelWriter.cur_sheet`, :meth:`ExcelWriter.handles`, :meth:`ExcelWriter.path` (:issue:`45795`)
 - The :class:`ExcelWriter` attribute ``book`` can no longer be set; it is still available to be accessed and mutated (:issue:`48943`)
 - Removed unused ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops (:issue:`47851`)
+- Removed the deprecated argument ``line_terminator`` from :meth:`DataFrame.to_csv` (:issue:`45302`)
+- Removed the deprecated argument ``label`` from :func:`lreshape` (:issue:`30219`)
+- Arguments after ``expr`` in :meth:`DataFrame.eval` and :meth:`DataFrame.query` are keyword-only (:issue:`47587`)
 -

 .. ---------------------------------------------------------------------------
@@ -804,6 +808,7 @@ Datetimelike
 - Bug in :func:`to_datetime` was giving incorrect results when using ``format='%Y%m%d'`` and ``errors='ignore'`` (:issue:`26493`)
 - Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`)
 - Bug in :func:`to_datetime` was not raising ``ValueError`` when invalid format was passed and ``errors`` was ``'ignore'`` or ``'coerce'`` (:issue:`50266`)
+- Bug in :class:`DateOffset` was throwing ``TypeError`` when constructing with milliseconds and another super-daily argument (:issue:`49897`)
 -

 Timedelta

pandas/_libs/src/ujson/lib/ultrajsonenc.c

Lines changed: 1 addition & 1 deletion

@@ -1080,11 +1080,11 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,

     case JT_UTF8: {
       value = enc->getStringValue(obj, &tc, &szlen);
-      Buffer_Reserve(enc, RESERVE_STRING(szlen));
       if (enc->errorMsg) {
         enc->endTypeContext(obj, &tc);
         return;
       }
+      Buffer_Reserve(enc, RESERVE_STRING(szlen));
       Buffer_AppendCharUnchecked(enc, '\"');

       if (enc->forceASCII) {

pandas/_libs/tslibs/offsets.pyx

Lines changed: 47 additions & 37 deletions

@@ -298,43 +298,54 @@ _relativedelta_kwds = {"years", "months", "weeks", "days", "year", "month",


 cdef _determine_offset(kwds):
-    # timedelta is used for sub-daily plural offsets and all singular
-    # offsets, relativedelta is used for plural offsets of daily length or
-    # more, nanosecond(s) are handled by apply_wraps
-    kwds_no_nanos = dict(
-        (k, v) for k, v in kwds.items()
-        if k not in ("nanosecond", "nanoseconds")
-    )
-    # TODO: Are nanosecond and nanoseconds allowed somewhere?
-
-    _kwds_use_relativedelta = ("years", "months", "weeks", "days",
-                               "year", "month", "week", "day", "weekday",
-                               "hour", "minute", "second", "microsecond",
-                               "millisecond")
-
-    use_relativedelta = False
-    if len(kwds_no_nanos) > 0:
-        if any(k in _kwds_use_relativedelta for k in kwds_no_nanos):
-            if "millisecond" in kwds_no_nanos:
-                raise NotImplementedError(
-                    "Using DateOffset to replace `millisecond` component in "
-                    "datetime object is not supported. Use "
-                    "`microsecond=timestamp.microsecond % 1000 + ms * 1000` "
-                    "instead."
-                )
-            offset = relativedelta(**kwds_no_nanos)
-            use_relativedelta = True
-        else:
-            # sub-daily offset - use timedelta (tz-aware)
-            offset = timedelta(**kwds_no_nanos)
-    elif any(nano in kwds for nano in ("nanosecond", "nanoseconds")):
-        offset = timedelta(days=0)
-    else:
-        # GH 45643/45890: (historically) defaults to 1 day for non-nano
-        # since datetime.timedelta doesn't handle nanoseconds
-        offset = timedelta(days=1)
-    return offset, use_relativedelta
+    if not kwds:
+        # GH 45643/45890: (historically) defaults to 1 day
+        return timedelta(days=1), False
+
+    if "millisecond" in kwds:
+        raise NotImplementedError(
+            "Using DateOffset to replace `millisecond` component in "
+            "datetime object is not supported. Use "
+            "`microsecond=timestamp.microsecond % 1000 + ms * 1000` "
+            "instead."
+        )
+
+    nanos = {"nanosecond", "nanoseconds"}
+
+    # nanos are handled by apply_wraps
+    if all(k in nanos for k in kwds):
+        return timedelta(days=0), False

+    kwds_no_nanos = {k: v for k, v in kwds.items() if k not in nanos}
+
+    kwds_use_relativedelta = {
+        "year", "month", "day", "hour", "minute",
+        "second", "microsecond", "weekday", "years", "months", "weeks", "days",
+        "hours", "minutes", "seconds", "microseconds"
+    }
+
+    # "weeks" and "days" are left out despite being valid args for timedelta,
+    # because (historically) timedelta is used only for sub-daily.
+    kwds_use_timedelta = {
+        "seconds", "microseconds", "milliseconds", "minutes", "hours",
+    }
+
+    if all(k in kwds_use_timedelta for k in kwds_no_nanos):
+        # Sub-daily offset - use timedelta (tz-aware)
+        # This also handles "milliseconds" (plur): see GH 49897
+        return timedelta(**kwds_no_nanos), False
+
+    # convert milliseconds to microseconds, so relativedelta can parse it
+    if "milliseconds" in kwds_no_nanos:
+        micro = kwds_no_nanos.pop("milliseconds") * 1000
+        kwds_no_nanos["microseconds"] = kwds_no_nanos.get("microseconds", 0) + micro
+
+    if all(k in kwds_use_relativedelta for k in kwds_no_nanos):
+        return relativedelta(**kwds_no_nanos), True
+
+    raise ValueError(
+        f"Invalid argument/s or bad combination of arguments: {list(kwds.keys())}"
+    )

 # ---------------------------------------------------------------------
 # Mixins & Singletons
@@ -1163,7 +1174,6 @@ cdef class RelativeDeltaOffset(BaseOffset):

     def __init__(self, n=1, normalize=False, **kwds):
         BaseOffset.__init__(self, n, normalize)
-
         off, use_rd = _determine_offset(kwds)
         object.__setattr__(self, "_offset", off)
         object.__setattr__(self, "_use_relativedelta", use_rd)
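The rewritten `_determine_offset` is now a chain of early returns. A pure-Python sketch of the classification rules it applies (function and constant names here are illustrative, not the pandas internals; the return strings only label which machinery would be chosen):

```python
NANOS = {"nanosecond", "nanoseconds"}
SUB_DAILY = {"seconds", "microseconds", "milliseconds", "minutes", "hours"}

def classify_offset_kwds(kwds):
    """Pick the machinery a DateOffset-style kwargs dict would use."""
    if not kwds:
        return "timedelta(days=1)"      # historical default (GH 45643/45890)
    if "millisecond" in kwds:
        raise NotImplementedError("singular 'millisecond' is rejected")
    if all(k in NANOS for k in kwds):
        return "timedelta(days=0)"      # nanos handled by apply_wraps
    no_nanos = {k: v for k, v in kwds.items() if k not in NANOS}
    if all(k in SUB_DAILY for k in no_nanos):
        return "timedelta"              # incl. plural "milliseconds" (GH 49897)
    # "milliseconds" would be folded into "microseconds" before relativedelta
    return "relativedelta"
```

The GH 49897 fix falls out of the ordering: `{"milliseconds": 5}` alone hits the sub-daily branch, while `{"days": 1, "milliseconds": 5}` falls through to relativedelta after the milliseconds-to-microseconds conversion, instead of raising `TypeError`.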

pandas/_libs/tslibs/parsing.pyx

Lines changed: 3 additions & 1 deletion

@@ -996,9 +996,11 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:

 cdef str _fill_token(token: str, padding: int):
     cdef str token_filled
-    if "." not in token:
+    if re.search(r"\d+\.\d+", token) is None:
+        # For example: 98
         token_filled = token.zfill(padding)
     else:
+        # For example: 00.123
         seconds, nanoseconds = token.split(".")
         seconds = f"{int(seconds):02d}"
         # right-pad so we get nanoseconds, then only take
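A pure-Python rendering of the changed branch: the new regex only treats a token as ``seconds.nanoseconds`` when digits appear on *both* sides of the dot, where the old ``"." not in token`` test matched any dot at all. The hunk's tail is truncated in this diff, so the fractional handling below is illustrative:

```python
import re

def fill_token(token: str, padding: int) -> str:
    if re.search(r"\d+\.\d+", token) is None:
        # Plain numeric token, e.g. "8" -> "08"
        return token.zfill(padding)
    # Fractional-seconds token, e.g. "0.123" -> "00.123"
    seconds, fractional = token.split(".")
    return f"{int(seconds):02d}.{fractional}"
```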

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 15 additions & 6 deletions

@@ -1,4 +1,5 @@
 import collections
+import warnings

 cimport cython
 from cpython.object cimport (
@@ -1947,9 +1948,13 @@ class Timedelta(_Timedelta):

         if other.dtype.kind == "m":
             # also timedelta-like
-            # TODO: could suppress
-            # RuntimeWarning: invalid value encountered in floor_divide
-            result = self.asm8 // other
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    "invalid value encountered in floor_divide",
+                    RuntimeWarning
+                )
+                result = self.asm8 // other
             mask = other.view("i8") == NPY_NAT
             if mask.any():
                 # We differ from numpy here
@@ -1987,9 +1992,13 @@ class Timedelta(_Timedelta):

         if other.dtype.kind == "m":
             # also timedelta-like
-            # TODO: could suppress
-            # RuntimeWarning: invalid value encountered in floor_divide
-            result = other // self.asm8
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    "invalid value encountered in floor_divide",
+                    RuntimeWarning
+                )
+                result = other // self.asm8
             mask = other.view("i8") == NPY_NAT
             if mask.any():
                 # We differ from numpy here
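The diff resolves the old TODO by suppressing the spurious `RuntimeWarning` that numpy can emit when floor-dividing `timedelta64` data containing `NaT` (the result at those positions is patched via the mask right afterwards, so the warning carries no information). A minimal sketch of the same pattern (helper name is mine):

```python
import warnings
import numpy as np

def floordiv_ignoring_nat_warning(left, right):
    # Suppress only the specific numpy message; the caller is expected to
    # mask out NaT positions afterwards, as the pandas code does.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", "invalid value encountered in floor_divide", RuntimeWarning
        )
        return left // right
```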

pandas/core/arrays/arrow/array.py

Lines changed: 39 additions & 0 deletions

@@ -853,6 +853,45 @@ def _concat_same_type(
         arr = pa.chunked_array(chunks)
         return cls(arr)

+    def _accumulate(
+        self, name: str, *, skipna: bool = True, **kwargs
+    ) -> ArrowExtensionArray | ExtensionArray:
+        """
+        Return an ExtensionArray performing an accumulation operation.
+
+        The underlying data type might change.
+
+        Parameters
+        ----------
+        name : str
+            Name of the function, supported values are:
+            - cummin
+            - cummax
+            - cumsum
+            - cumprod
+        skipna : bool, default True
+            If True, skip NA values.
+        **kwargs
+            Additional keyword arguments passed to the accumulation function.
+            Currently, there is no supported kwarg.
+
+        Returns
+        -------
+        array
+
+        Raises
+        ------
+        NotImplementedError : subclass does not define accumulations
+        """
+        pyarrow_name = {
+            "cumsum": "cumulative_sum_checked",
+        }.get(name, name)
+        pyarrow_meth = getattr(pc, pyarrow_name, None)
+        if pyarrow_meth is None:
+            return super()._accumulate(name, skipna=skipna, **kwargs)
+        result = pyarrow_meth(self._data, skip_nulls=skipna, **kwargs)
+        return type(self)(result)
+
     def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
         """
         Return a scalar result of performing the reduction operation.
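The new `_accumulate` dispatches by translating the pandas accumulation name to a pyarrow compute kernel name, looking it up with `getattr`, and falling back to the base class when the kernel is missing. A dependency-free sketch of that dispatch pattern, with the stdlib `math` module standing in for `pyarrow.compute` and an invented alias table:

```python
import math

# pandas-style name -> backend kernel name (illustrative, not pyarrow's)
ALIASES = {"cumsum": "fsum"}

def resolve_kernel(name):
    backend_name = ALIASES.get(name, name)
    kernel = getattr(math, backend_name, None)
    if kernel is None:
        # in the real method, this is where super()._accumulate takes over
        raise NotImplementedError(f"no kernel for {name}")
    return kernel
```

The `getattr(..., None)` lookup keeps the method forward-compatible: kernels pyarrow grows later are picked up automatically without a hard-coded allowlist.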

pandas/core/arrays/masked.py

Lines changed: 2 additions & 1 deletion

@@ -727,12 +727,13 @@ def _cmp_method(self, other, op) -> BooleanArray:
             mask = np.ones(self._data.shape, dtype="bool")
         else:
             with warnings.catch_warnings():
-                # numpy may show a FutureWarning:
+                # numpy may show a FutureWarning or DeprecationWarning:
                 # elementwise comparison failed; returning scalar instead,
                 # but in the future will perform elementwise comparison
                 # before returning NotImplemented. We fall back to the correct
                 # behavior today, so that should be fine to ignore.
                 warnings.filterwarnings("ignore", "elementwise", FutureWarning)
+                warnings.filterwarnings("ignore", "elementwise", DeprecationWarning)
                 with np.errstate(all="ignore"):
                     method = getattr(self._data, f"__{op.__name__}__")
                     result = method(other)
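The widened filter exists because different numpy versions report the failed elementwise comparison as either `FutureWarning` or `DeprecationWarning`, so both categories must be ignored around the comparison. A minimal sketch of the pattern (function name is mine):

```python
import warnings
import numpy as np

def safe_eq(data, other):
    with warnings.catch_warnings():
        # Cover both categories numpy has used for this message.
        warnings.filterwarnings("ignore", "elementwise", FutureWarning)
        warnings.filterwarnings("ignore", "elementwise", DeprecationWarning)
        with np.errstate(all="ignore"):
            return data == other
```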
