Commit f91c8a5

Merge remote-tracking branch 'upstream/master' into clipboard-fixup
2 parents: 479208e + fe1803d

31 files changed (+1392, -1205 lines)

.travis.yml

Lines changed: 3 additions & 5 deletions

@@ -30,11 +30,9 @@ matrix:
     - python: 3.5

   include:
-    - dist: bionic
-      # 18.04
-      python: 3.8.0
+    - dist: trusty
       env:
-        - JOB="3.8-dev" PATTERN="(not slow and not network)"
+        - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"

     - dist: trusty
       env:
@@ -88,7 +86,7 @@ install:
 script:
   - echo "script start"
   - echo "$JOB"
-  - if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
+  - source activate pandas-dev
   - ci/run_tests.sh

 after_script:

ci/azure/windows.yml

Lines changed: 15 additions & 19 deletions

@@ -11,49 +11,45 @@ jobs:
     py36_np15:
       ENV_FILE: ci/deps/azure-windows-36.yaml
       CONDA_PY: "36"
+      PATTERN: "not slow and not network"

     py37_np141:
       ENV_FILE: ci/deps/azure-windows-37.yaml
       CONDA_PY: "37"
+      PATTERN: "not slow and not network"

   steps:
   - powershell: |
       Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
       Write-Host "##vso[task.prependpath]$HOME/miniconda3/bin"
     displayName: 'Add conda to PATH'
   - script: conda update -q -n base conda
-    displayName: Update conda
-  - script: |
-      call activate
+    displayName: 'Update conda'
+  - bash: |
       conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
     displayName: 'Create anaconda environment'
-  - script: |
-      call activate pandas-dev
-      call conda list
+  - bash: |
+      source activate pandas-dev
+      conda list
       ci\\incremental\\build.cmd
     displayName: 'Build'
-  - script: |
-      call activate pandas-dev
-      pytest -m "not slow and not network" --junitxml=test-data.xml pandas -n 2 -r sxX --strict --durations=10 %*
+  - bash: |
+      source activate pandas-dev
+      ci/run_tests.sh
     displayName: 'Test'
   - task: PublishTestResults@2
     inputs:
       testResultsFiles: 'test-data.xml'
       testRunTitle: 'Windows-$(CONDA_PY)'
   - powershell: |
-      $junitXml = "test-data.xml"
-      $(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
-      if ($matches[1] -eq 0)
-      {
+      $(Get-Content "test-data.xml" | Out-String) -match 'failures="(.*?)"'
+      if ($matches[1] -eq 0) {
         Write-Host "No test failures in test-data"
-      }
-      else
-      {
-        # note that this will produce $LASTEXITCODE=1
-        Write-Error "$($matches[1]) tests failed"
+      } else {
+        Write-Error "$($matches[1]) tests failed"  # note: will produce $LASTEXITCODE=1
       }
     displayName: 'Check for test failures'
-  - script: |
+  - bash: |
       source activate pandas-dev
       python ci/print_skipped.py
     displayName: 'Print skipped tests'

ci/build38.sh

Lines changed: 0 additions & 19 deletions
This file was deleted.

ci/deps/travis-38.yaml

Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+name: pandas-dev
+channels:
+  - defaults
+  - conda-forge
+dependencies:
+  - python=3.8.*
+  - cython>=0.29.13
+  - numpy
+  - python-dateutil
+  - nomkl
+  - pytz
+  # universal
+  - pytest>=5.0.0
+  - pytest-xdist>=1.29.0
+  - hypothesis>=3.58.0
+  - pip

ci/setup_env.sh

Lines changed: 0 additions & 5 deletions

@@ -1,10 +1,5 @@
 #!/bin/bash -e

-if [ "$JOB" == "3.8-dev" ]; then
-    /bin/bash ci/build38.sh
-    exit 0
-fi
-
 # edit the locale file if needed
 if [ -n "$LOCALE_OVERRIDE" ]; then
     echo "Adding locale to the first line of pandas/__init__.py"

doc/source/development/extending.rst

Lines changed: 42 additions & 0 deletions

@@ -251,6 +251,48 @@ To use a test, subclass it:
 See https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/base/__init__.py
 for a list of all the tests available.

+.. _extending.extension.arrow:
+
+Compatibility with Apache Arrow
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An ``ExtensionArray`` can support conversion to / from ``pyarrow`` arrays
+(and thus support, for example, serialization to the Parquet file format)
+by implementing two methods: ``ExtensionArray.__arrow_array__`` and
+``ExtensionDtype.__from_arrow__``.
+
+The ``ExtensionArray.__arrow_array__`` method ensures that ``pyarrow`` knows how
+to convert the specific extension array into a ``pyarrow.Array`` (also when
+included as a column in a pandas DataFrame):
+
+.. code-block:: python
+
+    class MyExtensionArray(ExtensionArray):
+        ...
+
+        def __arrow_array__(self, type=None):
+            # convert the underlying array values to a pyarrow Array
+            import pyarrow
+            return pyarrow.array(..., type=type)
+
+The ``ExtensionDtype.__from_arrow__`` method then controls the conversion
+back from pyarrow to a pandas ExtensionArray. This method receives a pyarrow
+``Array`` or ``ChunkedArray`` as its only argument and is expected to return the
+appropriate pandas ``ExtensionArray`` for this dtype and the passed values:
+
+.. code-block:: none
+
+    class ExtensionDtype:
+        ...
+
+        def __from_arrow__(self, array: pyarrow.Array/ChunkedArray) -> ExtensionArray:
+            ...
+
+See more in the `Arrow documentation <https://arrow.apache.org/docs/python/extending_types.html>`__.
+
+These methods have been implemented for the nullable integer and string extension
+dtypes included in pandas, and ensure a roundtrip to pyarrow and the Parquet file format.
+
 .. _extension dtype dtypes: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/dtypes.py
 .. _extension dtype source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py
 .. _extension array source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/arrays/base.py
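For reference, here is a minimal self-contained sketch of the two hooks this hunk documents, written against pandas' public extension API. The DecimalDtype / DecimalArray names and the float64 backing store are illustrative assumptions, not part of this commit, and a production ExtensionArray must also implement the rest of the interface (_from_sequence, __len__, __getitem__, isna, take, copy, ...):

    # Sketch only: shows just the two Arrow hooks from the docs above.
    # DecimalDtype/DecimalArray are hypothetical names for illustration.
    import numpy as np
    import pyarrow

    from pandas.api.extensions import ExtensionArray, ExtensionDtype


    class DecimalDtype(ExtensionDtype):
        name = "toy_decimal"
        type = float

        @classmethod
        def construct_array_type(cls):
            return DecimalArray

        def __from_arrow__(self, array):
            # pyarrow passes an Array or a ChunkedArray; normalize to chunks
            chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks
            values = np.concatenate(
                [chunk.to_numpy(zero_copy_only=False) for chunk in chunks]
            )
            return DecimalArray(values)


    class DecimalArray(ExtensionArray):
        def __init__(self, values):
            self._data = np.asarray(values, dtype="float64")

        def __arrow_array__(self, type=None):
            # called by pyarrow when this column is converted to Arrow
            return pyarrow.array(self._data, type=type)

pandas' own nullable integer and string dtypes follow this same pattern, as the closing note of the hunk says.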

doc/source/user_guide/io.rst

Lines changed: 3 additions & 0 deletions

@@ -4716,6 +4716,9 @@ Several caveats.
 * The ``pyarrow`` engine preserves the ``ordered`` flag of categorical dtypes with string types. ``fastparquet`` does not preserve the ``ordered`` flag.
 * Unsupported types include ``Period`` and actual Python object types. These will raise a helpful error message
   on an attempt at serialization.
+* The ``pyarrow`` engine preserves extension data types such as the nullable integer and string data
+  types (this requires pyarrow >= 1.0.0, and requires the extension type to implement the needed protocols,
+  see the :ref:`extension types documentation <extending.extension.arrow>`).

 You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, ``fastparquet``, or ``auto``.
 If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``,
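A quick sketch of the roundtrip this new caveat describes (the file name is illustrative, and it assumes a pyarrow version that implements the protocols above):

    # Roundtrip nullable extension dtypes through parquet with pyarrow.
    import pandas as pd

    df = pd.DataFrame(
        {
            "ints": pd.array([1, 2, None], dtype="Int64"),
            "strs": pd.array(["a", None, "c"], dtype="string"),
        }
    )
    df.to_parquet("example.parquet", engine="pyarrow")

    result = pd.read_parquet("example.parquet", engine="pyarrow")
    print(result.dtypes)  # Int64 / string preserved, not cast to float64 / object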

doc/source/whatsnew/v1.0.0.rst

Lines changed: 6 additions & 0 deletions

@@ -114,6 +114,9 @@ Other enhancements
 - Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`)
 - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`)
 - :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`)
+- Roundtripping DataFrames with nullable integer or string data types to parquet
+  (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the ``'pyarrow'`` engine
+  now preserves those data types with pyarrow >= 1.0.0 (:issue:`20612`).

 Build Changes
 ^^^^^^^^^^^^^
@@ -342,6 +345,7 @@ Numeric
 - :class:`DataFrame` flex inequality comparison methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`)
 - Bug in :class:`DataFrame` logical operations (``&``, ``|``, ``^``) not matching :class:`Series` behavior by filling NA values (:issue:`28741`)
 - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references a variable before it is assigned (:issue:`29142`)
+- Bug in :meth:`Series.var` computing the wrong value for a nullable integer dtype series because the ``ddof`` argument was not passed through (:issue:`29128`)
 - Improved error message when using ``frac`` > 1 and ``replace`` = False (:issue:`27451`)
 - Bug in numeric indexes that made it possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`)
 - Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`)
@@ -432,6 +436,7 @@ Groupby/resample/rolling

 -
 - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
+- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning the wrong dtype when used with an empty series or dataframe (:issue:`28427`)
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
 - Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
 - Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).
@@ -451,6 +456,7 @@ Reshaping
 - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
 - Better error message in :func:`get_dummies` when ``columns`` isn't a list-like value (:issue:`28383`)
 - Bug in :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`)
+- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)

 Sparse
 ^^^^^^
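A hedged illustration of the Series.var entry under Numeric above (the printed values assume the post-fix behaviour described in the changelog):

    # After the fix, ddof is passed through for nullable integer series.
    import pandas as pd

    s = pd.Series([1, 2, 3, None], dtype="Int64")
    print(s.var())        # sample variance over the three non-NA values (ddof=1)
    print(s.var(ddof=0))  # population variance; previously ddof was not honoured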

pandas/_libs/internals.pyx

Lines changed: 5 additions & 12 deletions

@@ -1,21 +1,14 @@
 import cython
 from cython import Py_ssize_t

-from cpython.object cimport PyObject
+from cpython.slice cimport PySlice_GetIndicesEx

 cdef extern from "Python.h":
     Py_ssize_t PY_SSIZE_T_MAX

 import numpy as np
 from numpy cimport int64_t

-cdef extern from "compat_helper.h":
-    cdef int slice_get_indices(PyObject* s, Py_ssize_t length,
-                               Py_ssize_t *start, Py_ssize_t *stop,
-                               Py_ssize_t *step,
-                               Py_ssize_t *slicelength) except -1
-
-
 from pandas._libs.algos import ensure_int64

@@ -258,8 +251,8 @@ cpdef Py_ssize_t slice_len(
     if slc is None:
         raise TypeError("slc must be slice")

-    slice_get_indices(<PyObject *>slc, objlen,
-                      &start, &stop, &step, &length)
+    PySlice_GetIndicesEx(slc, objlen,
+                         &start, &stop, &step, &length)

     return length

@@ -278,8 +271,8 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
     if slc is None:
         raise TypeError("slc should be a slice")

-    slice_get_indices(<PyObject *>slc, objlen,
-                      &start, &stop, &step, &length)
+    PySlice_GetIndicesEx(slc, objlen,
+                         &start, &stop, &step, &length)

     return start, stop, step, length
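For readers not fluent in the C slice API: PySlice_GetIndicesEx, which replaces the vendored compat_helper.h shim here, is the public CPython counterpart of Python-level slice.indices(). A pure-Python model of what slice_len computes (an illustration, not the Cython code itself):

    # Model of slice_len: clip a slice to a sequence length, then count.
    def slice_len(slc: slice, objlen: int) -> int:
        start, stop, step = slc.indices(objlen)  # what PySlice_GetIndicesEx fills in
        return len(range(start, stop, step))

    assert slice_len(slice(None), 5) == 5      # full slice over 5 elements
    assert slice_len(slice(1, 10, 2), 5) == 2  # stop is clipped to objlen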

pandas/_libs/reduction.pyx

Lines changed: 8 additions & 10 deletions

@@ -135,9 +135,8 @@ cdef class Reducer:
             else:
                 res = self.f(chunk)

-            if (not _is_sparse_array(res) and hasattr(res, 'values')
-                    and util.is_array(res.values)):
-                res = res.values
+            # TODO: reason for not squeezing here?
+            res = _extract_result(res, squeeze=False)
             if i == 0:
                 # On the first pass, we check the output shape to see
                 # if this looks like a reduction.
@@ -402,18 +401,17 @@ cdef class SeriesGrouper(_BaseGrouper):
         return result, counts


-cdef inline _extract_result(object res):
+cdef inline _extract_result(object res, bint squeeze=True):
     """ extract the result object, it might be a 0-dim ndarray
         or a len-1 1-dim, or a scalar """
     if (not _is_sparse_array(res) and hasattr(res, 'values')
             and util.is_array(res.values)):
         res = res.values
-    if not np.isscalar(res):
-        if util.is_array(res):
-            if res.ndim == 0:
-                res = res.item()
-            elif res.ndim == 1 and len(res) == 1:
-                res = res[0]
+    if util.is_array(res):
+        if res.ndim == 0:
+            res = res.item()
+        elif squeeze and res.ndim == 1 and len(res) == 1:
+            res = res[0]
     return res

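A pure-Python model of the refactored _extract_result above, to make the new squeeze flag concrete (the real function is a Cython cdef and also special-cases sparse arrays; this sketch omits that):

    import numpy as np

    def extract_result(res, squeeze=True):
        # mirror of _extract_result: unwrap .values, then optionally squeeze
        if hasattr(res, "values") and isinstance(res.values, np.ndarray):
            res = res.values
        if isinstance(res, np.ndarray):
            if res.ndim == 0:
                res = res.item()                 # 0-dim array -> scalar
            elif squeeze and res.ndim == 1 and len(res) == 1:
                res = res[0]                     # length-1 array -> scalar
        return res

    assert extract_result(np.array(3.0)) == 3.0
    assert extract_result(np.array([3.0])) == 3.0
    assert isinstance(extract_result(np.array([3.0]), squeeze=False), np.ndarray)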
pandas/_libs/src/compat_helper.h

Lines changed: 0 additions & 45 deletions
This file was deleted.
