Commit f91c8a5

Merge remote-tracking branch 'upstream/master' into clipboard-fixup
2 parents: 479208e + fe1803d

31 files changed (+1392, -1205 lines)

.travis.yml

Lines changed: 3 additions & 5 deletions

@@ -30,11 +30,9 @@ matrix:
     - python: 3.5

   include:
-    - dist: bionic
-      # 18.04
-      python: 3.8.0
+    - dist: trusty
       env:
-        - JOB="3.8-dev" PATTERN="(not slow and not network)"
+        - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"

     - dist: trusty
       env:
@@ -88,7 +86,7 @@ install:
 script:
   - echo "script start"
   - echo "$JOB"
-  - if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
+  - source activate pandas-dev
   - ci/run_tests.sh

 after_script:

ci/azure/windows.yml

Lines changed: 15 additions & 19 deletions

@@ -11,49 +11,45 @@ jobs:
     py36_np15:
       ENV_FILE: ci/deps/azure-windows-36.yaml
       CONDA_PY: "36"
+      PATTERN: "not slow and not network"

     py37_np141:
       ENV_FILE: ci/deps/azure-windows-37.yaml
       CONDA_PY: "37"
+      PATTERN: "not slow and not network"

   steps:
   - powershell: |
       Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
       Write-Host "##vso[task.prependpath]$HOME/miniconda3/bin"
     displayName: 'Add conda to PATH'
   - script: conda update -q -n base conda
-    displayName: Update conda
-  - script: |
-      call activate
+    displayName: 'Update conda'
+  - bash: |
       conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
     displayName: 'Create anaconda environment'
-  - script: |
-      call activate pandas-dev
-      call conda list
+  - bash: |
+      source activate pandas-dev
+      conda list
       ci\\incremental\\build.cmd
     displayName: 'Build'
-  - script: |
-      call activate pandas-dev
-      pytest -m "not slow and not network" --junitxml=test-data.xml pandas -n 2 -r sxX --strict --durations=10 %*
+  - bash: |
+      source activate pandas-dev
+      ci/run_tests.sh
     displayName: 'Test'
   - task: PublishTestResults@2
     inputs:
       testResultsFiles: 'test-data.xml'
       testRunTitle: 'Windows-$(CONDA_PY)'
   - powershell: |
-      $junitXml = "test-data.xml"
-      $(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
-      if ($matches[1] -eq 0)
-      {
+      $(Get-Content "test-data.xml" | Out-String) -match 'failures="(.*?)"'
+      if ($matches[1] -eq 0) {
         Write-Host "No test failures in test-data"
-      }
-      else
-      {
-        # note that this will produce $LASTEXITCODE=1
-        Write-Error "$($matches[1]) tests failed"
+      } else {
+        Write-Error "$($matches[1]) tests failed"  # note: will produce $LASTEXITCODE=1
       }
     displayName: 'Check for test failures'
-  - script: |
+  - bash: |
       source activate pandas-dev
       python ci/print_skipped.py
     displayName: 'Print skipped tests'

ci/build38.sh

Lines changed: 0 additions & 19 deletions
This file was deleted.

ci/deps/travis-38.yaml

Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+name: pandas-dev
+channels:
+  - defaults
+  - conda-forge
+dependencies:
+  - python=3.8.*
+  - cython>=0.29.13
+  - numpy
+  - python-dateutil
+  - nomkl
+  - pytz
+  # universal
+  - pytest>=5.0.0
+  - pytest-xdist>=1.29.0
+  - hypothesis>=3.58.0
+  - pip

ci/setup_env.sh

Lines changed: 0 additions & 5 deletions

@@ -1,10 +1,5 @@
 #!/bin/bash -e

-if [ "$JOB" == "3.8-dev" ]; then
-    /bin/bash ci/build38.sh
-    exit 0
-fi
-
 # edit the locale file if needed
 if [ -n "$LOCALE_OVERRIDE" ]; then
     echo "Adding locale to the first line of pandas/__init__.py"

doc/source/development/extending.rst

Lines changed: 42 additions & 0 deletions

@@ -251,6 +251,48 @@ To use a test, subclass it:
 See https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/base/__init__.py
 for a list of all the tests available.

+.. _extending.extension.arrow:
+
+Compatibility with Apache Arrow
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An ``ExtensionArray`` can support conversion to / from ``pyarrow`` arrays
+(and thus support, for example, serialization to the Parquet file format)
+by implementing two methods: ``ExtensionArray.__arrow_array__`` and
+``ExtensionDtype.__from_arrow__``.
+
+The ``ExtensionArray.__arrow_array__`` method ensures that ``pyarrow`` knows how
+to convert the specific extension array into a ``pyarrow.Array`` (also when
+included as a column in a pandas DataFrame):
+
+.. code-block:: python
+
+    class MyExtensionArray(ExtensionArray):
+        ...
+
+        def __arrow_array__(self, type=None):
+            # convert the underlying array values to a pyarrow Array
+            import pyarrow
+            return pyarrow.array(..., type=type)
+
+The ``ExtensionDtype.__from_arrow__`` method then controls the conversion
+back from pyarrow to a pandas ExtensionArray. This method receives a pyarrow
+``Array`` or ``ChunkedArray`` as its only argument and is expected to return the
+appropriate pandas ``ExtensionArray`` for this dtype and the passed values:
+
+.. code-block:: none
+
+    class ExtensionDtype:
+        ...
+
+        def __from_arrow__(self, array: pyarrow.Array/ChunkedArray) -> ExtensionArray:
+            ...
+
+See more in the `Arrow documentation <https://arrow.apache.org/docs/python/extending_types.html>`__.
+
+These methods have been implemented for the nullable integer and string extension
+dtypes included in pandas, and ensure a roundtrip to pyarrow and the Parquet file format.
+
 .. _extension dtype dtypes: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/dtypes.py
 .. _extension dtype source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py
 .. _extension array source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/arrays/base.py
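For reference, here is a minimal self-contained sketch of the two hooks this hunk documents, written against pandas' public extension API. The DecimalDtype / DecimalArray names and the float64 backing store are illustrative assumptions, not part of this commit, and a production ExtensionArray must also implement the rest of the interface (_from_sequence, __len__, __getitem__, isna, take, copy, ...):

    # Sketch only: shows just the two Arrow hooks from the docs above.
    # DecimalDtype/DecimalArray are hypothetical names for illustration.
    import numpy as np
    import pyarrow

    from pandas.api.extensions import ExtensionArray, ExtensionDtype


    class DecimalDtype(ExtensionDtype):
        name = "toy_decimal"
        type = float

        @classmethod
        def construct_array_type(cls):
            return DecimalArray

        def __from_arrow__(self, array):
            # pyarrow passes an Array or a ChunkedArray; normalize to chunks
            chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks
            values = np.concatenate(
                [chunk.to_numpy(zero_copy_only=False) for chunk in chunks]
            )
            return DecimalArray(values)


    class DecimalArray(ExtensionArray):
        def __init__(self, values):
            self._data = np.asarray(values, dtype="float64")

        def __arrow_array__(self, type=None):
            # called by pyarrow when this column is converted to Arrow
            return pyarrow.array(self._data, type=type)

pandas' own nullable integer and string dtypes follow this same pattern, as the closing note of the hunk says.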

doc/source/user_guide/io.rst

Lines changed: 3 additions & 0 deletions

@@ -4716,6 +4716,9 @@ Several caveats.
 * The ``pyarrow`` engine preserves the ``ordered`` flag of categorical dtypes with string types. ``fastparquet`` does not preserve the ``ordered`` flag.
 * Unsupported types include ``Period`` and actual Python object types. These will raise a helpful error message
   on an attempt at serialization.
+* The ``pyarrow`` engine preserves extension data types such as the nullable integer and string data
+  types (this requires pyarrow >= 1.0.0, and requires the extension type to implement the needed protocols,
+  see the :ref:`extension types documentation <extending.extension.arrow>`).

 You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, ``fastparquet``, or ``auto``.
 If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``,
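A quick sketch of the roundtrip this new caveat describes (the file name is illustrative, and it assumes a pyarrow version that implements the protocols above):

    # Roundtrip nullable extension dtypes through parquet with pyarrow.
    import pandas as pd

    df = pd.DataFrame(
        {
            "ints": pd.array([1, 2, None], dtype="Int64"),
            "strs": pd.array(["a", None, "c"], dtype="string"),
        }
    )
    df.to_parquet("example.parquet", engine="pyarrow")

    result = pd.read_parquet("example.parquet", engine="pyarrow")
    print(result.dtypes)  # Int64 / string preserved, not cast to float64 / object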

doc/source/whatsnew/v1.0.0.rst

Lines changed: 6 additions & 0 deletions

@@ -114,6 +114,9 @@ Other enhancements
 - Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`)
 - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`)
 - :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`)
+- Roundtripping DataFrames with nullable integer or string data types to parquet
+  (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the ``'pyarrow'`` engine
+  now preserves those data types with pyarrow >= 1.0.0 (:issue:`20612`).

 Build Changes
 ^^^^^^^^^^^^^
@@ -342,6 +345,7 @@ Numeric
 - :class:`DataFrame` flex inequality comparison methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`)
 - Bug in :class:`DataFrame` logical operations (``&``, ``|``, ``^``) not matching :class:`Series` behavior by filling NA values (:issue:`28741`)
 - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references a variable before it is assigned (:issue:`29142`)
+- Bug in :meth:`Series.var` computing the wrong value for a nullable integer dtype series because the ``ddof`` argument was not passed through (:issue:`29128`)
 - Improved error message when using ``frac`` > 1 and ``replace`` = False (:issue:`27451`)
 - Bug in numeric indexes that made it possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`)
 - Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`)
@@ -432,6 +436,7 @@ Groupby/resample/rolling

 -
 - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
+- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning the wrong dtype when used with an empty series or dataframe (:issue:`28427`)
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
 - Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
 - Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).
@@ -451,6 +456,7 @@ Reshaping
 - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
 - Better error message in :func:`get_dummies` when ``columns`` isn't a list-like value (:issue:`28383`)
 - Bug in :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`)
+- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)

 Sparse
 ^^^^^^
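A hedged illustration of the Series.var entry under Numeric above (the printed values assume the post-fix behaviour described in the changelog):

    # After the fix, ddof is passed through for nullable integer series.
    import pandas as pd

    s = pd.Series([1, 2, 3, None], dtype="Int64")
    print(s.var())        # sample variance over the three non-NA values (ddof=1)
    print(s.var(ddof=0))  # population variance; previously ddof was not honoured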

pandas/_libs/internals.pyx

Lines changed: 5 additions & 12 deletions

@@ -1,21 +1,14 @@
 import cython
 from cython import Py_ssize_t

-from cpython.object cimport PyObject
+from cpython.slice cimport PySlice_GetIndicesEx

 cdef extern from "Python.h":
     Py_ssize_t PY_SSIZE_T_MAX

 import numpy as np
 from numpy cimport int64_t

-cdef extern from "compat_helper.h":
-    cdef int slice_get_indices(PyObject* s, Py_ssize_t length,
-                               Py_ssize_t *start, Py_ssize_t *stop,
-                               Py_ssize_t *step,
-                               Py_ssize_t *slicelength) except -1
-
-
 from pandas._libs.algos import ensure_int64

@@ -258,8 +251,8 @@ cpdef Py_ssize_t slice_len(
     if slc is None:
         raise TypeError("slc must be slice")

-    slice_get_indices(<PyObject *>slc, objlen,
-                      &start, &stop, &step, &length)
+    PySlice_GetIndicesEx(slc, objlen,
+                         &start, &stop, &step, &length)

     return length

@@ -278,8 +271,8 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
     if slc is None:
         raise TypeError("slc should be a slice")

-    slice_get_indices(<PyObject *>slc, objlen,
-                      &start, &stop, &step, &length)
+    PySlice_GetIndicesEx(slc, objlen,
+                         &start, &stop, &step, &length)

     return start, stop, step, length
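For readers not fluent in the C slice API: PySlice_GetIndicesEx, which replaces the vendored compat_helper.h shim here, is the public CPython counterpart of Python-level slice.indices(). A pure-Python model of what slice_len computes (an illustration, not the Cython code itself):

    # Model of slice_len: clip a slice to a sequence length, then count.
    def slice_len(slc: slice, objlen: int) -> int:
        start, stop, step = slc.indices(objlen)  # what PySlice_GetIndicesEx fills in
        return len(range(start, stop, step))

    assert slice_len(slice(None), 5) == 5      # full slice over 5 elements
    assert slice_len(slice(1, 10, 2), 5) == 2  # stop is clipped to objlen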

pandas/_libs/reduction.pyx

Lines changed: 8 additions & 10 deletions

@@ -135,9 +135,8 @@ cdef class Reducer:
             else:
                 res = self.f(chunk)

-            if (not _is_sparse_array(res) and hasattr(res, 'values')
-                    and util.is_array(res.values)):
-                res = res.values
+            # TODO: reason for not squeezing here?
+            res = _extract_result(res, squeeze=False)
             if i == 0:
                 # On the first pass, we check the output shape to see
                 # if this looks like a reduction.
@@ -402,18 +401,17 @@ cdef class SeriesGrouper(_BaseGrouper):
         return result, counts


-cdef inline _extract_result(object res):
+cdef inline _extract_result(object res, bint squeeze=True):
     """ extract the result object, it might be a 0-dim ndarray
         or a len-1 1-dim, or a scalar """
     if (not _is_sparse_array(res) and hasattr(res, 'values')
             and util.is_array(res.values)):
         res = res.values
-    if not np.isscalar(res):
-        if util.is_array(res):
-            if res.ndim == 0:
-                res = res.item()
-            elif res.ndim == 1 and len(res) == 1:
-                res = res[0]
+    if util.is_array(res):
+        if res.ndim == 0:
+            res = res.item()
+        elif squeeze and res.ndim == 1 and len(res) == 1:
+            res = res[0]
     return res

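A pure-Python model of the refactored _extract_result above, to make the new squeeze flag concrete (the real function is a Cython cdef and also special-cases sparse arrays; this sketch omits that):

    import numpy as np

    def extract_result(res, squeeze=True):
        # mirror of _extract_result: unwrap .values, then optionally squeeze
        if hasattr(res, "values") and isinstance(res.values, np.ndarray):
            res = res.values
        if isinstance(res, np.ndarray):
            if res.ndim == 0:
                res = res.item()                 # 0-dim array -> scalar
            elif squeeze and res.ndim == 1 and len(res) == 1:
                res = res[0]                     # length-1 array -> scalar
        return res

    assert extract_result(np.array(3.0)) == 3.0
    assert extract_result(np.array([3.0])) == 3.0
    assert isinstance(extract_result(np.array([3.0]), squeeze=False), np.ndarray)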
pandas/_libs/src/compat_helper.h

Lines changed: 0 additions & 45 deletions
This file was deleted.
