Skip to content

Commit dba85bd

Browse files
crusaderky, max-sixty
authored and committed
Fix regression: IndexVariable.copy(deep=True) casts dtype=U to object (#3095)
* Fix regression: IndexVariable.copy(deep=True) casts dtype=U to object
* What's New
* Simplify copy()
* Cosmetic
* Code review
1 parent 7b76f16 commit dba85bd

File tree

4 files changed

+39
-21
lines changed

4 files changed

+39
-21
lines changed

doc/whats-new.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,10 @@ Enhancements
4646

4747
Bug fixes
4848
~~~~~~~~~
49-
50-
- Improved error handling and documentation for `.expand_dims()`
49+
- Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert
50+
unicode indices to dtype=object (:issue:`3094`).
51+
By `Guido Imperiale <https://github.com/crusaderky>`_.
52+
- Improved error handling and documentation for `.expand_dims()`
5153
read-only view.
5254
- Fix tests for big-endian systems (:issue:`3125`).
5355
By `Graham Inggs <https://github.com/ginggs>`_.

xarray/core/indexing.py

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
from collections import defaultdict
44
from contextlib import suppress
55
from datetime import timedelta
6-
from typing import Sequence
6+
from typing import Any, Tuple, Sequence, Union
77

88
import numpy as np
99
import pandas as pd
1010

1111
from . import duck_array_ops, nputils, utils
12+
from .npcompat import DTypeLike
1213
from .pycompat import dask_array_type, integer_types
1314
from .utils import is_dict_like
1415

@@ -1227,9 +1228,10 @@ def transpose(self, order):
12271228

12281229

12291230
class PandasIndexAdapter(ExplicitlyIndexedNDArrayMixin):
1230-
"""Wrap a pandas.Index to preserve dtypes and handle explicit indexing."""
1231+
"""Wrap a pandas.Index to preserve dtypes and handle explicit indexing.
1232+
"""
12311233

1232-
def __init__(self, array, dtype=None):
1234+
def __init__(self, array: Any, dtype: DTypeLike = None):
12331235
self.array = utils.safe_cast_to_index(array)
12341236
if dtype is None:
12351237
if isinstance(array, pd.PeriodIndex):
@@ -1241,13 +1243,15 @@ def __init__(self, array, dtype=None):
12411243
dtype = np.dtype('O')
12421244
else:
12431245
dtype = array.dtype
1246+
else:
1247+
dtype = np.dtype(dtype)
12441248
self._dtype = dtype
12451249

12461250
@property
1247-
def dtype(self):
1251+
def dtype(self) -> np.dtype:
12481252
return self._dtype
12491253

1250-
def __array__(self, dtype=None):
1254+
def __array__(self, dtype: DTypeLike = None) -> np.ndarray:
12511255
if dtype is None:
12521256
dtype = self.dtype
12531257
array = self.array
@@ -1258,11 +1262,18 @@ def __array__(self, dtype=None):
12581262
return np.asarray(array.values, dtype=dtype)
12591263

12601264
@property
1261-
def shape(self):
1265+
def shape(self) -> Tuple[int]:
12621266
# .shape is broken on pandas prior to v0.15.2
12631267
return (len(self.array),)
12641268

1265-
def __getitem__(self, indexer):
1269+
def __getitem__(
1270+
self, indexer
1271+
) -> Union[
1272+
NumpyIndexingAdapter,
1273+
np.ndarray,
1274+
np.datetime64,
1275+
np.timedelta64,
1276+
]:
12661277
key = indexer.tuple
12671278
if isinstance(key, tuple) and len(key) == 1:
12681279
# unpack key so it can index a pandas.Index object (pandas.Index
@@ -1299,9 +1310,20 @@ def __getitem__(self, indexer):
12991310

13001311
return result
13011312

1302-
def transpose(self, order):
1313+
def transpose(self, order) -> pd.Index:
13031314
return self.array # self.array should be always one-dimensional
13041315

1305-
def __repr__(self):
1316+
def __repr__(self) -> str:
13061317
return ('%s(array=%r, dtype=%r)'
13071318
% (type(self).__name__, self.array, self.dtype))
1319+
1320+
def copy(self, deep: bool = True) -> 'PandasIndexAdapter':
1321+
# Not the same as just writing `self.array.copy(deep=deep)`, as
1322+
# shallow copies of the underlying numpy.ndarrays become deep ones
1323+
# upon pickling
1324+
# >>> len(pickle.dumps((self.array, self.array)))
1325+
# 4000281
1326+
# >>> len(pickle.dumps((self.array, self.array.copy(deep=False))))
1327+
# 8000341
1328+
array = self.array.copy(deep=True) if deep else self.array
1329+
return PandasIndexAdapter(array, self._dtype)

xarray/core/variable.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,14 +1942,7 @@ def copy(self, deep=True, data=None):
19421942
data copied from original.
19431943
"""
19441944
if data is None:
1945-
if deep:
1946-
# self._data should be a `PandasIndexAdapter` instance at this
1947-
# point, which doesn't have a copy method, so make a deep copy
1948-
# of the underlying `pandas.MultiIndex` and create a new
1949-
# `PandasIndexAdapter` instance with it.
1950-
data = PandasIndexAdapter(self._data.array.copy(deep=True))
1951-
else:
1952-
data = self._data
1945+
data = self._data.copy(deep=deep)
19531946
else:
19541947
data = as_compatible_data(data)
19551948
if self.shape != data.shape:

xarray/tests/test_variable.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -477,8 +477,9 @@ def test_concat_mixed_dtypes(self):
477477
assert actual.dtype == object
478478

479479
@pytest.mark.parametrize('deep', [True, False])
480-
def test_copy(self, deep):
481-
v = self.cls('x', 0.5 * np.arange(10), {'foo': 'bar'})
480+
@pytest.mark.parametrize('astype', [float, int, str])
481+
def test_copy(self, deep, astype):
482+
v = self.cls('x', (0.5 * np.arange(10)).astype(astype), {'foo': 'bar'})
482483
w = v.copy(deep=deep)
483484
assert type(v) is type(w)
484485
assert_identical(v, w)

0 commit comments

Comments
 (0)