Skip to content

Commit 5a165b2

Browse files
committed
Merge pull request #134 from shoyer/time-conversion-fix-alt
Fix concatenating Variables with dtype=datetime64
2 parents 18cead4 + e9e1866 commit 5a165b2

File tree

6 files changed, with 261 additions (+261) and 86 deletions (-86).

test/test_utils.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,17 @@ def test(self):
2121
self.assertEqual(expected.dtype, actual.dtype)
2222

2323

24+
class TestArrayEquiv(TestCase):
25+
def test_0d(self):
26+
# verify our work around for pd.isnull not working for 0-dimensional
27+
# object arrays
28+
self.assertTrue(utils.array_equiv(0, np.array(0, dtype=object)))
29+
self.assertTrue(
30+
utils.array_equiv(np.nan, np.array(np.nan, dtype=object)))
31+
self.assertFalse(
32+
utils.array_equiv(0, np.array(1, dtype=object)))
33+
34+
2435
class TestDictionaries(TestCase):
2536
def setUp(self):
2637
self.x = {'a': 'A', 'b': 'B'}

test/test_variable.py

Lines changed: 175 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,10 @@
66
import numpy as np
77
import pandas as pd
88

9-
from xray import Variable, Dataset, DataArray
9+
from xray import Variable, Dataset, DataArray, indexing
1010
from xray.variable import (Coordinate, as_variable, NumpyArrayAdapter,
11-
PandasIndexAdapter)
11+
PandasIndexAdapter, _as_compatible_data)
12+
from xray.pycompat import PY3
1213

1314
from . import TestCase, source_ndarray
1415

@@ -36,32 +37,89 @@ def test_attrs(self):
3637
v.attrs['foo'] = 'baz'
3738
self.assertEqual(v.attrs['foo'], 'baz')
3839

39-
def test_0d_data(self):
40-
d = datetime(2000, 1, 1)
41-
for value, dtype in [(0, int),
42-
(np.float32(0.5), np.float32),
43-
('foo', np.str_),
44-
(d, None),
45-
(np.datetime64(d), np.datetime64)]:
40+
def assertIndexedLikeNDArray(self, variable, expected_value0,
41+
expected_dtype=None):
42+
"""Given a 1-dimensional variable, verify that the variable is indexed
43+
like a numpy.ndarray.
44+
"""
45+
self.assertEqual(variable[0].shape, ())
46+
self.assertEqual(variable[0].ndim, 0)
47+
self.assertEqual(variable[0].size, 1)
48+
# test identity
49+
self.assertTrue(variable.equals(variable.copy()))
50+
self.assertTrue(variable.identical(variable.copy()))
51+
# check value is equal for both ndarray and Variable
52+
self.assertEqual(variable.values[0], expected_value0)
53+
self.assertEqual(variable[0].values, expected_value0)
54+
# check type or dtype is consistent for both ndarray and Variable
55+
if expected_dtype is None:
56+
# check output type instead of array dtype
57+
self.assertEqual(type(variable.values[0]), type(expected_value0))
58+
self.assertEqual(type(variable[0].values), type(expected_value0))
59+
else:
60+
self.assertEqual(variable.values[0].dtype, expected_dtype)
61+
self.assertEqual(variable[0].values.dtype, expected_dtype)
62+
63+
def test_index_0d_int(self):
64+
for value, dtype in [(0, np.int_),
65+
(np.int32(0), np.int32)]:
66+
x = self.cls(['x'], [value])
67+
self.assertIndexedLikeNDArray(x, value, dtype)
68+
69+
def test_index_0d_float(self):
70+
for value, dtype in [(0.5, np.float_),
71+
(np.float32(0.5), np.float32)]:
72+
x = self.cls(['x'], [value])
73+
self.assertIndexedLikeNDArray(x, value, dtype)
74+
75+
def test_index_0d_string(self):
76+
for value, dtype in [('foo', np.dtype('U3' if PY3 else 'S3')),
77+
(u'foo', np.dtype('U3'))]:
4678
x = self.cls(['x'], [value])
47-
# check array properties
48-
self.assertEqual(x[0].shape, ())
49-
self.assertEqual(x[0].ndim, 0)
50-
self.assertEqual(x[0].size, 1)
51-
# test identity
52-
self.assertTrue(x.equals(x.copy()))
53-
self.assertTrue(x.identical(x.copy()))
54-
# check value is equal for both ndarray and Variable
55-
self.assertEqual(x.values[0], value)
56-
self.assertEqual(x[0].values, value)
57-
# check type or dtype is consistent for both ndarray and Variable
58-
if dtype is None:
59-
# check output type instead of array dtype
60-
self.assertEqual(type(x.values[0]), type(value))
61-
self.assertEqual(type(x[0].values), type(value))
62-
else:
63-
assert np.issubdtype(x.values[0].dtype, dtype), (x.values[0].dtype, dtype)
64-
assert np.issubdtype(x[0].values.dtype, dtype), (x[0].values.dtype, dtype)
79+
self.assertIndexedLikeNDArray(x, value, dtype)
80+
81+
def test_index_0d_datetime(self):
82+
d = datetime(2000, 1, 1)
83+
x = self.cls(['x'], [d])
84+
self.assertIndexedLikeNDArray(x, d)
85+
86+
x = self.cls(['x'], [np.datetime64(d)])
87+
self.assertIndexedLikeNDArray(x, np.datetime64(d), 'datetime64[ns]')
88+
89+
x = self.cls(['x'], pd.DatetimeIndex([d]))
90+
self.assertIndexedLikeNDArray(x, np.datetime64(d), 'datetime64[ns]')
91+
92+
def test_index_0d_object(self):
93+
94+
class HashableItemWrapper(object):
95+
def __init__(self, item):
96+
self.item = item
97+
98+
def __eq__(self, other):
99+
return self.item == other.item
100+
101+
def __hash__(self):
102+
return hash(self.item)
103+
104+
def __repr__(self):
105+
return '%s(item=%r)' % (type(self).__name__, self.item)
106+
107+
item = HashableItemWrapper((1, 2, 3))
108+
x = self.cls('x', [item])
109+
self.assertIndexedLikeNDArray(x, item)
110+
111+
def test_index_and_concat_datetime(self):
112+
# regression test for #125
113+
date_range = pd.date_range('2011-09-01', periods=10)
114+
for dates in [date_range, date_range.values,
115+
date_range.to_pydatetime()]:
116+
expected = self.cls('t', dates)
117+
for times in [[expected[i] for i in range(10)],
118+
[expected[i:(i + 1)] for i in range(10)],
119+
[expected[[i]] for i in range(10)]]:
120+
actual = Variable.concat(times, 't')
121+
self.assertEqual(expected.dtype, actual.dtype)
122+
self.assertArrayEqual(expected, actual)
65123

66124
def test_0d_time_data(self):
67125
# regression test for #105
@@ -229,6 +287,39 @@ def test_item(self):
229287
self.assertEqual(v.item(), 0)
230288
self.assertIs(type(v.item()), float)
231289

290+
def test_datetime64_conversion(self):
291+
# verify that datetime64 is always converted to ns precision with
292+
# sources preserved
293+
values = np.datetime64('2000-01-01T00')
294+
v = Variable([], values)
295+
self.assertEqual(v.dtype, np.dtype('datetime64[ns]'))
296+
self.assertEqual(v.values, values)
297+
self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
298+
299+
values = pd.date_range('2000-01-01', periods=3).values.astype(
300+
'datetime64[s]')
301+
v = Variable(['t'], values)
302+
self.assertEqual(v.dtype, np.dtype('datetime64[ns]'))
303+
self.assertArrayEqual(v.values, values)
304+
self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
305+
self.assertIsNot(source_ndarray(v.values), values)
306+
307+
values = pd.date_range('2000-01-01', periods=3).values.copy()
308+
v = Variable(['t'], values)
309+
self.assertEqual(v.dtype, np.dtype('datetime64[ns]'))
310+
self.assertArrayEqual(v.values, values)
311+
self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
312+
self.assertIs(source_ndarray(v.values), values)
313+
314+
def test_0d_str(self):
315+
v = Variable([], u'foo')
316+
self.assertEqual(v.dtype, np.dtype('U3'))
317+
self.assertEqual(v.values, 'foo')
318+
319+
v = Variable([], np.string_('foo'))
320+
self.assertEqual(v.dtype, np.dtype('S3'))
321+
self.assertEqual(v.values, bytes('foo', 'ascii') if PY3 else 'foo')
322+
232323
def test_equals_and_identical(self):
233324
d = np.random.rand(10, 3)
234325
d[0, 0] = np.nan
@@ -463,3 +554,60 @@ def test_data(self):
463554
self.assertIsInstance(x._data, PandasIndexAdapter)
464555
with self.assertRaisesRegexp(TypeError, 'cannot be modified'):
465556
x[:] = 0
557+
558+
def test_avoid_index_dtype_inference(self):
559+
# verify our work-around for (pandas<0.14):
560+
# https://github.com/pydata/pandas/issues/6370
561+
data = pd.date_range('2000-01-01', periods=3).to_pydatetime()
562+
t = Coordinate('t', data)
563+
self.assertArrayEqual(t.values[:2], data[:2])
564+
self.assertArrayEqual(t[:2].values, data[:2])
565+
self.assertArrayEqual(t.values[:2], data[:2])
566+
self.assertArrayEqual(t[:2].values, data[:2])
567+
self.assertEqual(t.dtype, object)
568+
self.assertEqual(t[:2].dtype, object)
569+
570+
571+
class TestAsCompatibleData(TestCase):
572+
def test_unchanged_types(self):
573+
types = (NumpyArrayAdapter, PandasIndexAdapter,
574+
indexing.LazilyIndexedArray)
575+
for t in types:
576+
for data in [np.arange(3),
577+
pd.date_range('2000-01-01', periods=3),
578+
pd.date_range('2000-01-01', periods=3).values]:
579+
x = t(data)
580+
self.assertIs(x, _as_compatible_data(x))
581+
582+
def test_converted_types(self):
583+
for input_array in [[[0, 1, 2]], pd.DataFrame([[0, 1, 2]])]:
584+
actual = _as_compatible_data(input_array)
585+
self.assertArrayEqual(np.asarray(input_array), actual)
586+
self.assertEqual(NumpyArrayAdapter, type(actual))
587+
self.assertEqual(np.dtype(int), actual.dtype)
588+
589+
def test_datetime(self):
590+
expected = np.datetime64('2000-01-01T00')
591+
actual = _as_compatible_data(expected)
592+
self.assertEqual(expected, actual)
593+
self.assertEqual(np.datetime64, type(actual))
594+
self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
595+
596+
expected = np.array([np.datetime64('2000-01-01T00')])
597+
actual = _as_compatible_data(expected)
598+
self.assertEqual(np.asarray(expected), actual)
599+
self.assertEqual(NumpyArrayAdapter, type(actual))
600+
self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
601+
602+
expected = np.array([np.datetime64('2000-01-01T00', 'ns')])
603+
actual = _as_compatible_data(expected)
604+
self.assertEqual(np.asarray(expected), actual)
605+
self.assertEqual(NumpyArrayAdapter, type(actual))
606+
self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
607+
self.assertIs(expected, source_ndarray(np.asarray(actual)))
608+
609+
expected = pd.Timestamp('2000-01-01T00').to_datetime()
610+
actual = _as_compatible_data(expected)
611+
self.assertEqual(np.asarray(expected), actual)
612+
self.assertEqual(NumpyArrayAdapter, type(actual))
613+
self.assertEqual(np.dtype('O'), actual.dtype)

xray/backends/netCDF4_.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from .netcdf3 import encode_nc3_variable
88
import xray
99
from xray.conventions import encode_cf_variable
10-
from xray.utils import FrozenOrderedDict, NDArrayMixin, as_array_or_item
10+
from xray.utils import FrozenOrderedDict, NDArrayMixin
1111
from xray import indexing
1212
from xray.pycompat import iteritems, basestring
1313

@@ -31,7 +31,7 @@ def __getitem__(self, key):
3131
# work around for netCDF4-python's broken handling of 0-d
3232
# arrays (slicing them always returns a 1-dimensional array):
3333
# https://github.com/Unidata/netcdf4-python/pull/220
34-
data = as_array_or_item(np.asscalar(self.array[key]))
34+
data = np.asscalar(self.array[key])
3535
else:
3636
data = self.array[key]
3737
return data

xray/indexing.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -212,12 +212,8 @@ def shape(self):
212212
shape.append(k.size)
213213
return tuple(shape)
214214

215-
@property
216-
def values(self):
217-
return self.array[self.key]
218-
219215
def __array__(self, dtype=None):
220-
return np.asarray(self.values, dtype=None)
216+
return np.asarray(self.array[self.key], dtype=None)
221217

222218
def __getitem__(self, key):
223219
return type(self)(self.array, self._updated_key(key))

xray/utils.py

Lines changed: 16 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import operator
55
import warnings
66
from collections import OrderedDict, Mapping, MutableMapping
7-
from datetime import datetime
87

98
import numpy as np
109
import pandas as pd
@@ -36,35 +35,6 @@ def __new__(cls, *args, **kwargs):
3635
return Wrapper
3736

3837

39-
def as_safe_array(values, dtype=None):
40-
"""Like np.asarray, but convert all datetime64 arrays to ns precision
41-
"""
42-
values = np.asarray(values, dtype=dtype)
43-
if values.dtype.kind == 'M':
44-
# np.datetime64
45-
values = values.astype('datetime64[ns]')
46-
return values
47-
48-
49-
def as_array_or_item(values, dtype=None):
50-
"""Return the given values as a numpy array of the indicated dtype, or as
51-
an individual value if it's a 0-dimensional object array or datetime.
52-
"""
53-
if isinstance(values, datetime):
54-
# shortcut because if you try to make a datetime or Timestamp object
55-
# into an array with the proper dtype, it is liable to be silently
56-
# converted into an integer instead :(
57-
return values
58-
values = as_safe_array(values, dtype=dtype)
59-
if values.ndim == 0 and values.dtype.kind == 'O':
60-
# unpack 0d object arrays to be consistent with numpy
61-
values = values.item()
62-
if isinstance(values, pd.Timestamp):
63-
# turn Timestamps back into datetime64 objects
64-
values = np.datetime64(values, 'ns')
65-
return values
66-
67-
6838
def squeeze(xray_obj, dimensions, dimension=None):
6939
"""Squeeze the dimensions of an xray object."""
7040
if dimension is None:
@@ -93,11 +63,22 @@ def array_equiv(arr1, arr2):
9363
arr1, arr2 = np.asarray(arr1), np.asarray(arr2)
9464
if arr1.shape != arr2.shape:
9565
return False
96-
# we could make this faster by not-checking for null values if the dtype
97-
# does not support them, but the logic would get more convoluted.
98-
# using pd.isnull lets us defer the NaN handling to pandas (and unlike
99-
# np.isnan it works on every dtype).
100-
return ((arr1 == arr2) | (pd.isnull(arr1) & pd.isnull(arr2))).all()
66+
if arr1.ndim == 0:
67+
# work around for pd.isnull not working for 0-dimensional object
68+
# arrays: https://github.com/pydata/pandas/pull/7176 (should be fixed
69+
# in pandas 0.14)
70+
# use .item() instead of keeping around 0-dimensional arrays to avoid
71+
# the numpy quirk where object arrays are checked as equal by identity
72+
# (hence NaN in an object array is equal to itself):
73+
arr1 = arr1.item()
74+
arr2 = arr2.item()
75+
return arr1 == arr2 or (arr1 != arr1 and arr2 != arr2)
76+
else:
77+
# we could make this faster by not-checking for null values if the
78+
# dtype does not support them, but the logic would get more convoluted.
79+
# using pd.isnull lets us defer the NaN handling to pandas (and unlike
80+
# np.isnan it works on every dtype).
81+
return ((arr1 == arr2) | (pd.isnull(arr1) & pd.isnull(arr2))).all()
10182

10283

10384
def safe_cast_to_index(array):

0 commit comments

Comments (0)