Skip to content

Commit e678ec9

Browse files
authored
Array formatting fixes for sparse and NEP-18 arrays. (#3211)
* Array formatting fixes for sparse and NEP-18 arrays.

  I also did a bit of cleanup (e.g., renaming methods) in xarray.core.formatting.

  Sparse arrays were previously not shown in the Dataset repr:

      <xarray.Dataset>
      Dimensions: (x: 4)
      Coordinates:
          y (x) int64 ...
      Dimensions without coordinates: x
      Data variables:
          a (x) float64 ...

  Now they are:

      <xarray.Dataset>
      Dimensions: (x: 4)
      Coordinates:
          y (x) int64 <COO: shape=(4,), nnz=3, fill_value=0>
      Dimensions without coordinates: x
      Data variables:
          a (x) float64 <COO: shape=(4,), nnz=4, fill_value=0.0>

* Let black worry about lines that are too long, not flake8
* Test fixes
* typo
* Fix comment on repr
* black
* remove all references to E501
* skip dataarray repr test for old numpy
* add comment to asserts
* ensure checks for dask arrays work with old dask
* formatting tweaks
* fix skip
* Test for old dask
* black fix
1 parent af98280 commit e678ec9

11 files changed

+251
-155
lines changed

setup.cfg

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ markers =
1515
slow: slow tests
1616

1717
[flake8]
18-
max-line-length=88
1918
ignore=
2019
# whitespace before ':' - doesn't work well with black
2120
E203
2221
E402
22+
# line too long - let black worry about that
23+
E501
2324
# do not assign a lambda expression, use a def
2425
E731
2526
# line break before binary operator

xarray/coding/cftime_offsets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,7 @@ def cftime_range(
955955
See Also
956956
--------
957957
pandas.date_range
958-
""" # noqa: E501
958+
"""
959959
# Adapted from pandas.core.indexes.datetimes._generate_range.
960960
if _count_not_none(start, end, periods, freq) != 3:
961961
raise ValueError(

xarray/coding/cftimeindex.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ def to_datetimeindex(self, unsafe=False):
480480
CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], dtype='object')
481481
>>> times.to_datetimeindex()
482482
DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None)
483-
""" # noqa: E501
483+
"""
484484
nptimes = cftime_to_nptime(self)
485485
calendar = infer_calendar_name(self)
486486
if calendar not in _STANDARD_CALENDARS and not unsafe:

xarray/core/computation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -960,7 +960,7 @@ def earth_mover_distance(first_samples,
960960
.. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html
961961
.. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
962962
.. [3] http://xarray.pydata.org/en/stable/computation.html#wrapping-custom-computation
963-
""" # noqa: E501 # don't error on that URL one line up
963+
"""
964964
from .groupby import GroupBy
965965
from .dataarray import DataArray
966966
from .variable import Variable

xarray/core/formatting.py

Lines changed: 77 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from .duck_array_ops import array_equiv
1212
from .options import OPTIONS
13-
from .pycompat import dask_array_type
13+
from .pycompat import dask_array_type, sparse_array_type
1414

1515
try:
1616
from pandas.errors import OutOfBoundsDatetime
@@ -211,9 +211,60 @@ def format_array_flat(array, max_width):
211211
return pprint_str
212212

213213

214-
def summarize_variable(
215-
name, var, col_width, show_values=True, marker=" ", max_width=None
216-
):
214+
_KNOWN_TYPE_REPRS = {np.ndarray: "np.ndarray"}
215+
with contextlib.suppress(ImportError):
216+
import sparse
217+
218+
_KNOWN_TYPE_REPRS[sparse.COO] = "sparse.COO"
219+
220+
221+
def inline_dask_repr(array):
222+
"""Similar to dask.array.DataArray.__repr__, but without
223+
redundant information that's already printed by the repr
224+
function of the xarray wrapper.
225+
"""
226+
assert isinstance(array, dask_array_type), array
227+
228+
chunksize = tuple(c[0] for c in array.chunks)
229+
230+
if hasattr(array, "_meta"):
231+
meta = array._meta
232+
if type(meta) in _KNOWN_TYPE_REPRS:
233+
meta_repr = _KNOWN_TYPE_REPRS[type(meta)]
234+
else:
235+
meta_repr = type(meta).__name__
236+
meta_string = ", meta={}".format(meta_repr)
237+
else:
238+
meta_string = ""
239+
240+
return "dask.array<chunksize={}{}>".format(chunksize, meta_string)
241+
242+
243+
def inline_sparse_repr(array):
244+
"""Similar to sparse.COO.__repr__, but without the redundant shape/dtype."""
245+
assert isinstance(array, sparse_array_type), array
246+
return "<{}: nnz={:d}, fill_value={!s}>".format(
247+
type(array).__name__, array.nnz, array.fill_value
248+
)
249+
250+
251+
def inline_variable_array_repr(var, max_width):
252+
"""Build a one-line summary of a variable's data."""
253+
if var._in_memory:
254+
return format_array_flat(var, max_width)
255+
elif isinstance(var._data, dask_array_type):
256+
return inline_dask_repr(var.data)
257+
elif isinstance(var._data, sparse_array_type):
258+
return inline_sparse_repr(var.data)
259+
elif hasattr(var._data, "__array_function__"):
260+
return maybe_truncate(repr(var._data).replace("\n", " "), max_width)
261+
else:
262+
# internal xarray array type
263+
return "..."
264+
265+
266+
def summarize_variable(name, var, col_width, marker=" ", max_width=None):
267+
"""Summarize a variable in one line, e.g., for the Dataset.__repr__."""
217268
if max_width is None:
218269
max_width = OPTIONS["display_width"]
219270
first_col = pretty_print(" {} {} ".format(marker, name), col_width)
@@ -222,12 +273,9 @@ def summarize_variable(
222273
else:
223274
dims_str = ""
224275
front_str = "{}{}{} ".format(first_col, dims_str, var.dtype)
225-
if show_values:
226-
values_str = format_array_flat(var, max_width - len(front_str))
227-
elif isinstance(var._data, dask_array_type):
228-
values_str = short_dask_repr(var, show_dtype=False)
229-
else:
230-
values_str = "..."
276+
277+
values_width = max_width - len(front_str)
278+
values_str = inline_variable_array_repr(var, values_width)
231279

232280
return front_str + values_str
233281

@@ -249,13 +297,11 @@ def _summarize_coord_levels(coord, col_width, marker="-"):
249297

250298

251299
def summarize_datavar(name, var, col_width):
252-
show_values = var._in_memory
253-
return summarize_variable(name, var.variable, col_width, show_values)
300+
return summarize_variable(name, var.variable, col_width)
254301

255302

256303
def summarize_coord(name, var, col_width):
257304
is_index = name in var.dims
258-
show_values = var._in_memory
259305
marker = "*" if is_index else " "
260306
if is_index:
261307
coord = var.variable.to_index_variable()
@@ -266,7 +312,7 @@ def summarize_coord(name, var, col_width):
266312
_summarize_coord_levels(coord, col_width),
267313
]
268314
)
269-
return summarize_variable(name, var.variable, col_width, show_values, marker)
315+
return summarize_variable(name, var.variable, col_width, marker)
270316

271317

272318
def summarize_attr(key, value, col_width=None):
@@ -361,14 +407,14 @@ def unindexed_dims_repr(dims, coords):
361407
def set_numpy_options(*args, **kwargs):
362408
original = np.get_printoptions()
363409
np.set_printoptions(*args, **kwargs)
364-
yield
365-
np.set_printoptions(**original)
366-
410+
try:
411+
yield
412+
finally:
413+
np.set_printoptions(**original)
367414

368-
def short_array_repr(array):
369415

370-
if not hasattr(array, "__array_function__"):
371-
array = np.asarray(array)
416+
def short_numpy_repr(array):
417+
array = np.asarray(array)
372418

373419
# default to lower precision so a full (abbreviated) line can fit on
374420
# one line with the default display_width
@@ -384,26 +430,19 @@ def short_array_repr(array):
384430
return repr(array)
385431

386432

387-
def short_dask_repr(array, show_dtype=True):
388-
"""Similar to dask.array.DataArray.__repr__, but without
389-
redundant information that's already printed by the repr
390-
function of the xarray wrapper.
391-
"""
392-
chunksize = tuple(c[0] for c in array.chunks)
393-
if show_dtype:
394-
return "dask.array<shape={}, dtype={}, chunksize={}>".format(
395-
array.shape, array.dtype, chunksize
396-
)
397-
else:
398-
return "dask.array<shape={}, chunksize={}>".format(array.shape, chunksize)
399-
400-
401433
def short_data_repr(array):
402-
if isinstance(getattr(array, "variable", array)._data, dask_array_type):
403-
return short_dask_repr(array)
434+
"""Format "data" for DataArray and Variable."""
435+
internal_data = getattr(array, "variable", array)._data
436+
if isinstance(array, np.ndarray):
437+
return short_numpy_repr(array)
438+
elif hasattr(internal_data, "__array_function__") or isinstance(
439+
internal_data, dask_array_type
440+
):
441+
return repr(array.data)
404442
elif array._in_memory or array.size < 1e5:
405-
return short_array_repr(array.data)
443+
return short_numpy_repr(array)
406444
else:
445+
# internal xarray array type
407446
return "[{} values with dtype={}]".format(array.size, array.dtype)
408447

409448

@@ -554,7 +593,7 @@ def diff_array_repr(a, b, compat):
554593
summary.append(diff_dim_summary(a, b))
555594

556595
if not array_equiv(a.data, b.data):
557-
temp = [wrap_indent(short_array_repr(obj), start=" ") for obj in (a, b)]
596+
temp = [wrap_indent(short_numpy_repr(obj), start=" ") for obj in (a, b)]
558597
diff_data_repr = [
559598
ab_side + "\n" + ab_data_repr
560599
for ab_side, ab_data_repr in zip(("L", "R"), temp)

xarray/core/variable.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,9 @@ def _in_memory(self):
317317

318318
@property
319319
def data(self):
320-
if hasattr(self._data, "__array_function__"):
320+
if hasattr(self._data, "__array_function__") or isinstance(
321+
self._data, dask_array_type
322+
):
321323
return self._data
322324
else:
323325
return self.values

xarray/tests/test_dask.py

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,8 @@ def test_binary_op(self):
149149
def test_repr(self):
150150
expected = dedent(
151151
"""\
152-
<xarray.Variable (x: 4, y: 6)>
153-
dask.array<shape=(4, 6), dtype=float64, chunksize=(2, 2)>"""
152+
<xarray.Variable (x: 4, y: 6)>
153+
dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2)>"""
154154
)
155155
assert expected == repr(self.lazy_var)
156156

@@ -491,41 +491,55 @@ def test_dot(self):
491491
lazy = self.lazy_array.dot(self.lazy_array[0])
492492
self.assertLazyAndAllClose(eager, lazy)
493493

494+
@pytest.mark.skipif(LooseVersion(dask.__version__) >= "2.0", reason="no meta")
495+
def test_dataarray_repr_legacy(self):
496+
data = build_dask_array("data")
497+
nonindex_coord = build_dask_array("coord")
498+
a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
499+
expected = dedent(
500+
"""\
501+
<xarray.DataArray 'data' (x: 1)>
502+
dask.array<data, shape=(1,), dtype=int64, chunksize=(1,)>
503+
Coordinates:
504+
y (x) int64 dask.array<chunksize=(1,)>
505+
Dimensions without coordinates: x"""
506+
)
507+
assert expected == repr(a)
508+
assert kernel_call_count == 0 # should not evaluate dask array
509+
510+
@pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0", reason="needs meta")
494511
def test_dataarray_repr(self):
495-
# Test that __repr__ converts the dask backend to numpy
496-
# in neither the data variable nor the non-index coords
497512
data = build_dask_array("data")
498513
nonindex_coord = build_dask_array("coord")
499514
a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
500515
expected = dedent(
501516
"""\
502-
<xarray.DataArray 'data' (x: 1)>
503-
dask.array<shape=(1,), dtype=int64, chunksize=(1,)>
504-
Coordinates:
505-
y (x) int64 dask.array<shape=(1,), chunksize=(1,)>
506-
Dimensions without coordinates: x"""
517+
<xarray.DataArray 'data' (x: 1)>
518+
dask.array<data, shape=(1,), dtype=int64, chunksize=(1,)>
519+
Coordinates:
520+
y (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
521+
Dimensions without coordinates: x"""
507522
)
508523
assert expected == repr(a)
509-
assert kernel_call_count == 0
524+
assert kernel_call_count == 0 # should not evaluate dask array
510525

526+
@pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0", reason="needs meta")
511527
def test_dataset_repr(self):
512-
# Test that pickling/unpickling converts the dask backend
513-
# to numpy in neither the data variables nor the non-index coords
514528
data = build_dask_array("data")
515529
nonindex_coord = build_dask_array("coord")
516530
ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)})
517531
expected = dedent(
518532
"""\
519-
<xarray.Dataset>
520-
Dimensions: (x: 1)
521-
Coordinates:
522-
y (x) int64 dask.array<shape=(1,), chunksize=(1,)>
523-
Dimensions without coordinates: x
524-
Data variables:
525-
a (x) int64 dask.array<shape=(1,), chunksize=(1,)>"""
533+
<xarray.Dataset>
534+
Dimensions: (x: 1)
535+
Coordinates:
536+
y (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
537+
Dimensions without coordinates: x
538+
Data variables:
539+
a (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>"""
526540
)
527541
assert expected == repr(ds)
528-
assert kernel_call_count == 0
542+
assert kernel_call_count == 0 # should not evaluate dask array
529543

530544
def test_dataarray_pickle(self):
531545
# Test that pickling/unpickling converts the dask backend

xarray/tests/test_dataarray.py

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -57,30 +57,34 @@ def test_repr(self):
5757
data_array = DataArray(v, coords, name="my_variable")
5858
expected = dedent(
5959
"""\
60-
<xarray.DataArray 'my_variable' (time: 2, x: 3)>
61-
array([[1, 2, 3],
62-
[4, 5, 6]])
63-
Coordinates:
64-
* x (x) int64 0 1 2
65-
other int64 0
66-
Dimensions without coordinates: time
67-
Attributes:
68-
foo: bar"""
60+
<xarray.DataArray 'my_variable' (time: 2, x: 3)>
61+
array([[1, 2, 3],
62+
[4, 5, 6]])
63+
Coordinates:
64+
* x (x) int64 0 1 2
65+
other int64 0
66+
Dimensions without coordinates: time
67+
Attributes:
68+
foo: bar"""
6969
)
7070
assert expected == repr(data_array)
7171

7272
def test_repr_multiindex(self):
7373
expected = dedent(
7474
"""\
75-
<xarray.DataArray (x: 4)>
76-
array([0, 1, 2, 3])
77-
Coordinates:
78-
* x (x) MultiIndex
79-
- level_1 (x) object 'a' 'a' 'b' 'b'
80-
- level_2 (x) int64 1 2 1 2"""
75+
<xarray.DataArray (x: 4)>
76+
array([0, 1, 2, 3])
77+
Coordinates:
78+
* x (x) MultiIndex
79+
- level_1 (x) object 'a' 'a' 'b' 'b'
80+
- level_2 (x) int64 1 2 1 2"""
8181
)
8282
assert expected == repr(self.mda)
8383

84+
@pytest.mark.skipif(
85+
LooseVersion(np.__version__) < "1.15",
86+
reason="old versions of numpy have different printing behavior",
87+
)
8488
def test_repr_multiindex_long(self):
8589
mindex_long = pd.MultiIndex.from_product(
8690
[["a", "b", "c", "d"], [1, 2, 3, 4, 5, 6, 7, 8]],
@@ -89,13 +93,13 @@ def test_repr_multiindex_long(self):
8993
mda_long = DataArray(list(range(32)), coords={"x": mindex_long}, dims="x")
9094
expected = dedent(
9195
"""\
92-
<xarray.DataArray (x: 32)>
93-
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
94-
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])
95-
Coordinates:
96-
* x (x) MultiIndex
97-
- level_1 (x) object 'a' 'a' 'a' 'a' 'a' 'a' 'a' ... 'd' 'd' 'd' 'd' 'd' 'd'
98-
- level_2 (x) int64 1 2 3 4 5 6 7 8 1 2 3 4 5 6 ... 4 5 6 7 8 1 2 3 4 5 6 7 8""" # noqa: E501
96+
<xarray.DataArray (x: 32)>
97+
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
98+
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])
99+
Coordinates:
100+
* x (x) MultiIndex
101+
- level_1 (x) object 'a' 'a' 'a' 'a' 'a' 'a' 'a' ... 'd' 'd' 'd' 'd' 'd' 'd'
102+
- level_2 (x) int64 1 2 3 4 5 6 7 8 1 2 3 4 5 6 ... 4 5 6 7 8 1 2 3 4 5 6 7 8"""
99103
)
100104
assert expected == repr(mda_long)
101105

0 commit comments

Comments
 (0)