Skip to content

Array formatting fixes for sparse and NEP-18 arrays. #3211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Aug 16, 2019
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ markers =
slow: slow tests

[flake8]
max-line-length=88
ignore=
# whitespace before ':' - doesn't work well with black
E203
E402
# line too long - let black worry about that
E501
# do not assign a lambda expression, use a def
E731
# line break before binary operator
Expand Down
2 changes: 1 addition & 1 deletion xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,7 @@ def cftime_range(
See Also
--------
pandas.date_range
""" # noqa: E501
"""
# Adapted from pandas.core.indexes.datetimes._generate_range.
if _count_not_none(start, end, periods, freq) != 3:
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def to_datetimeindex(self, unsafe=False):
CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], dtype='object')
>>> times.to_datetimeindex()
DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None)
""" # noqa: E501
"""
nptimes = cftime_to_nptime(self)
calendar = infer_calendar_name(self)
if calendar not in _STANDARD_CALENDARS and not unsafe:
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ def earth_mover_distance(first_samples,
.. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html
.. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
.. [3] http://xarray.pydata.org/en/stable/computation.html#wrapping-custom-computation
""" # noqa: E501 # don't error on that URL one line up
"""
from .groupby import GroupBy
from .dataarray import DataArray
from .variable import Variable
Expand Down
115 changes: 77 additions & 38 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from .duck_array_ops import array_equiv
from .options import OPTIONS
from .pycompat import dask_array_type
from .pycompat import dask_array_type, sparse_array_type

try:
from pandas.errors import OutOfBoundsDatetime
Expand Down Expand Up @@ -211,9 +211,60 @@ def format_array_flat(array, max_width):
return pprint_str


def summarize_variable(
name, var, col_width, show_values=True, marker=" ", max_width=None
):
_KNOWN_TYPE_REPRS = {np.ndarray: "np.ndarray"}
with contextlib.suppress(ImportError):
import sparse

_KNOWN_TYPE_REPRS[sparse.COO] = "sparse.COO"


def inline_dask_repr(array):
    """Return a compact, one-line description of a dask array.

    Modelled on the dask array's own repr, but leaving out details that the
    repr of the enclosing xarray object already prints. Only the chunk size
    and, when the array exposes a ``_meta`` attribute, a label for the type
    of that meta object are included.
    """
    assert isinstance(array, dask_array_type), array

    # Length of the first chunk along every dimension.
    first_chunks = tuple(dim_chunks[0] for dim_chunks in array.chunks)

    meta_suffix = ""
    if hasattr(array, "_meta"):
        meta_type = type(array._meta)
        # Use the short alias registered for known types, otherwise fall
        # back to the bare class name.
        meta_label = _KNOWN_TYPE_REPRS.get(meta_type, meta_type.__name__)
        meta_suffix = ", meta={}".format(meta_label)

    return "dask.array<chunksize={}{}>".format(first_chunks, meta_suffix)


def inline_sparse_repr(array):
    """Return a compact, one-line description of a sparse array.

    Shows the class name, the ``nnz`` attribute and the fill value; shape and
    dtype are deliberately omitted as redundant.
    """
    assert isinstance(array, sparse_array_type), array

    type_name = type(array).__name__
    template = "<{}: nnz={:d}, fill_value={!s}>"
    return template.format(type_name, array.nnz, array.fill_value)


def inline_variable_array_repr(var, max_width):
    """Produce a single-line summary of the data held by a variable.

    The checks run in order: in-memory data, dask arrays, sparse arrays,
    then any other object exposing ``__array_function__``; anything else
    is summarized as ``"..."``.
    """
    if var._in_memory:
        return format_array_flat(var, max_width)

    raw = var._data

    if isinstance(raw, dask_array_type):
        return inline_dask_repr(var.data)

    if isinstance(raw, sparse_array_type):
        return inline_sparse_repr(var.data)

    if hasattr(raw, "__array_function__"):
        # Collapse a multi-line repr onto one line before truncating.
        flattened = repr(raw).replace("\n", " ")
        return maybe_truncate(flattened, max_width)

    # internal xarray array type
    return "..."


def summarize_variable(name, var, col_width, marker=" ", max_width=None):
"""Summarize a variable in one line, e.g., for the Dataset.__repr__."""
if max_width is None:
max_width = OPTIONS["display_width"]
first_col = pretty_print(" {} {} ".format(marker, name), col_width)
Expand All @@ -222,12 +273,9 @@ def summarize_variable(
else:
dims_str = ""
front_str = "{}{}{} ".format(first_col, dims_str, var.dtype)
if show_values:
values_str = format_array_flat(var, max_width - len(front_str))
elif isinstance(var._data, dask_array_type):
values_str = short_dask_repr(var, show_dtype=False)
else:
values_str = "..."

values_width = max_width - len(front_str)
values_str = inline_variable_array_repr(var, values_width)

return front_str + values_str

Expand All @@ -249,13 +297,11 @@ def _summarize_coord_levels(coord, col_width, marker="-"):


def summarize_datavar(name, var, col_width):
show_values = var._in_memory
return summarize_variable(name, var.variable, col_width, show_values)
return summarize_variable(name, var.variable, col_width)


def summarize_coord(name, var, col_width):
is_index = name in var.dims
show_values = var._in_memory
marker = "*" if is_index else " "
if is_index:
coord = var.variable.to_index_variable()
Expand All @@ -266,7 +312,7 @@ def summarize_coord(name, var, col_width):
_summarize_coord_levels(coord, col_width),
]
)
return summarize_variable(name, var.variable, col_width, show_values, marker)
return summarize_variable(name, var.variable, col_width, marker)


def summarize_attr(key, value, col_width=None):
Expand Down Expand Up @@ -361,14 +407,14 @@ def unindexed_dims_repr(dims, coords):
def set_numpy_options(*args, **kwargs):
original = np.get_printoptions()
np.set_printoptions(*args, **kwargs)
yield
np.set_printoptions(**original)

try:
yield
finally:
np.set_printoptions(**original)

def short_array_repr(array):

if not hasattr(array, "__array_function__"):
array = np.asarray(array)
def short_numpy_repr(array):
array = np.asarray(array)

# default to lower precision so a full (abbreviated) line can fit on
# one line with the default display_width
Expand All @@ -384,26 +430,19 @@ def short_array_repr(array):
return repr(array)


def short_dask_repr(array, show_dtype=True):
"""Similar to dask.array.DataArray.__repr__, but without
redundant information that's already printed by the repr
function of the xarray wrapper.
"""
chunksize = tuple(c[0] for c in array.chunks)
if show_dtype:
return "dask.array<shape={}, dtype={}, chunksize={}>".format(
array.shape, array.dtype, chunksize
)
else:
return "dask.array<shape={}, chunksize={}>".format(array.shape, chunksize)


def short_data_repr(array):
if isinstance(getattr(array, "variable", array)._data, dask_array_type):
return short_dask_repr(array)
"""Format "data" for DataArray and Variable."""
internal_data = getattr(array, "variable", array)._data
if isinstance(array, np.ndarray):
return short_numpy_repr(array)
elif hasattr(internal_data, "__array_function__") or isinstance(
internal_data, dask_array_type
):
return repr(array.data)
elif array._in_memory or array.size < 1e5:
return short_array_repr(array.data)
return short_numpy_repr(array)
else:
# internal xarray array type
return "[{} values with dtype={}]".format(array.size, array.dtype)


Expand Down Expand Up @@ -554,7 +593,7 @@ def diff_array_repr(a, b, compat):
summary.append(diff_dim_summary(a, b))

if not array_equiv(a.data, b.data):
temp = [wrap_indent(short_array_repr(obj), start=" ") for obj in (a, b)]
temp = [wrap_indent(short_numpy_repr(obj), start=" ") for obj in (a, b)]
diff_data_repr = [
ab_side + "\n" + ab_data_repr
for ab_side, ab_data_repr in zip(("L", "R"), temp)
Expand Down
4 changes: 3 additions & 1 deletion xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,9 @@ def _in_memory(self):

@property
def data(self):
if hasattr(self._data, "__array_function__"):
if hasattr(self._data, "__array_function__") or isinstance(
self._data, dask_array_type
):
return self._data
else:
return self.values
Expand Down
54 changes: 34 additions & 20 deletions xarray/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ def test_binary_op(self):
def test_repr(self):
expected = dedent(
"""\
<xarray.Variable (x: 4, y: 6)>
dask.array<shape=(4, 6), dtype=float64, chunksize=(2, 2)>"""
<xarray.Variable (x: 4, y: 6)>
dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2)>"""
)
assert expected == repr(self.lazy_var)

Expand Down Expand Up @@ -491,41 +491,55 @@ def test_dot(self):
lazy = self.lazy_array.dot(self.lazy_array[0])
self.assertLazyAndAllClose(eager, lazy)

@pytest.mark.skipif(LooseVersion(dask.__version__) >= "2.0", reason="no meta")
def test_dataarray_repr_legacy(self):
data = build_dask_array("data")
nonindex_coord = build_dask_array("coord")
a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
expected = dedent(
"""\
<xarray.DataArray 'data' (x: 1)>
dask.array<data, shape=(1,), dtype=int64, chunksize=(1,)>
Coordinates:
y (x) int64 dask.array<chunksize=(1,)>
Dimensions without coordinates: x"""
)
assert expected == repr(a)
assert kernel_call_count == 0 # should not evaluate dask array

@pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0", reason="needs meta")
def test_dataarray_repr(self):
# Test that __repr__ converts the dask backend to numpy
# in neither the data variable nor the non-index coords
data = build_dask_array("data")
nonindex_coord = build_dask_array("coord")
a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
expected = dedent(
"""\
<xarray.DataArray 'data' (x: 1)>
dask.array<shape=(1,), dtype=int64, chunksize=(1,)>
Coordinates:
y (x) int64 dask.array<shape=(1,), chunksize=(1,)>
Dimensions without coordinates: x"""
<xarray.DataArray 'data' (x: 1)>
dask.array<data, shape=(1,), dtype=int64, chunksize=(1,)>
Coordinates:
y (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
Dimensions without coordinates: x"""
)
assert expected == repr(a)
assert kernel_call_count == 0
assert kernel_call_count == 0 # should not evaluate dask array

@pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0", reason="needs meta")
def test_dataset_repr(self):
# Test that __repr__ converts the dask backend
# to numpy in neither the data variables nor the non-index coords
data = build_dask_array("data")
nonindex_coord = build_dask_array("coord")
ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)})
expected = dedent(
"""\
<xarray.Dataset>
Dimensions: (x: 1)
Coordinates:
y (x) int64 dask.array<shape=(1,), chunksize=(1,)>
Dimensions without coordinates: x
Data variables:
a (x) int64 dask.array<shape=(1,), chunksize=(1,)>"""
<xarray.Dataset>
Dimensions: (x: 1)
Coordinates:
y (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
Dimensions without coordinates: x
Data variables:
a (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>"""
)
assert expected == repr(ds)
assert kernel_call_count == 0
assert kernel_call_count == 0 # should not evaluate dask array

def test_dataarray_pickle(self):
# Test that pickling/unpickling converts the dask backend
Expand Down
48 changes: 26 additions & 22 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,30 +57,34 @@ def test_repr(self):
data_array = DataArray(v, coords, name="my_variable")
expected = dedent(
"""\
<xarray.DataArray 'my_variable' (time: 2, x: 3)>
array([[1, 2, 3],
[4, 5, 6]])
Coordinates:
* x (x) int64 0 1 2
other int64 0
Dimensions without coordinates: time
Attributes:
foo: bar"""
<xarray.DataArray 'my_variable' (time: 2, x: 3)>
array([[1, 2, 3],
[4, 5, 6]])
Coordinates:
* x (x) int64 0 1 2
other int64 0
Dimensions without coordinates: time
Attributes:
foo: bar"""
)
assert expected == repr(data_array)

def test_repr_multiindex(self):
expected = dedent(
"""\
<xarray.DataArray (x: 4)>
array([0, 1, 2, 3])
Coordinates:
* x (x) MultiIndex
- level_1 (x) object 'a' 'a' 'b' 'b'
- level_2 (x) int64 1 2 1 2"""
<xarray.DataArray (x: 4)>
array([0, 1, 2, 3])
Coordinates:
* x (x) MultiIndex
- level_1 (x) object 'a' 'a' 'b' 'b'
- level_2 (x) int64 1 2 1 2"""
)
assert expected == repr(self.mda)

@pytest.mark.skipif(
LooseVersion(np.__version__) < "1.15",
reason="old versions of numpy have different printing behavior",
)
def test_repr_multiindex_long(self):
mindex_long = pd.MultiIndex.from_product(
[["a", "b", "c", "d"], [1, 2, 3, 4, 5, 6, 7, 8]],
Expand All @@ -89,13 +93,13 @@ def test_repr_multiindex_long(self):
mda_long = DataArray(list(range(32)), coords={"x": mindex_long}, dims="x")
expected = dedent(
"""\
<xarray.DataArray (x: 32)>
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])
Coordinates:
* x (x) MultiIndex
- level_1 (x) object 'a' 'a' 'a' 'a' 'a' 'a' 'a' ... 'd' 'd' 'd' 'd' 'd' 'd'
- level_2 (x) int64 1 2 3 4 5 6 7 8 1 2 3 4 5 6 ... 4 5 6 7 8 1 2 3 4 5 6 7 8""" # noqa: E501
<xarray.DataArray (x: 32)>
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])
Coordinates:
* x (x) MultiIndex
- level_1 (x) object 'a' 'a' 'a' 'a' 'a' 'a' 'a' ... 'd' 'd' 'd' 'd' 'd' 'd'
- level_2 (x) int64 1 2 3 4 5 6 7 8 1 2 3 4 5 6 ... 4 5 6 7 8 1 2 3 4 5 6 7 8"""
)
assert expected == repr(mda_long)

Expand Down
Loading