pydata · shoyer · Aug 16, 2019 · Aug 13, 2019 · Aug 13, 2019 · Aug 13, 2019
diff --git a/setup.cfg b/setup.cfg
@@ -15,11 +15,12 @@ markers =
     slow: slow tests
 
 [flake8]
-max-line-length=88
 ignore=
     # whitespace before ':' - doesn't work well with black
     E203
     E402
+    # line too long - let black worry about that
+    E501
     # do not assign a lambda expression, use a def
     E731
     # line break before binary operator

diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
@@ -955,7 +955,7 @@ def cftime_range(
     See Also
     --------
     pandas.date_range
-    """  # noqa: E501
+    """
     # Adapted from pandas.core.indexes.datetimes._generate_range.
     if _count_not_none(start, end, periods, freq) != 3:
         raise ValueError(

diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
@@ -480,7 +480,7 @@ def to_datetimeindex(self, unsafe=False):
         CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], dtype='object')
         >>> times.to_datetimeindex()
         DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None)
-        """  # noqa: E501
+        """
         nptimes = cftime_to_nptime(self)
         calendar = infer_calendar_name(self)
         if calendar not in _STANDARD_CALENDARS and not unsafe:

diff --git a/xarray/core/computation.py b/xarray/core/computation.py
@@ -960,7 +960,7 @@ def earth_mover_distance(first_samples,
     .. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html
     .. [2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
     .. [3] http://xarray.pydata.org/en/stable/computation.html#wrapping-custom-computation
-    """  # noqa: E501  # don't error on that URL one line up
+    """
     from .groupby import GroupBy
     from .dataarray import DataArray
     from .variable import Variable

diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
@@ -10,7 +10,7 @@
 
 from .duck_array_ops import array_equiv
 from .options import OPTIONS
-from .pycompat import dask_array_type
+from .pycompat import dask_array_type, sparse_array_type
 
 try:
     from pandas.errors import OutOfBoundsDatetime
@@ -211,9 +211,60 @@ def format_array_flat(array, max_width):
     return pprint_str
 
 
-def summarize_variable(
-    name, var, col_width, show_values=True, marker=" ", max_width=None
-):
+_KNOWN_TYPE_REPRS = {np.ndarray: "np.ndarray"}  # array type -> label printed after "meta=" by inline_dask_repr
+with contextlib.suppress(ImportError):  # sparse is optional; if it is missing only the numpy entry is registered
+    import sparse
+
+    _KNOWN_TYPE_REPRS[sparse.COO] = "sparse.COO"
+
+
+def inline_dask_repr(array):
+    """Similar to dask.array.Array.__repr__, but without
+    redundant information that's already printed by the repr
+    function of the xarray wrapper.
+    """
+    assert isinstance(array, dask_array_type), array
+
+    chunksize = tuple(c[0] for c in array.chunks)  # first entry of each element of array.chunks
+
+    if hasattr(array, "_meta"):  # _meta is not present on every dask version (tests expect it from 2.0 on)
+        meta = array._meta
+        if type(meta) in _KNOWN_TYPE_REPRS:  # exact type lookup; anything else falls back to the class name
+            meta_repr = _KNOWN_TYPE_REPRS[type(meta)]
+        else:
+            meta_repr = type(meta).__name__
+        meta_string = ", meta={}".format(meta_repr)
+    else:
+        meta_string = ""
+
+    return "dask.array<chunksize={}{}>".format(chunksize, meta_string)
+
+
+def inline_sparse_repr(array):
+    """Similar to sparse.COO.__repr__, but without the redundant shape/dtype."""
+    assert isinstance(array, sparse_array_type), array  # inline_variable_array_repr checks this type before calling
+    return "<{}: nnz={:d}, fill_value={!s}>".format(  # class name, then the array's nnz and fill_value attributes
+        type(array).__name__, array.nnz, array.fill_value
+    )
+
+
+def inline_variable_array_repr(var, max_width):
+    """Build a one-line summary of a variable's data."""
+    if var._in_memory:  # presumably: data is already loaded, so printing values is cheap -- TODO confirm
+        return format_array_flat(var, max_width)
+    elif isinstance(var._data, dask_array_type):
+        return inline_dask_repr(var.data)  # Variable.data returns _data unchanged for dask arrays
+    elif isinstance(var._data, sparse_array_type):
+        return inline_sparse_repr(var.data)  # NOTE(review): .data is _data only if it has __array_function__, else .values
+    elif hasattr(var._data, "__array_function__"):
+        return maybe_truncate(repr(var._data).replace("\n", " "), max_width)  # newlines -> spaces keeps it on one line
+    else:
+        # internal xarray array type
+        return "..."
+
+
+def summarize_variable(name, var, col_width, marker=" ", max_width=None):
+    """Summarize a variable in one line, e.g., for the Dataset.__repr__."""
     if max_width is None:
         max_width = OPTIONS["display_width"]
     first_col = pretty_print("  {} {} ".format(marker, name), col_width)
@@ -222,12 +273,9 @@ def summarize_variable(
     else:
         dims_str = ""
     front_str = "{}{}{} ".format(first_col, dims_str, var.dtype)
-    if show_values:
-        values_str = format_array_flat(var, max_width - len(front_str))
-    elif isinstance(var._data, dask_array_type):
-        values_str = short_dask_repr(var, show_dtype=False)
-    else:
-        values_str = "..."
+
+    values_width = max_width - len(front_str)
+    values_str = inline_variable_array_repr(var, values_width)
 
     return front_str + values_str
 
@@ -249,13 +297,11 @@ def _summarize_coord_levels(coord, col_width, marker="-"):
 
 
 def summarize_datavar(name, var, col_width):
-    show_values = var._in_memory
-    return summarize_variable(name, var.variable, col_width, show_values)
+    return summarize_variable(name, var.variable, col_width)  # value formatting is picked by inline_variable_array_repr
 
 
 def summarize_coord(name, var, col_width):
     is_index = name in var.dims
-    show_values = var._in_memory
     marker = "*" if is_index else " "
     if is_index:
         coord = var.variable.to_index_variable()
@@ -266,7 +312,7 @@ def summarize_coord(name, var, col_width):
                     _summarize_coord_levels(coord, col_width),
                 ]
             )
-    return summarize_variable(name, var.variable, col_width, show_values, marker)
+    return summarize_variable(name, var.variable, col_width, marker)
 
 
 def summarize_attr(key, value, col_width=None):
@@ -361,14 +407,14 @@ def unindexed_dims_repr(dims, coords):
 def set_numpy_options(*args, **kwargs):
     original = np.get_printoptions()
     np.set_printoptions(*args, **kwargs)
-    yield
-    np.set_printoptions(**original)
-
+    try:
+        yield
+    finally:  # put the saved print options back even if an exception is raised at the yield
+        np.set_printoptions(**original)
 
-def short_array_repr(array):
 
-    if not hasattr(array, "__array_function__"):
-        array = np.asarray(array)
+def short_numpy_repr(array):
+    array = np.asarray(array)
 
     # default to lower precision so a full (abbreviated) line can fit on
     # one line with the default display_width
@@ -384,26 +430,19 @@ def short_array_repr(array):
         return repr(array)
 
 
-def short_dask_repr(array, show_dtype=True):
-    """Similar to dask.array.DataArray.__repr__, but without
-    redundant information that's already printed by the repr
-    function of the xarray wrapper.
-    """
-    chunksize = tuple(c[0] for c in array.chunks)
-    if show_dtype:
-        return "dask.array<shape={}, dtype={}, chunksize={}>".format(
-            array.shape, array.dtype, chunksize
-        )
-    else:
-        return "dask.array<shape={}, chunksize={}>".format(array.shape, chunksize)
-
-
 def short_data_repr(array):
-    if isinstance(getattr(array, "variable", array)._data, dask_array_type):
-        return short_dask_repr(array)
+    """Format "data" for DataArray and Variable (bare ndarrays are accepted too)."""
+    if isinstance(array, np.ndarray):  # checked first: a bare ndarray has no ._data to look up
+        return short_numpy_repr(array)
+    internal_data = getattr(array, "variable", array)._data  # use .variable's _data when the object has one
+    if hasattr(internal_data, "__array_function__") or isinstance(
+        internal_data, dask_array_type
+    ):
+        return repr(array.data)  # duck/dask arrays: delegate to the wrapped array's own repr
     elif array._in_memory or array.size < 1e5:
-        return short_array_repr(array.data)
+        return short_numpy_repr(array)
     else:
+        # internal xarray array type
         return "[{} values with dtype={}]".format(array.size, array.dtype)
 
 
@@ -554,7 +593,7 @@ def diff_array_repr(a, b, compat):
     summary.append(diff_dim_summary(a, b))
 
     if not array_equiv(a.data, b.data):
-        temp = [wrap_indent(short_array_repr(obj), start="    ") for obj in (a, b)]
+        temp = [wrap_indent(short_numpy_repr(obj), start="    ") for obj in (a, b)]
         diff_data_repr = [
             ab_side + "\n" + ab_data_repr
             for ab_side, ab_data_repr in zip(("L", "R"), temp)

diff --git a/xarray/core/variable.py b/xarray/core/variable.py
@@ -317,7 +317,9 @@ def _in_memory(self):
 
     @property
     def data(self):
-        if hasattr(self._data, "__array_function__"):
+        if hasattr(self._data, "__array_function__") or isinstance(
+            self._data, dask_array_type
+        ):
             return self._data
         else:
             return self.values

diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
@@ -149,8 +149,8 @@ def test_binary_op(self):
     def test_repr(self):
         expected = dedent(
             """\
-        <xarray.Variable (x: 4, y: 6)>
-        dask.array<shape=(4, 6), dtype=float64, chunksize=(2, 2)>"""
+            <xarray.Variable (x: 4, y: 6)>
+            dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2)>"""
         )
         assert expected == repr(self.lazy_var)
 
@@ -491,41 +491,55 @@ def test_dot(self):
         lazy = self.lazy_array.dot(self.lazy_array[0])
         self.assertLazyAndAllClose(eager, lazy)
 
+    @pytest.mark.skipif(LooseVersion(dask.__version__) >= "2.0", reason="no meta")
+    def test_dataarray_repr_legacy(self):
+        data = build_dask_array("data")
+        nonindex_coord = build_dask_array("coord")
+        a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
+        expected = dedent(
+            """\
+            <xarray.DataArray 'data' (x: 1)>
+            dask.array<data, shape=(1,), dtype=int64, chunksize=(1,)>
+            Coordinates:
+                y        (x) int64 dask.array<chunksize=(1,)>
+            Dimensions without coordinates: x"""
+        )
+        assert expected == repr(a)
+        assert kernel_call_count == 0  # should not evaluate dask array
+
+    @pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0", reason="needs meta")
     def test_dataarray_repr(self):
-        # Test that __repr__ converts the dask backend to numpy
-        # in neither the data variable nor the non-index coords
         data = build_dask_array("data")
         nonindex_coord = build_dask_array("coord")
         a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
         expected = dedent(
             """\
-        <xarray.DataArray 'data' (x: 1)>
-        dask.array<shape=(1,), dtype=int64, chunksize=(1,)>
-        Coordinates:
-            y        (x) int64 dask.array<shape=(1,), chunksize=(1,)>
-        Dimensions without coordinates: x"""
+            <xarray.DataArray 'data' (x: 1)>
+            dask.array<data, shape=(1,), dtype=int64, chunksize=(1,)>
+            Coordinates:
+                y        (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
+            Dimensions without coordinates: x"""
         )
         assert expected == repr(a)
-        assert kernel_call_count == 0
+        assert kernel_call_count == 0  # should not evaluate dask array
 
+    @pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0", reason="needs meta")
     def test_dataset_repr(self):
-        # Test that pickling/unpickling converts the dask backend
-        # to numpy in neither the data variables nor the non-index coords
         data = build_dask_array("data")
         nonindex_coord = build_dask_array("coord")
         ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)})
         expected = dedent(
             """\
-        <xarray.Dataset>
-        Dimensions:  (x: 1)
-        Coordinates:
-            y        (x) int64 dask.array<shape=(1,), chunksize=(1,)>
-        Dimensions without coordinates: x
-        Data variables:
-            a        (x) int64 dask.array<shape=(1,), chunksize=(1,)>"""
+            <xarray.Dataset>
+            Dimensions:  (x: 1)
+            Coordinates:
+                y        (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
+            Dimensions without coordinates: x
+            Data variables:
+                a        (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>"""
         )
         assert expected == repr(ds)
-        assert kernel_call_count == 0
+        assert kernel_call_count == 0  # should not evaluate dask array
 
     def test_dataarray_pickle(self):
         # Test that pickling/unpickling converts the dask backend

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
@@ -57,30 +57,34 @@ def test_repr(self):
         data_array = DataArray(v, coords, name="my_variable")
         expected = dedent(
             """\
-        <xarray.DataArray 'my_variable' (time: 2, x: 3)>
-        array([[1, 2, 3],
-               [4, 5, 6]])
-        Coordinates:
-          * x        (x) int64 0 1 2
-            other    int64 0
-        Dimensions without coordinates: time
-        Attributes:
-            foo:      bar"""
+            <xarray.DataArray 'my_variable' (time: 2, x: 3)>
+            array([[1, 2, 3],
+                   [4, 5, 6]])
+            Coordinates:
+              * x        (x) int64 0 1 2
+                other    int64 0
+            Dimensions without coordinates: time
+            Attributes:
+                foo:      bar"""
         )
         assert expected == repr(data_array)
 
     def test_repr_multiindex(self):
         expected = dedent(
             """\
-        <xarray.DataArray (x: 4)>
-        array([0, 1, 2, 3])
-        Coordinates:
-          * x        (x) MultiIndex
-          - level_1  (x) object 'a' 'a' 'b' 'b'
-          - level_2  (x) int64 1 2 1 2"""
+            <xarray.DataArray (x: 4)>
+            array([0, 1, 2, 3])
+            Coordinates:
+              * x        (x) MultiIndex
+              - level_1  (x) object 'a' 'a' 'b' 'b'
+              - level_2  (x) int64 1 2 1 2"""
         )
         assert expected == repr(self.mda)
 
+    @pytest.mark.skipif(
+        LooseVersion(np.__version__) < "1.15",
+        reason="old versions of numpy have different printing behavior",
+    )
     def test_repr_multiindex_long(self):
         mindex_long = pd.MultiIndex.from_product(
             [["a", "b", "c", "d"], [1, 2, 3, 4, 5, 6, 7, 8]],
@@ -89,13 +93,13 @@ def test_repr_multiindex_long(self):
         mda_long = DataArray(list(range(32)), coords={"x": mindex_long}, dims="x")
         expected = dedent(
             """\
-        <xarray.DataArray (x: 32)>
-        array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
-               18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])
-        Coordinates:
-          * x        (x) MultiIndex
-          - level_1  (x) object 'a' 'a' 'a' 'a' 'a' 'a' 'a' ... 'd' 'd' 'd' 'd' 'd' 'd'
-          - level_2  (x) int64 1 2 3 4 5 6 7 8 1 2 3 4 5 6 ... 4 5 6 7 8 1 2 3 4 5 6 7 8"""  # noqa: E501
+            <xarray.DataArray (x: 32)>
+            array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+                   17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])
+            Coordinates:
+              * x        (x) MultiIndex
+              - level_1  (x) object 'a' 'a' 'a' 'a' 'a' 'a' 'a' ... 'd' 'd' 'd' 'd' 'd' 'd'
+              - level_2  (x) int64 1 2 3 4 5 6 7 8 1 2 3 4 5 6 ... 4 5 6 7 8 1 2 3 4 5 6 7 8"""
         )
         assert expected == repr(mda_long)