From 29fec858e0f249789525c4e28c5da1e039cb675d Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Mon, 1 May 2017 12:41:02 -0400
Subject: [PATCH 1/3] Add tocsr method and sorted checking

pull out linear_loc to separate function
---
 sparse/core.py            | 157 ++++++++++++++++++++++++++++----------
 sparse/tests/test_core.py |   4 +-
 sparse/utils.py           |  13 ++++
 3 files changed, 132 insertions(+), 42 deletions(-)

diff --git a/sparse/core.py b/sparse/core.py
index be9c06c7..0750e402 100644
--- a/sparse/core.py
+++ b/sparse/core.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import, division, print_function
 
-from collections import Iterable
+from collections import Iterable, defaultdict, deque
 from functools import reduce
 import numbers
 import operator
@@ -9,6 +9,12 @@
 import scipy.sparse
 
 
+try:  # Windows compatibility
+    int = long
+except NameError:
+    pass
+
+
 class COO(object):
     """ A Sparse Multidimensional Array
 
@@ -29,9 +35,9 @@ class COO(object):
     --------
     >>> x = np.eye(4)
     >>> x[2, 3] = 5
-    >>> s = COO.from_numpy(x)
+    >>> s = COO(x)
     >>> s
-    <COO: shape=(4, 4), dtype=float64, nnz=5>
+    <COO: shape=(4, 4), dtype=float64, nnz=5, sorted=True, duplicates=False>
     >>> s.data
     array([ 1.,  1.,  1.,  5.,  1.])
     >>> s.coords
@@ -50,9 +56,9 @@ class COO(object):
     >>> data = [1, 2, 3, 4, 5]
     >>> y = COO(coords, data, shape=(3, 4, 5))
     >>> y
-    <COO: shape=(3, 4, 5), dtype=int64, nnz=5>
+    <COO: shape=(3, 4, 5), dtype=int64, nnz=5, sorted=False, duplicates=True>
     >>> tensordot(s, y, axes=(0, 1))
-    <COO: shape=(4, 3, 5), dtype=float64, nnz=6>
+    <COO: shape=(4, 3, 5), dtype=float64, nnz=6, sorted=False, duplicates=False>
 
     Following scipy.sparse conventions you can also pass these as a tuple with
     rows and columns
@@ -73,7 +79,7 @@ class COO(object):
 
     >>> d = {(0, 0, 0): 1, (1, 2, 3): 2, (1, 1, 0): 3}
     >>> COO(d)
-    <COO: shape=(2, 3, 4), dtype=int64, nnz=3>
+    <COO: shape=(2, 3, 4), dtype=int64, nnz=3, sorted=False, duplicates=False>
 
     >>> L = [((0, 0), 1),
     ...      ((1, 1), 2),
@@ -89,17 +95,21 @@ class COO(object):
     """
     __array_priority__ = 12
 
-    def __init__(self, coords, data=None, shape=None, has_duplicates=True):
+    def __init__(self, coords, data=None, shape=None, has_duplicates=True,
+                 sorted=False):
+        self._cache = defaultdict(lambda: deque(maxlen=3))
         if data is None:
             # {(i, j, k): x, (i, j, k): y, ...}
             if isinstance(coords, dict):
                 coords = list(coords.items())
+                has_duplicates = False
 
             if isinstance(coords, np.ndarray):
                 result = COO.from_numpy(coords)
                 self.coords = result.coords
                 self.data = result.data
                 self.has_duplicates = result.has_duplicates
+                self.sorted = result.sorted
                 self.shape = result.shape
                 return
 
@@ -143,6 +153,7 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True):
         self.coords = self.coords.astype(dtype)
         assert not self.shape or len(data) == self.coords.shape[1]
         self.has_duplicates = has_duplicates
+        self.sorted = sorted
 
     @classmethod
     def from_numpy(cls, x):
@@ -153,7 +164,8 @@ def from_numpy(cls, x):
         else:
             coords = []
             data = x
-        return cls(coords, data, shape=x.shape)
+        return cls(coords, data, shape=x.shape, has_duplicates=False,
+                   sorted=True)
 
     def todense(self):
         self = self.sum_duplicates()
@@ -169,7 +181,9 @@ def from_scipy_sparse(cls, x):
         coords = np.empty((2, x.nnz), dtype=x.row.dtype)
         coords[0, :] = x.row
         coords[1, :] = x.col
-        return COO(coords, x.data, shape=x.shape, has_duplicates=not x.has_canonical_format)
+        return COO(coords, x.data, shape=x.shape,
+                   has_duplicates=not x.has_canonical_format,
+                   sorted=x.has_canonical_format)
 
     @property
     def dtype(self):
@@ -235,11 +249,14 @@ def __getitem__(self, index):
         shape = tuple(shape)
         data = self.data[mask]
 
-        return COO(coords, data, shape=shape, has_duplicates=self.has_duplicates)
+        return COO(coords, data, shape=shape,
+                   has_duplicates=self.has_duplicates,
+                   sorted=self.sorted)
 
     def __str__(self):
-        return "<COO: shape=%s, dtype=%s, nnz=%d>" % (self.shape, self.dtype,
-                self.nnz)
+        return "<COO: shape=%s, dtype=%s, nnz=%d, sorted=%s, duplicates=%s>" % (
+                self.shape, self.dtype, self.nnz, self.sorted,
+                self.has_duplicates)
 
     __repr__ = __str__
 
@@ -272,6 +289,8 @@ def reduction(self, method, axis=None, keepdims=False, dtype=None):
             a = getattr(a, method)(axis=0, **kwargs)
             if isinstance(a, scipy.sparse.spmatrix):
                 a = COO.from_scipy_sparse(a)
+                a.sorted = self.sorted
+                a.has_duplicates = False
             elif isinstance(a, np.matrix):
                 a = np.asarray(a)[0]
                 a = COO.from_numpy(a)
@@ -328,22 +347,37 @@ def T(self):
     def dot(self, other):
         return dot(self, other)
 
+    def linear_loc(self, signed=False):
+        """ Index location of every piece of data in a flattened array
+
+        This is used internally to check for duplicates, re-order, reshape,
+        etc..
+        """
+        n = reduce(operator.mul, self.shape)
+        if signed:
+            n = -n
+        dtype = np.min_scalar_type(n)
+        out = np.zeros(self.nnz, dtype=dtype)
+        tmp = np.zeros(self.nnz, dtype=dtype)
+        strides = 1
+        for i, d in enumerate(self.shape[::-1]):
+            # out += self.coords[-(i + 1), :].astype(dtype) * strides
+            np.multiply(self.coords[-(i + 1), :], strides, out=tmp, dtype=dtype)
+            np.add(tmp, out, out=out)
+            strides *= d
+        return out
+
     def reshape(self, shape):
         if self.shape == shape:
             return self
         if any(d == -1 for d in shape):
-            extra = np.uint64(np.prod(self.shape) /
-                              np.prod([d for d in shape if d != -1]))
+            extra = int(np.prod(self.shape) /
+                        np.prod([d for d in shape if d != -1]))
             shape = tuple([d if d != -1 else extra for d in shape])
         if self.shape == shape:
             return self
         # TODO: this np.prod(self.shape) enforces a 2**64 limit to array size
-        dtype = np.min_scalar_type(np.prod(self.shape))
-        linear_loc = np.zeros(self.nnz, dtype=dtype)
-        strides = 1
-        for i, d in enumerate(self.shape[::-1]):
-            linear_loc += self.coords[-(i + 1), :].astype(dtype) * strides
-            strides *= d
+        linear_loc = self.linear_loc()
 
         coords = np.empty((len(shape), self.nnz), dtype=np.min_scalar_type(max(shape)))
         strides = 1
@@ -351,7 +385,9 @@ def reshape(self, shape):
             coords[-(i + 1), :] = (linear_loc // strides) % d
             strides *= d
 
-        return COO(coords, self.data, shape, has_duplicates=self.has_duplicates)
+        return COO(coords, self.data, shape,
+                   has_duplicates=self.has_duplicates,
+                   sorted=self.sorted)
 
     def to_scipy_sparse(self):
         assert self.ndim == 2
@@ -359,9 +395,25 @@ def to_scipy_sparse(self):
                                           (self.coords[0],
                                            self.coords[1])),
                                           shape=self.shape)
-        result.has_canonical_format = not self.has_duplicates
+        result.has_canonical_format = (not self.has_duplicates and self.sorted)
         return result
 
+    def _tocsr(self):
+        assert self.ndim == 2
+
+        # Pass 1: sum duplicates
+        self.sum_duplicates()
+
+        # Pass 2: sort indices
+        self.sort_indices()
+        row, col = self.coords
+
+        # Pass 3: count nonzeros in each row
+        indptr = np.zeros(self.shape[0] + 1, dtype=np.int64)
+        np.cumsum(np.bincount(row, minlength=self.shape[0]), out=indptr[1:])
+
+        return scipy.sparse.csr_matrix((self.data, col, indptr), shape=self.shape)
+
     def tocsr(self):
         try:
             return self._csr
@@ -373,10 +425,8 @@ def tocsr(self):
         except AttributeError:
             pass
 
-        coo = self.to_scipy_sparse()
-        csr = coo.tocsr()
-        self._csr = csr
-        return csr
+        self._csr = self._tocsr()
+        return self._csr
 
     def tocsc(self):
         try:
@@ -388,10 +438,25 @@ def tocsc(self):
             return self._csc
         except AttributeError:
             pass
-        coo = self.to_scipy_sparse()
-        csc = coo.tocsc()
-        self._csc = csc
-        return csc
+
+        self._csc = self.tocsr().tocsc()
+        return self._csc
+
+    def sort_indices(self):
+        if self.sorted:
+            return
+
+        linear = self.linear_loc(signed=True)
+
+        if (np.diff(linear) > 0).all():  # already sorted
+            self.sorted = True
+            return self
+
+        order = np.argsort(linear)
+        self.coords = self.coords[:, order]
+        self.data = self.data[order]
+        self.sorted = True
+        return self
 
     def sum_duplicates(self):
         # Inspired by scipy/sparse/coo.py::sum_duplicates
@@ -400,15 +465,21 @@ def sum_duplicates(self):
             return self
         if not np.prod(self.coords.shape):
             return self
-        order = np.lexsort(self.coords)
-        coords = self.coords[:, order]
-        data = self.data[order]
-        unique_mask = (coords[:, 1:] != coords[:, :-1]).any(axis=0)
+
+        self.sort_indices()
+
+        linear = self.linear_loc()
+        unique_mask = np.diff(linear) != 0
+
+        if unique_mask.sum() == len(unique_mask):  # already unique
+            self.has_duplicates = False
+            return self
+
         unique_mask = np.append(True, unique_mask)
 
-        coords = coords[:, unique_mask]
+        coords = self.coords[:, unique_mask]
         (unique_inds,) = np.nonzero(unique_mask)
-        data = np.add.reduceat(data, unique_inds, dtype=data.dtype)
+        data = np.add.reduceat(self.data, unique_inds, dtype=self.data.dtype)
 
         self.data = data
         self.coords = coords
@@ -430,7 +501,8 @@ def __radd__(self, other):
         return self + other
 
     def __neg__(self):
-        return COO(self.coords, -self.data, self.shape, self.has_duplicates)
+        return COO(self.coords, -self.data, self.shape, self.has_duplicates,
+                   self.sorted)
 
     def __sub__(self, other):
         return self + (-other)
@@ -462,7 +534,9 @@ def elemwise(self, func, *args, **kwargs):
             raise ValueError("Performing this operation would produce "
                     "a dense result: %s" % str(func))
         return COO(self.coords, func(self.data, *args, **kwargs),
-                   shape=self.shape, has_duplicates=self.has_duplicates)
+                   shape=self.shape,
+                   has_duplicates=self.has_duplicates,
+                   sorted=self.sorted)
 
     def elemwise_binary(self, func, other, *args, **kwargs):
         assert isinstance(other, COO)
@@ -675,6 +749,7 @@ def tensordot(a, b, axes=2):
             res = res.todense()
         else:
             res = COO.from_scipy_sparse(res)  # <--- modified
+            res.has_duplicates = False
     if isinstance(res, np.matrix):
         res = np.asarray(res)
     return res.reshape(olda + oldb)
@@ -743,7 +818,8 @@ def concatenate(arrays, axis=0):
     shape[axis] = dim
     has_duplicates = any(x.has_duplicates for x in arrays)
 
-    return COO(coords, data, shape=shape, has_duplicates=has_duplicates)
+    return COO(coords, data, shape=shape, has_duplicates=has_duplicates,
+               sorted=(axis == 0) and all(a.sorted for a in arrays))
 
 
 def stack(arrays, axis=0):
@@ -769,4 +845,5 @@ def stack(arrays, axis=0):
     coords.insert(axis, new)
     coords = np.stack(coords, axis=0)
 
-    return COO(coords, data, shape=shape, has_duplicates=has_duplicates)
+    return COO(coords, data, shape=shape, has_duplicates=has_duplicates,
+               sorted=(axis == 0) and all(a.sorted for a in arrays))
diff --git a/sparse/tests/test_core.py b/sparse/tests/test_core.py
index e531245f..7b231934 100644
--- a/sparse/tests/test_core.py
+++ b/sparse/tests/test_core.py
@@ -71,7 +71,7 @@ def test_large_reshape():
     col = row % m # np.random.randint(0, m, size=n, dtype=np.uint16)
     data = np.ones(n, dtype=np.uint8)
 
-    x = COO((data, (row, col)))
+    x = COO((data, (row, col)), sorted=True, has_duplicates=False)
 
     assert_eq(x, x.reshape(x.shape))
 
@@ -366,8 +366,8 @@ def test_scalar_exponentiation():
 
 def test_create_with_lists_of_tuples():
     L = [((0, 0, 0), 1),
-         ((1, 1, 1), 2),
          ((1, 2, 1), 1),
+         ((1, 1, 1), 2),
          ((1, 3, 2), 3)]
 
     s = COO(L)
diff --git a/sparse/utils.py b/sparse/utils.py
index 76af90c2..cf58a225 100644
--- a/sparse/utils.py
+++ b/sparse/utils.py
@@ -1,9 +1,18 @@
 import numpy as np
+from .core import COO
 
 
 def assert_eq(x, y):
     assert x.shape == y.shape
     assert x.dtype == y.dtype
+
+    if isinstance(x, COO):
+        if x.sorted:
+            assert is_lexsorted(x)
+    if isinstance(y, COO):
+        if y.sorted:
+            assert is_lexsorted(y)
+
     if hasattr(x, 'todense'):
         xx = x.todense()
     else:
@@ -13,3 +22,7 @@ def assert_eq(x, y):
     else:
         yy = y
     assert np.allclose(xx, yy)
+
+
+def is_lexsorted(x):
+    return not x.shape or (np.diff(x.linear_loc()) > 0).all()

From cebb4a6ee2221271e6bc402ac0dc4fcd7d03d622 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Mon, 1 May 2017 18:08:55 -0400
Subject: [PATCH 2/3] Cache reshape and transpose

This stores the last few objects returned from reshape and transpose
calls.  This allows efficiencies from in-place operations like
`sum_duplicates` and `sort_indices` to persist in interative workflows.

Modern NumPy programmers are accustomed to operations like .transpose()
being cheap and aren't accustomed to having to pay sorting costs after
many computations.  These assumptions are no longer true in sparse by
default.  However, by caching recent transpose and reshape objects we
can reuse their inplace modifications.  This greatly accelerates common
machine learning workloads.
---
 sparse/core.py            | 33 +++++++++++++++++++++------------
 sparse/tests/test_core.py | 21 +++++++++++++--------
 2 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/sparse/core.py b/sparse/core.py
index 0750e402..adea4807 100644
--- a/sparse/core.py
+++ b/sparse/core.py
@@ -154,6 +154,7 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True,
         assert not self.shape or len(data) == self.coords.shape[1]
         self.has_duplicates = has_duplicates
         self.sorted = sorted
+        self._cache = defaultdict(lambda: deque(maxlen=3))
 
     @classmethod
     def from_numpy(cls, x):
@@ -209,6 +210,9 @@ def __getitem__(self, index):
             index = (index,)
         index = tuple(ind + self.shape[i] if isinstance(ind, numbers.Integral) and ind < 0 else ind
                       for i, ind in enumerate(index))
+        if (all(ind == slice(None) or ind == slice(0, d)
+                for ind, d in zip(index, self.shape))):
+            return self
         mask = np.ones(self.nnz, dtype=bool)
         for i, ind in enumerate([i for i in index if i is not None]):
             if ind == slice(None, None):
@@ -325,19 +329,15 @@ def transpose(self, axes=None):
         if axes == tuple(range(self.ndim)):
             return self
 
+        for ax, value in self._cache['transpose']:
+            if ax == axes:
+                return value
+
         shape = tuple(self.shape[ax] for ax in axes)
         result = COO(self.coords[axes, :], self.data, shape,
                      has_duplicates=self.has_duplicates)
 
-        if axes == (1, 0):
-            try:
-                result._csc = self._csr.T
-            except AttributeError:
-                pass
-            try:
-                result._csr = self._csc.T
-            except AttributeError:
-                pass
+        self._cache['transpose'].append((axes, result))
         return result
 
     @property
@@ -374,8 +374,14 @@ def reshape(self, shape):
             extra = int(np.prod(self.shape) /
                         np.prod([d for d in shape if d != -1]))
             shape = tuple([d if d != -1 else extra for d in shape])
+
         if self.shape == shape:
             return self
+
+        for sh, value in self._cache['reshape']:
+            if sh == shape:
+                return value
+
         # TODO: this np.prod(self.shape) enforces a 2**64 limit to array size
         linear_loc = self.linear_loc()
 
@@ -385,9 +391,12 @@ def reshape(self, shape):
             coords[-(i + 1), :] = (linear_loc // strides) % d
             strides *= d
 
-        return COO(coords, self.data, shape,
-                   has_duplicates=self.has_duplicates,
-                   sorted=self.sorted)
+        result = COO(coords, self.data, shape,
+                     has_duplicates=self.has_duplicates,
+                     sorted=self.sorted)
+
+        self._cache['reshape'].append((shape, result))
+        return result
 
     def to_scipy_sparse(self):
         assert self.ndim == 2
diff --git a/sparse/tests/test_core.py b/sparse/tests/test_core.py
index 7b231934..5a58df8e 100644
--- a/sparse/tests/test_core.py
+++ b/sparse/tests/test_core.py
@@ -418,14 +418,6 @@ def test_cache_csr():
     assert s.tocsr() is s.tocsr()
     assert s.tocsc() is s.tocsc()
 
-    st = s.T
-
-    assert_eq(st._csr, st)
-    assert_eq(st._csc, st)
-
-    assert isinstance(st.tocsr(), scipy.sparse.csr_matrix)
-    assert isinstance(st.tocsc(), scipy.sparse.csc_matrix)
-
 
 def test_empty_shape():
     x = COO([], [1.0])
@@ -469,3 +461,16 @@ def test_add_many_sparse_arrays():
     x = COO({(1, 1): 1})
     y = sum([x] * 100)
     assert y.nnz < np.prod(y.shape)
+
+
+def test_caching():
+    x = COO({(10, 10, 10): 1})
+
+    assert x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr()
+
+    x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1})
+
+    for i in range(x.ndim):
+        x.reshape((1,) * i + (2,) + (1,) * (x.ndim - i - 1))
+
+    assert len(x._cache['reshape']) < 5

From 2a887635b4bd60be7c89ea9c72e14d851fd31ece Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Wed, 3 May 2017 13:10:57 -0400
Subject: [PATCH 3/3] Opt in to caching

---
 sparse/core.py            | 109 +++++++++++++++++++++++++-------------
 sparse/tests/test_core.py |   6 ++-
 2 files changed, 77 insertions(+), 38 deletions(-)

diff --git a/sparse/core.py b/sparse/core.py
index adea4807..e363db34 100644
--- a/sparse/core.py
+++ b/sparse/core.py
@@ -96,8 +96,10 @@ class COO(object):
     __array_priority__ = 12
 
     def __init__(self, coords, data=None, shape=None, has_duplicates=True,
-                 sorted=False):
-        self._cache = defaultdict(lambda: deque(maxlen=3))
+                 sorted=False, cache=False):
+        self._cache = None
+        if cache:
+            self.enable_caching()
         if data is None:
             # {(i, j, k): x, (i, j, k): y, ...}
             if isinstance(coords, dict):
@@ -154,7 +156,30 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True,
         assert not self.shape or len(data) == self.coords.shape[1]
         self.has_duplicates = has_duplicates
         self.sorted = sorted
+
+    def enable_caching(self):
+        """ Enable caching of reshape, transpose, and tocsr/csc operations
+
+        This enables efficient iterative workflows that make heavy use of
+        csr/csc operations, such as tensordot.  This maintains a cache of
+        recent results of reshape and transpose so that operations like
+        tensordot (which uses both internally) store efficiently stored
+        representations for repeated use.  This can significantly cut down on
+        computational costs in common numeric algorithms.
+
+        However, this also assumes that neither this object, nor the downstream
+        objects will have their data mutated.
+
+        Examples
+        --------
+        >>> x.enable_caching()  # doctest: +SKIP
+        >>> csr1 = x.transpose((2, 0, 1)).reshape((100, 120)).tocsr()  # doctest: +SKIP
+        >>> csr2 = x.transpose((2, 0, 1)).reshape((100, 120)).tocsr()  # doctest: +SKIP
+        >>> csr1 is csr2  # doctest: +SKIP
+        True
+        """
         self._cache = defaultdict(lambda: deque(maxlen=3))
+        return self
 
     @classmethod
     def from_numpy(cls, x):
@@ -329,15 +354,18 @@ def transpose(self, axes=None):
         if axes == tuple(range(self.ndim)):
             return self
 
-        for ax, value in self._cache['transpose']:
-            if ax == axes:
-                return value
+        if self._cache is not None:
+            for ax, value in self._cache['transpose']:
+                if ax == axes:
+                    return value
 
         shape = tuple(self.shape[ax] for ax in axes)
         result = COO(self.coords[axes, :], self.data, shape,
-                     has_duplicates=self.has_duplicates)
+                     has_duplicates=self.has_duplicates,
+                     cache=self._cache is not None)
 
-        self._cache['transpose'].append((axes, result))
+        if self._cache is not None:
+            self._cache['transpose'].append((axes, result))
         return result
 
     @property
@@ -378,9 +406,10 @@ def reshape(self, shape):
         if self.shape == shape:
             return self
 
-        for sh, value in self._cache['reshape']:
-            if sh == shape:
-                return value
+        if self._cache is not None:
+            for sh, value in self._cache['reshape']:
+                if sh == shape:
+                    return value
 
         # TODO: this np.prod(self.shape) enforces a 2**64 limit to array size
         linear_loc = self.linear_loc()
@@ -393,9 +422,10 @@ def reshape(self, shape):
 
         result = COO(coords, self.data, shape,
                      has_duplicates=self.has_duplicates,
-                     sorted=self.sorted)
+                     sorted=self.sorted, cache=self._cache is not None)
 
-        self._cache['reshape'].append((shape, result))
+        if self._cache is not None:
+            self._cache['reshape'].append((shape, result))
         return result
 
     def to_scipy_sparse(self):
@@ -424,32 +454,39 @@ def _tocsr(self):
         return scipy.sparse.csr_matrix((self.data, col, indptr), shape=self.shape)
 
     def tocsr(self):
-        try:
-            return self._csr
-        except AttributeError:
-            pass
-        try:
-            self._csr = self._csc.tocsr()
-            return self._csr
-        except AttributeError:
-            pass
-
-        self._csr = self._tocsr()
-        return self._csr
+        if self._cache is not None:
+            try:
+                return self._csr
+            except AttributeError:
+                pass
+            try:
+                self._csr = self._csc.tocsr()
+                return self._csr
+            except AttributeError:
+                pass
+
+            self._csr = csr = self._tocsr()
+        else:
+            csr = self._tocsr()
+        return csr
 
     def tocsc(self):
-        try:
-            return self._csc
-        except AttributeError:
-            pass
-        try:
-            self._csc = self._csr.tocsc()
-            return self._csc
-        except AttributeError:
-            pass
-
-        self._csc = self.tocsr().tocsc()
-        return self._csc
+        if self._cache is not None:
+            try:
+                return self._csc
+            except AttributeError:
+                pass
+            try:
+                self._csc = self._csr.tocsc()
+                return self._csc
+            except AttributeError:
+                pass
+
+            self._csc = csc = self.tocsr().tocsc()
+        else:
+            csc = self.tocsr().tocsc()
+
+        return csc
 
     def sort_indices(self):
         if self.sorted:
diff --git a/sparse/tests/test_core.py b/sparse/tests/test_core.py
index 5a58df8e..d6c4f171 100644
--- a/sparse/tests/test_core.py
+++ b/sparse/tests/test_core.py
@@ -411,7 +411,7 @@ def test_scipy_sparse_interface():
 
 def test_cache_csr():
     x = random_x((10, 5))
-    s = COO.from_numpy(x)
+    s = COO(x, cache=True)
 
     assert isinstance(s.tocsr(), scipy.sparse.csr_matrix)
     assert isinstance(s.tocsc(), scipy.sparse.csc_matrix)
@@ -465,10 +465,12 @@ def test_add_many_sparse_arrays():
 
 def test_caching():
     x = COO({(10, 10, 10): 1})
+    assert x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr()
 
+    x = COO({(10, 10, 10): 1}, cache=True)
     assert x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr()
 
-    x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1})
+    x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1}, cache=True)
 
     for i in range(x.ndim):
         x.reshape((1,) * i + (2,) + (1,) * (x.ndim - i - 1))