[NullBackend] Add tests; fix issue with preallocate=0

alcrene · alcrene · commit 54aad19897b4 · 2024-06-09T13:58:15.000+02:00
Tests were copied from test_backend_numpy, with the parts checking the `_samples` array removed.

Fixed issues:
- Reset default preallocation to 1_000, like with NumPyBackend:
  it is still used for the stats array, so it makes sense to use
  a reasonable default.
- Preallocate = 0 no longer switches the allocation to object arrays,
  in contrast to NumPyBackend
  - IMO this is a bug in NumPyBackend: `grow_append` cannot know if
    ``preallocate = 0`` was used; it only looks at the `rigid` value
    to determine how to append.
  - Without this change, `grow_append` will always fail when we use
    `preallocate = 0` with multivariate statistics.
diff --git a/mcbackend/backends/null.py b/mcbackend/backends/null.py
@@ -50,7 +50,7 @@ def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> Non
             and grow the allocated memory by 10 % when needed.
             Exceptions are variables with non-rigid shapes (indicated by 0 in the shape tuple)
             where the correct amount of memory cannot be pre-allocated.
-            In these cases, and when ``preallocate == 0`` object arrays are used.
+            In these cases object arrays are used.
         """
         self._stat_is_rigid: Dict[str, bool] = {}
         self._stats: Dict[str, numpy.ndarray] = {}
@@ -63,7 +63,7 @@ def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> Non
             for var in variables:
                 rigid = is_rigid(var.shape) and not var.undefined_ndim and var.dtype != "str"
                 rigid_dict[var.name] = rigid
-                if preallocate > 0 and rigid:
+                if rigid:
                     reserve = (preallocate, *var.shape)
                     target_dict[var.name] = numpy.empty(reserve, var.dtype)
                 else:
@@ -101,7 +101,7 @@ def get_stats_at(self, idx: int, stat_names: Sequence[str]) -> Dict[str, numpy.n
 class NullRun(Run):
     """An MCMC run where samples are immediately discarded."""
 
-    def __init__(self, meta: RunMeta, *, preallocate: int=0) -> None:
+    def __init__(self, meta: RunMeta, *, preallocate: int) -> None:
         self._settings = {"preallocate": preallocate}
         self._chains: List[NullChain] = []
         super().__init__(meta)
@@ -119,7 +119,7 @@ def get_chains(self) -> Tuple[NullChain, ...]:
 class NullBackend(Backend):
     """A backend which discards samples immediately."""
 
-    def __init__(self, preallocate: int=0) -> None:
+    def __init__(self, preallocate: int = 1_000) -> None:
         self._settings = {"preallocate": preallocate}
         super().__init__()
 
diff --git a/mcbackend/test_backend_null.py b/mcbackend/test_backend_null.py
@@ -0,0 +1,213 @@
+import random
+
+import hagelkorn
+import numpy
+import pytest
+
+from mcbackend.backends.null import NullBackend, NullChain, NullRun
+from mcbackend.core import RunMeta, is_rigid
+from mcbackend.meta import Variable
+from mcbackend.test_utils import CheckBehavior, CheckPerformance, make_runmeta, make_draw
+
+class CheckNullBehavior(CheckBehavior):
+    """
+    Overrides tests which assert that data are recorded correctly.
+    We perform all the operations of the original test, but in the
+    end we do the opposite: assert that an exception is raised
+    when either `get_draws` or `get_draws_at` is called.
+    Stats are still recorded, so that part of the tests is reproduced unchanged.
+    """
+
+    @pytest.mark.parametrize("with_stats", [False, True])
+    def test__append_get_at(self, with_stats):
+        rmeta = make_runmeta()
+        run = self.backend.init_run(rmeta)
+        chain = run.init_chain(7)
+
+        # Generate data
+        draw = make_draw(rmeta.variables)
+        stats = make_draw(rmeta.sample_stats) if with_stats else None
+
+        # Append to the chain
+        assert len(chain) == 0
+        chain.append(draw, stats)
+        assert len(chain) == 1
+
+        # Retrieve by index - Raises exception
+        with pytest.raises(RuntimeError):
+            chain.get_draws_at(0, [v.name for v in rmeta.variables])
+
+        # NB: Stats are still recorded and can be retrieved as with other chains
+        if with_stats:
+            actual = chain.get_stats_at(0, [v.name for v in rmeta.sample_stats])
+            assert isinstance(actual, dict)
+            assert set(actual) == set(stats)
+            for vn, act in actual.items():
+                numpy.testing.assert_array_equal(act, stats[vn])
+        pass
+
+    @pytest.mark.parametrize("with_stats", [False, True])
+    def test__append_get_with_changelings(self, with_stats):
+        rmeta = make_runmeta(flexibility=True)
+        run = self.backend.init_run(rmeta)
+        chain = run.init_chain(7)
+
+        # Generate draws and add them to the chain
+        n = 10
+        draws = [make_draw(rmeta.variables) for _ in range(n)]
+        if with_stats:
+            stats = [make_draw(rmeta.sample_stats) for _ in range(n)]
+        else:
+            stats = [None] * n
+
+        for d, s in zip(draws, stats):
+            chain.append(d, s)
+
+        # Fetching variables raises exception
+        for var in rmeta.variables:
+            expected = [draw[var.name] for draw in draws]
+            with pytest.raises(RuntimeError):
+                chain.get_draws(var.name)
+
+        if with_stats:
+            for var in rmeta.sample_stats:
+                expected = [stat[var.name] for stat in stats]
+                actual = chain.get_stats(var.name)
+                assert isinstance(actual, numpy.ndarray)
+                if var.dtype == "str":
+                    assert tuple(actual.shape) == tuple(numpy.shape(expected))
+                    # String dtypes have strange names
+                    assert "str" in actual.dtype.name
+                elif is_rigid(var.shape):
+                    assert tuple(actual.shape) == tuple(numpy.shape(expected))
+                    assert actual.dtype.name == var.dtype
+                    numpy.testing.assert_array_equal(actual, expected)
+                else:
+                    # Non-rigid variables are returned as object-arrays.
+                    assert actual.shape == (len(expected),)
+                    assert actual.dtype == object
+                    # Their values must be asserted elementwise to avoid shape problems.
+                    for act, exp in zip(actual, expected):
+                        numpy.testing.assert_array_equal(act, exp)
+        pass
+
+    @pytest.mark.parametrize(
+        "slc",
+        [
+            None,
+            slice(None, None, None),
+            slice(2, None, None),
+            slice(2, 10, None),
+            slice(2, 15, 3),  # every 3rd
+            slice(15, 2, -3),  # backwards every 3rd
+            slice(2, 15, -3),  # empty
+            slice(-8, None, None),  # the last 8
+            slice(-8, -2, 2),
+            slice(-50, -2, 2),
+            slice(15, 10),  # empty
+            slice(1, 1),  # empty
+        ],
+    )
+    def test__get_slicing(self, slc: slice):
+        # "A" are just numbers to make diagnosis easier.
+        # "B" are dynamically shaped to cover the edge cases.
+        rmeta = RunMeta(
+            variables=[Variable("A", "uint8"), Variable("M", "str", [2, 3])],
+            sample_stats=[Variable("B", "uint8", [2, -1])],
+            data=[],
+        )
+        run = self.backend.init_run(rmeta)
+        chain = run.init_chain(0)
+
+        # Generate draws and add them to the chain
+        N = 20
+        draws = [make_draw(rmeta.variables) for n in range(N)]
+        stats = [make_draw(rmeta.sample_stats) for n in range(N)]
+        for d, s in zip(draws, stats):
+            chain.append(d, s)
+        assert len(chain) == N
+
+        # slc=None in this test means "don't pass it".
+        # The implementations should default to slc=slice(None, None, None).
+        kwargs = dict(slc=slc) if slc is not None else {}
+        with pytest.raises(RuntimeError):
+            chain.get_draws("A", **kwargs)
+        with pytest.raises(RuntimeError):
+            chain.get_draws("M", **kwargs)
+        act_stats = chain.get_stats("B", **kwargs)
+        expected_stats = [s["B"] for s in stats][slc or slice(None, None, None)]
+
+        # Stat "B" is dynamically shaped, which means we're dealing with
+        # dtype=object arrays. These must be checked elementwise.
+        assert len(act_stats) == len(expected_stats)
+        assert act_stats.dtype == object
+        for a, e in zip(act_stats, expected_stats):
+            numpy.testing.assert_array_equal(a, e)
+        pass
+
+    def test__to_inferencedata(self):
+        """
+        NullBackend doesn’t support `to_inferencedata`, so there isn’t
+        anything to test here.
+        """
+        pass
+
+class TestNullBackend(CheckNullBehavior, CheckPerformance):
+    cls_backend = NullBackend
+    cls_run = NullRun
+    cls_chain = NullChain
+
+    # `test_targets` and `test_growing` are copied over from TestNumPyBackend.
+    # The lines testing sample storage were removed, since neither `_samples`
+    # nor `_var_is_rigid` is supported by NullBackend.
+    # However if one were to add tests for `_stats` and `_stat_is_rigid`
+    # to the NumPy suite, we could port those here.
+
+    def test_targets(self):
+        imb = NullBackend(preallocate=123)
+        rm = RunMeta(
+            rid=hagelkorn.random(),
+            variables=[
+                Variable("tensor", "int8", (3, 4, 5)),
+                Variable("scalar", "float64", ()),
+                Variable("changeling", "uint16", (3, -1)),
+            ],
+        )
+        run = imb.init_run(rm)
+        chain = run.init_chain(0)
+        pass
+
+    @pytest.mark.parametrize("preallocate", [0, 75])
+    def test_growing(self, preallocate):
+        imb = NullBackend(preallocate=preallocate)
+        rm = RunMeta(
+            rid=hagelkorn.random(),
+            variables=[
+                Variable(
+                    "A",
+                    "float32",
+                    (2,),
+                ),
+                Variable(
+                    "B",
+                    "float32",
+                    (-1,),
+                ),
+            ],
+        )
+        run = imb.init_run(rm)
+        chain = run.init_chain(0)
+        # TODO: Check dimensions of stats array ?
+        for _ in range(130):
+            draw = {
+                "A": numpy.random.uniform(size=(2,)),
+                "B": numpy.random.uniform(size=(random.randint(0, 10),)),
+            }
+            chain.append(draw)
+        # TODO: Check dimensions of stats array ?
+        pass
+
+if __name__ == "__main__":
+    tc = TestNullBackend()
+    df = tc.run_all_benchmarks()
+    print(df)