Adjust statespace to match statsmodels

jessegrabowski · jessegrabowski · commit 200a577fe88f · 2024-07-27T22:57:54.000+08:00
diff --git a/pymc_experimental/statespace/models/ETS.py b/pymc_experimental/statespace/models/ETS.py
@@ -208,7 +208,9 @@ def __init__(
     @property
     def param_names(self):
         names = [
-            "x0",
+            "initial_level",
+            "initial_trend",
+            "initial_seasonal",
             "P0",
             "alpha",
             "beta",
@@ -218,10 +220,12 @@ def param_names(self):
             "sigma_obs",
         ]
         if not self.trend:
+            names.remove("initial_trend")
             names.remove("beta")
         if not self.damped_trend:
             names.remove("phi")
         if not self.seasonal:
+            names.remove("initial_seasonal")
             names.remove("gamma")
         if not self.measurement_error:
             names.remove("sigma_obs")
@@ -231,14 +235,19 @@ def param_names(self):
     @property
     def param_info(self) -> dict[str, dict[str, Any]]:
         info = {
-            "x0": {
-                "shape": (self.k_states,),
-                "constraints": None,
-            },
             "P0": {
                 "shape": (self.k_states, self.k_states),
                 "constraints": "Positive Semi-definite",
             },
+            "initial_level": {
+                "shape": None if self.k_endog == 1 else (self.k_endog,),
+                "constraints": None,
+            },
+            "initial_trend": {
+                "shape": None if self.k_endog == 1 else (self.k_endog,),
+                "constraints": None,
+            },
+            "initial_seasonal": {"shape": (self.seasonal_periods,), "constraints": None},
             "sigma_obs": {
                 "shape": None if self.k_endog == 1 else (self.k_endog,),
                 "constraints": "Positive",
@@ -291,16 +300,20 @@ def shock_names(self):
     @property
     def param_dims(self):
         coord_map = {
-            "x0": (ALL_STATE_DIM,),
             "P0": (ALL_STATE_DIM, ALL_STATE_AUX_DIM),
             "sigma_obs": (OBS_STATE_DIM,),
             "sigma_state": (OBS_STATE_DIM,),
+            "initial_level": (OBS_STATE_DIM,),
+            "initial_trend": (OBS_STATE_DIM,),
+            "initial_seasonal": (ETS_SEASONAL_DIM,),
             "seasonal_param": (ETS_SEASONAL_DIM,),
         }
 
         if self.k_endog == 1:
-            coord_map["sigma_state"] = ()
-            coord_map["sigma_obs"] = ()
+            coord_map["sigma_state"] = None
+            coord_map["sigma_obs"] = None
+            coord_map["initial_level"] = None
+            coord_map["initial_trend"] = None
         if not self.measurement_error:
             del coord_map["sigma_obs"]
         if not self.seasonal:
@@ -317,15 +330,16 @@ def coords(self) -> dict[str, Sequence]:
         return coords
 
     def make_symbolic_graph(self) -> None:
-        x0 = self.make_and_register_variable("x0", shape=(self.k_states,), dtype=floatX)
         P0 = self.make_and_register_variable(
             "P0", shape=(self.k_states, self.k_states), dtype=floatX
         )
-
-        # x0, P0, Z, and R do not depend on the user config beyond the shape
-        self.ssm["initial_state", :] = x0
         self.ssm["initial_state_cov"] = P0
 
+        initial_level = self.make_and_register_variable(
+            "initial_level", shape=(self.k_endog,) if self.k_endog > 1 else (), dtype=floatX
+        )
+        self.ssm["initial_state", 1] = initial_level
+
         # The shape of R can be pre-allocated, then filled with the required parameters
         R = pt.zeros((self.k_states, self.k_posdef))
         R = pt.set_subtensor(R[0, :], 1.0)  # We will always have y_t = ... + e_t
@@ -337,6 +351,11 @@ def make_symbolic_graph(self) -> None:
         T_base = pt.as_tensor_variable(np.array([[0.0, 0.0], [0.0, 1.0]]))
 
         if self.trend:
+            initial_trend = self.make_and_register_variable(
+                "initial_trend", shape=(self.k_endog,) if self.k_endog > 1 else (), dtype=floatX
+            )
+            self.ssm["initial_state", 2] = initial_trend
+
             beta = self.make_and_register_variable("beta", shape=(), dtype=floatX)
             R = pt.set_subtensor(R[2, 0], beta)
 
@@ -358,13 +377,19 @@ def make_symbolic_graph(self) -> None:
         T_components = [T_base]
 
         if self.seasonal:
+            initial_seasonal = self.make_and_register_variable(
+                "initial_seasonal", shape=(self.seasonal_periods,), dtype=floatX
+            )
+
+            self.ssm["initial_state", 2 + int(self.trend) :] = initial_seasonal
+
             gamma = self.make_and_register_variable("gamma", shape=(), dtype=floatX)
-            R = pt.set_subtensor(R[3, 0], gamma)
+            R = pt.set_subtensor(R[2 + int(self.trend), 0], gamma)
 
             # The seasonal component is always going to look like a TimeFrequency structural component, see that
             # docstring for more details
             T_seasonal = pt.eye(self.seasonal_periods, k=-1)
-            T_seasonal = pt.set_subtensor(T_seasonal[0, :], -1)
+            T_seasonal = pt.set_subtensor(T_seasonal[0, -1], 1.0)
             T_components += [T_seasonal]
 
         self.ssm["selection"] = R
@@ -375,8 +400,6 @@ def make_symbolic_graph(self) -> None:
         Z = np.zeros((self.k_endog, self.k_states))
         Z[0, 0] = 1.0  # innovation
         Z[0, 1] = 1.0  # level
-        if self.trend:
-            Z[0, 2] = 1.0
         if self.seasonal:
             Z[0, 2 + int(self.trend)] = 1.0
         self.ssm["design"] = Z
diff --git a/tests/statespace/test_ETS.py b/tests/statespace/test_ETS.py
@@ -1,15 +1,15 @@
 import numpy as np
 import pytensor
 import pytest
+import statsmodels.api as sm
 from numpy.testing import assert_allclose
 from pytensor.graph.basic import explicit_graph_inputs
 from scipy import linalg
+from statespace.utils.constants import LONG_MATRIX_NAMES
 
 from pymc_experimental.statespace.models.ETS import BayesianETS
-from tests.statespace.utilities.test_helpers import (
-    load_nile_test_data,
-    simulate_from_numpy_model,
-)
+from tests.statespace.utilities.shared_fixtures import rng
+from tests.statespace.utilities.test_helpers import load_nile_test_data
 
 
 @pytest.fixture(scope="session")
@@ -43,78 +43,66 @@ def tests_invalid_order_raises():
         BayesianETS(order=("A", "Ad", "A"))
 
 
+orders = (
+    ("A", "N", "N"),
+    ("A", "A", "N"),
+    ("A", "Ad", "N"),
+    ("A", "N", "A"),
+    ("A", "A", "A"),
+    ("A", "Ad", "A"),
+)
+order_names = (
+    "Basic",
+    "Trend",
+    "Damped Trend",
+    "Seasonal",
+    "Trend and Seasonal",
+    "Trend, Damped Trend, Seasonal",
+)
+
+order_expected_flags = (
+    {"trend": False, "damped_trend": False, "seasonal": False},
+    {"trend": True, "damped_trend": False, "seasonal": False},
+    {"trend": True, "damped_trend": True, "seasonal": False},
+    {"trend": False, "damped_trend": False, "seasonal": True},
+    {"trend": True, "damped_trend": False, "seasonal": True},
+    {"trend": True, "damped_trend": True, "seasonal": True},
+)
+
+order_params = (
+    ["alpha", "initial_level"],
+    ["alpha", "initial_level", "beta", "initial_trend"],
+    ["alpha", "initial_level", "beta", "initial_trend", "phi"],
+    ["alpha", "initial_level", "gamma", "initial_seasonal"],
+    ["alpha", "initial_level", "beta", "initial_trend", "gamma", "initial_seasonal"],
+    ["alpha", "initial_level", "beta", "initial_trend", "gamma", "initial_seasonal", "phi"],
+)
+
+
 @pytest.mark.parametrize(
-    "order, expected_flags",
-    [
-        (("A", "N", "N"), {"trend": False, "damped_trend": False, "seasonal": False}),
-        (("A", "A", "N"), {"trend": True, "damped_trend": False, "seasonal": False}),
-        (("A", "Ad", "N"), {"trend": True, "damped_trend": True, "seasonal": False}),
-        (("A", "N", "A"), {"trend": False, "damped_trend": False, "seasonal": True}),
-        (("A", "A", "A"), {"trend": True, "damped_trend": False, "seasonal": True}),
-        (("A", "Ad", "A"), {"trend": True, "damped_trend": True, "seasonal": True}),
-    ],
-    ids=[
-        "Basic",
-        "Trend",
-        "Damped Trend",
-        "Seasonal",
-        "Trend and Seasonal",
-        "Trend, Damped Trend, Seasonal",
-    ],
+    "order, expected_flags", zip(orders, order_expected_flags), ids=order_names
 )
 def test_order_flags(order, expected_flags):
     mod = BayesianETS(order=order, seasonal_periods=4)
     for key, value in expected_flags.items():
         assert getattr(mod, key) == value
 
 
-@pytest.mark.parametrize(
-    "order, expected_params",
-    [
-        (("A", "N", "N"), ["alpha"]),
-        (("A", "A", "N"), ["alpha", "beta"]),
-        (("A", "Ad", "N"), ["alpha", "beta", "phi"]),
-        (("A", "N", "A"), ["alpha", "gamma"]),
-        (("A", "A", "A"), ["alpha", "beta", "gamma"]),
-        (("A", "Ad", "A"), ["alpha", "beta", "gamma", "phi"]),
-    ],
-    ids=[
-        "Basic",
-        "Trend",
-        "Damped Trend",
-        "Seasonal",
-        "Trend and Seasonal",
-        "Trend, Damped Trend, Seasonal",
-    ],
-)
+@pytest.mark.parametrize("order, expected_params", zip(orders, order_params), ids=order_names)
 def test_param_info(order: tuple[str, str, str], expected_params):
     mod = BayesianETS(order=order, seasonal_periods=4)
 
-    all_expected_params = [*expected_params, "sigma_state", "x0", "P0"]
+    all_expected_params = [*expected_params, "sigma_state", "P0"]
     assert all(param in mod.param_names for param in all_expected_params)
     assert all(param in all_expected_params for param in mod.param_names)
-    assert all(mod.param_info[param]["dims"] is None for param in expected_params)
+    assert all(
+        mod.param_info[param]["dims"] is None
+        for param in expected_params
+        if "seasonal" not in param
+    )
 
 
-@pytest.mark.parametrize(
-    "order, expected_params",
-    [
-        (("A", "N", "N"), ["alpha"]),
-        (("A", "A", "N"), ["alpha", "beta"]),
-        (("A", "Ad", "N"), ["alpha", "beta", "phi"]),
-        (("A", "N", "A"), ["alpha", "gamma"]),
-        (("A", "A", "A"), ["alpha", "beta", "gamma"]),
-        (("A", "Ad", "A"), ["alpha", "beta", "gamma", "phi"]),
-    ],
-    ids=[
-        "Basic",
-        "Trend",
-        "Damped Trend",
-        "Seasonal",
-        "Trend and Seasonal",
-        "Trend, Damped Trend, Seasonal",
-    ],
-)
+@pytest.mark.parametrize("order, expected_params", zip(orders, order_params), ids=order_names)
 def test_statespace_matrices(order: tuple[str, str, str], expected_params: list[str]):
     seasonal_periods = np.random.randint(3, 12)
     mod = BayesianETS(order=order, seasonal_periods=seasonal_periods, measurement_error=True)
@@ -127,7 +115,9 @@ def test_statespace_matrices(order: tuple[str, str, str], expected_params: list[
         "phi": 0.95,
         "sigma_state": 0.1,
         "sigma_obs": 0.1,
-        "x0": np.zeros(expected_states),
+        "initial_level": 3.0,
+        "initial_trend": 1.0,
+        "initial_seasonal": np.ones(seasonal_periods),
         "initial_state_cov": np.eye(expected_states),
     }
 
@@ -161,42 +151,91 @@ def test_statespace_matrices(order: tuple[str, str, str], expected_params: list[
     Z_val[0, 0] = 1.0
     Z_val[0, 1] = 1.0
 
+    x0_val = np.zeros((expected_states,))
+    x0_val[1] = test_values["initial_level"]
+
     if order[1] == "N":
         T_val = np.array([[0.0, 0.0], [0.0, 1.0]])
     else:
+        x0_val[2] = test_values["initial_trend"]
         R_val[2] = test_values["beta"]
         T_val = np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]])
-        Z_val[0, 2] = 1.0
 
     if order[1] == "Ad":
         T_val[1:, -1] *= test_values["phi"]
 
     if order[2] == "A":
-        R_val[3] = test_values["gamma"]
+        x0_val[2 + int(order[1] != "N") :] = test_values["initial_seasonal"]
+        R_val[2 + int(order[1] != "N")] = test_values["gamma"]
         S = np.eye(seasonal_periods, k=-1)
-        S[0, :] = -1
+        S[0, -1] = 1.0
         Z_val[0, 2 + int(order[1] != "N")] = 1.0
     else:
         S = np.eye(0)
 
     T_val = linalg.block_diag(T_val, S)
 
+    assert_allclose(x0, x0_val)
     assert_allclose(T, T_val)
     assert_allclose(R, R_val)
     assert_allclose(Z, Z_val)
 
 
-def test_deterministic_simulation_matches_statsmodels():
-    mod = BayesianETS(order=("A", "Ad", "A"), seasonal_periods=4, measurement_error=False)
+@pytest.mark.parametrize("order, params", zip(orders, order_params), ids=order_names)
+def test_statespace_matches_statsmodels(rng, order: tuple[str, str, str], params):
+    seasonal_periods = rng.integers(3, 12)
+    data = rng.normal(size=(100,))
+    mod = BayesianETS(order=order, seasonal_periods=seasonal_periods, measurement_error=False)
+    sm_mod = sm.tsa.statespace.ExponentialSmoothing(
+        data,
+        trend=mod.trend,
+        damped_trend=mod.damped_trend,
+        seasonal=seasonal_periods if mod.seasonal else None,
+    )
+
+    simplex_params = ["alpha", "beta", "gamma"]
+    test_values = dict(zip(simplex_params, rng.dirichlet(alpha=np.ones(3))))
+    test_values["phi"] = rng.beta(1, 1)
+
+    test_values["initial_level"] = rng.normal()
+    test_values["initial_trend"] = rng.normal()
+    test_values["initial_seasonal"] = rng.normal(size=seasonal_periods)
+    test_values["initial_state_cov"] = np.eye(mod.k_states)
+    test_values["sigma_state"] = 1.0
+
+    sm_test_values = test_values.copy()
+    sm_test_values["smoothing_level"] = test_values["alpha"]
+    sm_test_values["smoothing_trend"] = test_values["beta"]
+    sm_test_values["smoothing_seasonal"] = test_values["gamma"]
+    sm_test_values["damping_trend"] = test_values["phi"]
+    sm_test_values["initial_seasonal"] = test_values["initial_seasonal"][0]
+    for i in range(1, seasonal_periods):
+        sm_test_values[f"initial_seasonal.L{i}"] = test_values["initial_seasonal"][i]
+
+    x0 = np.r_[
+        0, *[test_values[name] for name in ["initial_level", "initial_trend", "initial_seasonal"]]
+    ]
+    mask = [True, True, order[1] != "N", *(order[2] != "N",) * seasonal_periods]
+
+    sm_mod.initialize_known(initial_state=x0[mask], initial_state_cov=np.eye(mod.k_states))
+    sm_mod.fit_constrained({name: sm_test_values[name] for name in sm_mod.param_names})
+
+    matrices = mod._unpack_statespace_with_placeholders()
+    inputs = list(explicit_graph_inputs(matrices))
+    input_names = [x.name for x in inputs]
 
-    rng = np.random.default_rng()
-    test_values = {
-        "alpha": 0.7,
-        "beta": 0.15,
-        "gamma": 0.15,
-        "phi": 0.95,
-        "sigma_state": 0.0,
-        "x0": rng.normal(size=(7,)),
-        "initial_state_cov": np.eye(7),
-    }
-    hidden_states, observed = simulate_from_numpy_model(mod, rng, test_values)
+    f_matrices = pytensor.function(inputs, matrices)
+    test_values_subset = {name: test_values[name] for name in input_names}
+
+    matrices = f_matrices(**test_values_subset)
+    sm_matrices = [sm_mod.ssm[name] for name in LONG_MATRIX_NAMES[2:]]
+
+    for matrix, sm_matrix, name in zip(matrices[2:], sm_matrices, LONG_MATRIX_NAMES[2:]):
+        if name == "selection":
+            # The statsmodels selection matrix seems to be wrong? It sets the first element of the selection matrix
+            # to 1 - sum(alpha, beta, gamma), which doesn't match the equations presented in fpp3
+            assert_allclose(matrix[1:], sm_matrix[1:], err_msg=f"{name} does not match")
+            assert matrix[0] == 1.0
+            assert sm_matrix[0] != 1.0
+        else:
+            assert_allclose(matrix, sm_matrix, err_msg=f"{name} does not match")