|
57 | 57 | MultiKeyCountingEnv,
|
58 | 58 | MultiKeyCountingEnvPolicy,
|
59 | 59 | NestedCountingEnv,
|
| 60 | + Str2StrEnv, |
60 | 61 | )
|
61 | 62 | else:
|
62 | 63 | from _utils_internal import (
|
|
95 | 96 | MultiKeyCountingEnv,
|
96 | 97 | MultiKeyCountingEnvPolicy,
|
97 | 98 | NestedCountingEnv,
|
| 99 | + Str2StrEnv, |
98 | 100 | )
|
99 | 101 | from packaging import version
|
100 | 102 | from tensordict import (
|
|
133 | 135 | AutoResetTransform,
|
134 | 136 | Tokenizer,
|
135 | 137 | Transform,
|
| 138 | + UnsqueezeTransform, |
136 | 139 | )
|
137 | 140 | from torchrl.envs.utils import (
|
138 | 141 | _StepMDP,
|
|
174 | 177 | _has_chess = importlib.util.find_spec("chess") is not None
|
175 | 178 | _has_tv = importlib.util.find_spec("torchvision") is not None
|
176 | 179 | _has_cairosvg = importlib.util.find_spec("cairosvg") is not None
|
| 180 | +_has_transformers = importlib.util.find_spec("transformers") is not None |
177 | 181 | ## TO BE FIXED: DiscreteActionProjection queries a randint on each worker, which leads to divergent results between
|
178 | 182 | ## the serial and parallel batched envs
|
179 | 183 | # def _make_atari_env(atari_env):
|
@@ -2614,6 +2618,7 @@ def test_parallel(
|
2614 | 2618 | NestedCountingEnv,
|
2615 | 2619 | HeterogeneousCountingEnv,
|
2616 | 2620 | MultiKeyCountingEnv,
|
| 2621 | + Str2StrEnv, |
2617 | 2622 | ],
|
2618 | 2623 | )
|
2619 | 2624 | def test_mocking_envs(envclass):
|
@@ -3441,6 +3446,96 @@ def test_partial_rest(self, batched):
|
3441 | 3446 | assert s_["string"] == ["0", "6"]
|
3442 | 3447 | assert s["next", "string"] == ["6", "6"]
|
3443 | 3448 |
|
@pytest.mark.skipif(not _has_transformers, reason="transformers required")
def test_str2str_env_tokenizer(self):
    """A Tokenizer appended to a string env yields tensor token keys.

    Checks that the env reports dynamic specs, that a non-contiguous
    rollout keeps string observations as lists, and that densifying with a
    jagged layout turns the token entries into tensors while the raw
    string entries remain lists.
    """
    base_env = Str2StrEnv()
    base_env.set_seed(0)
    tokenizer = Tokenizer(
        in_keys=["observation"],
        out_keys=["obs_tokens"],
        in_keys_inv=["action"],
        out_keys_inv=["action_tokens"],
    )
    env = base_env.append_transform(tokenizer)
    env.check_env_specs()
    # Variable-length token sequences imply dynamic (non-static) specs.
    assert env._has_dynamic_specs
    rollout = env.rollout(3, return_contiguous=False)
    assert len(rollout) == 3
    assert isinstance(rollout["observation"], list)
    rollout = rollout.densify(layout=torch.jagged)
    # Densification leaves string data as lists but stacks tokens as tensors.
    assert isinstance(rollout["observation"], list)
    assert isinstance(rollout["obs_tokens"], torch.Tensor)
    assert isinstance(rollout["action_tokens"], torch.Tensor)
| 3470 | + |
@pytest.mark.skipif(not _has_transformers, reason="transformers required")
def test_str2str_env_tokenizer_catframes(self):
    """Tests that we can use Unsqueeze + CatFrames with tokenized strings of variable lengths."""
    base_env = Str2StrEnv()
    base_env.set_seed(0)
    # We must use max_length otherwise we can't call cat.
    # Perhaps we could use NJT here?
    tokenizer = Tokenizer(
        in_keys=["observation"],
        out_keys=["obs_tokens"],
        in_keys_inv=["action"],
        out_keys_inv=["action_tokens"],
        max_length=10,
    )
    # Add a frame dim (-2) so CatFrames can stack the last N observations.
    unsqueeze = UnsqueezeTransform(
        dim=-2, in_keys=["obs_tokens"], out_keys=["obs_tokens_cat"]
    )
    env = base_env.append_transform(tokenizer)
    env = env.append_transform(unsqueeze)
    env = env.append_transform(CatFrames(N=4, dim=-2, in_keys=["obs_tokens_cat"]))
    rollout = env.rollout(3)
    # 3 steps, 4 stacked frames, 10 (padded) tokens each.
    assert rollout["obs_tokens_cat"].shape == (3, 4, 10)
| 3495 | + |
@pytest.mark.skipif(not _has_transformers, reason="transformers required")
def test_str2str_rb_slicesampler(self):
    """Dedicated test for replay buffer sampling of trajectories with variable token length.

    Rolls out two back-to-back trajectories (StepCounter caps each at 10
    steps), stores them in a LazyStackStorage-backed buffer, and checks
    that SliceSampler only ever returns slices drawn from a single
    trajectory, eventually sampling both of them.
    """
    from torchrl.data import LazyStackStorage, ReplayBuffer, SliceSampler
    from torchrl.envs import TrajCounter

    env = Str2StrEnv()
    env.set_seed(0)
    env = env.append_transform(
        Tokenizer(
            in_keys=["observation"],
            out_keys=["obs_tokens"],
            in_keys_inv=["action"],
            out_keys_inv=["action_tokens"],
        )
    )
    env = env.append_transform(StepCounter(max_steps=10))
    env = env.append_transform(TrajCounter())
    rb = ReplayBuffer(
        storage=LazyStackStorage(100),
        sampler=SliceSampler(slice_len=10, end_key=("next", "done")),
    )
    r0 = env.rollout(20, break_when_any_done=False)
    rb.extend(r0)
    has_0 = False
    has_1 = False
    for _ in range(100):
        v0 = rb.sample(10)
        # Each slice must be a contiguous 10-step window of one trajectory.
        assert (v0["step_count"].squeeze() == torch.arange(10)).all()
        assert (v0["next", "step_count"].squeeze() == torch.arange(1, 11)).all()
        try:
            # .item() raises when the slice mixes more than one traj id.
            traj = v0["traj_count"].unique().item()
        except Exception as err:
            # Chain the original error so the real failure stays visible
            # in the traceback (avoids masking the cause, cf. B904).
            raise RuntimeError(
                f"More than one traj found in single slice: {v0['traj_count']}"
            ) from err
        has_0 |= traj == 0
        has_1 |= traj == 1
        if has_0 and has_1:
            break
    else:
        raise RuntimeError("Failed to sample both trajs")
| 3538 | + |
3444 | 3539 |
|
3445 | 3540 | # fen strings for board positions generated with:
|
3446 | 3541 | # https://lichess.org/editor
|
@@ -3676,6 +3771,7 @@ def test_reward(
|
3676 | 3771 | assert td["reward"] == expected_reward
|
3677 | 3772 | assert td["turn"] == (not expected_turn)
|
3678 | 3773 |
|
| 3774 | + @pytest.mark.skipif(not _has_transformers, reason="transformers required") |
3679 | 3775 | def test_chess_tokenized(self):
|
3680 | 3776 | env = ChessEnv(include_fen=True, stateful=True, include_san=True)
|
3681 | 3777 | assert isinstance(env.observation_spec["fen"], NonTensor)
|
|
0 commit comments