[Feature] IsaacLab wrapper

vmoens · vmoens · commit d08e4885ad54 · 2025-05-06T10:00:05.000+01:00
ghstack-source-id: 1794a63 Pull-Request-resolved: #2937
diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst
@@ -1417,6 +1417,7 @@ the following function will return ``1`` when queried:
     HabitatEnv
     IsaacGymEnv
     IsaacGymWrapper
+    IsaacLabWrapper
     JumanjiEnv
     JumanjiWrapper
     MeltingpotEnv
diff --git a/test/test_libs.py b/test/test_libs.py
@@ -32,32 +32,6 @@
 import pytest
 import torch
 
-if os.getenv("PYTORCH_TEST_FBCODE"):
-    from pytorch.rl.test._utils_internal import (
-        _make_multithreaded_env,
-        CARTPOLE_VERSIONED,
-        get_available_devices,
-        get_default_devices,
-        HALFCHEETAH_VERSIONED,
-        PENDULUM_VERSIONED,
-        PONG_VERSIONED,
-        rand_reset,
-        retry,
-        rollout_consistency_assertion,
-    )
-else:
-    from _utils_internal import (
-        _make_multithreaded_env,
-        CARTPOLE_VERSIONED,
-        get_available_devices,
-        get_default_devices,
-        HALFCHEETAH_VERSIONED,
-        PENDULUM_VERSIONED,
-        PONG_VERSIONED,
-        rand_reset,
-        retry,
-        rollout_consistency_assertion,
-    )
 from packaging import version
 from tensordict import (
     assert_allclose_td,
@@ -155,6 +129,33 @@
     ValueOperator,
 )
 
+if os.getenv("PYTORCH_TEST_FBCODE"):
+    from pytorch.rl.test._utils_internal import (
+        _make_multithreaded_env,
+        CARTPOLE_VERSIONED,
+        get_available_devices,
+        get_default_devices,
+        HALFCHEETAH_VERSIONED,
+        PENDULUM_VERSIONED,
+        PONG_VERSIONED,
+        rand_reset,
+        retry,
+        rollout_consistency_assertion,
+    )
+else:
+    from _utils_internal import (
+        _make_multithreaded_env,
+        CARTPOLE_VERSIONED,
+        get_available_devices,
+        get_default_devices,
+        HALFCHEETAH_VERSIONED,
+        PENDULUM_VERSIONED,
+        PONG_VERSIONED,
+        rand_reset,
+        retry,
+        rollout_consistency_assertion,
+    )
+
 _has_d4rl = importlib.util.find_spec("d4rl") is not None
 
 _has_mo = importlib.util.find_spec("mo_gymnasium") is not None
@@ -166,6 +167,9 @@
 _has_minari = importlib.util.find_spec("minari") is not None
 
 _has_gymnasium = importlib.util.find_spec("gymnasium") is not None
+
+_has_isaaclab = importlib.util.find_spec("isaaclab") is not None
+
 _has_gym_regular = importlib.util.find_spec("gym") is not None
 if _has_gymnasium:
     set_gym_backend("gymnasium").set()
@@ -4541,6 +4545,46 @@ def test_render(self, rollout_steps):
         assert not torch.equal(rollout_penultimate_image, image_from_env)
 
 
+@pytest.mark.skipif(not _has_isaaclab, reason="Isaaclab not found")
+class TestIsaacLab:
+    """Smoke tests for ``torchrl.envs.libs.isaac_lab.IsaacLabWrapper``."""
+
+    def test_isaaclab(self):
+        """Wrap Isaac-Ant-v0, check its batch size and run the spec check."""
+        import gymnasium as gym
+        import isaaclab_tasks  # noqa: F401
+        from isaaclab_tasks.manager_based.classic.ant.ant_env_cfg import AntEnvCfg
+        from torchrl.envs.libs.isaac_lab import IsaacLabWrapper
+
+        env = gym.make("Isaac-Ant-v0", cfg=AntEnvCfg())
+        env = IsaacLabWrapper(env)
+        assert env.batch_size == (4096,)
+        env.check_env_specs(break_when_any_done="both")
+
+    def test_isaac_collector(self):
+        """Draw a single batch from the wrapped env with a SyncDataCollector."""
+        import gymnasium as gym
+        import isaaclab_tasks  # noqa: F401
+        from isaaclab_tasks.manager_based.classic.ant.ant_env_cfg import AntEnvCfg
+        from torchrl.envs.libs.isaac_lab import IsaacLabWrapper
+
+        env = gym.make("Isaac-Ant-v0", cfg=AntEnvCfg())
+        env = IsaacLabWrapper(env)
+        col = SyncDataCollector(
+            env, env.rand_action, frames_per_batch=1000, total_frames=100_000_000
+        )
+        try:
+            for _ in col:
+                break
+        finally:
+            # Shut down explicitly instead of relying on garbage collection.
+            col.shutdown()
+
+    def test_isaaclab_reset(self):
+        """Placeholder: report a skip rather than a pass until implemented."""
+        pytest.skip("IsaacLab reset test is not implemented yet.")
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/torchrl/envs/__init__.py b/torchrl/envs/__init__.py
@@ -20,6 +20,7 @@
     HabitatEnv,
     IsaacGymEnv,
     IsaacGymWrapper,
+    IsaacLabWrapper,
     JumanjiEnv,
     JumanjiWrapper,
     MeltingpotEnv,
@@ -131,6 +132,7 @@
     "ActionDiscretizer",
     "ActionMask",
     "VecNormV2",
+    "IsaacLabWrapper",
     "AutoResetEnv",
     "AutoResetTransform",
     "AsyncEnvPool",
diff --git a/torchrl/envs/gym_like.py b/torchrl/envs/gym_like.py
@@ -515,6 +515,8 @@ def validated(self, value):
     def _reset(
         self, tensordict: TensorDictBase | None = None, **kwargs
     ) -> TensorDictBase:
+        if tensordict is not None and "_reset" in tensordict and not tensordict["_reset"].all():
+            raise RuntimeError("Partial resets are not handled at this level.")
         obs, info = self._reset_output_transform(self._env.reset(**kwargs))
 
         source = self.read_obs(obs)
diff --git a/torchrl/envs/libs/__init__.py b/torchrl/envs/libs/__init__.py
@@ -16,6 +16,7 @@
     set_gym_backend,
 )
 from .habitat import HabitatEnv
+from .isaac_lab import IsaacLabWrapper
 from .isaacgym import IsaacGymEnv, IsaacGymWrapper
 from .jumanji import JumanjiEnv, JumanjiWrapper
 from .meltingpot import MeltingpotEnv, MeltingpotWrapper
@@ -32,22 +33,20 @@
     "BraxWrapper",
     "DMControlEnv",
     "DMControlWrapper",
-    "MultiThreadedEnv",
-    "MultiThreadedEnvWrapper",
-    "gym_backend",
     "GymEnv",
     "GymWrapper",
-    "MOGymEnv",
-    "MOGymWrapper",
-    "register_gym_spec_conversion",
-    "set_gym_backend",
     "HabitatEnv",
     "IsaacGymEnv",
     "IsaacGymWrapper",
+    "IsaacLabWrapper",
     "JumanjiEnv",
     "JumanjiWrapper",
+    "MOGymEnv",
+    "MOGymWrapper",
     "MeltingpotEnv",
     "MeltingpotWrapper",
+    "MultiThreadedEnv",
+    "MultiThreadedEnvWrapper",
     "OpenMLEnv",
     "OpenSpielEnv",
     "OpenSpielWrapper",
@@ -60,4 +59,7 @@
     "UnityMLAgentsWrapper",
     "VmasEnv",
     "VmasWrapper",
+    "gym_backend",
+    "register_gym_spec_conversion",
+    "set_gym_backend",
 ]
diff --git a/torchrl/envs/libs/gym.py b/torchrl/envs/libs/gym.py
@@ -53,6 +53,7 @@
 
 _has_mo = importlib.util.find_spec("mo_gymnasium") is not None
 _has_sb3 = importlib.util.find_spec("stable_baselines3") is not None
+_has_isaaclab = importlib.util.find_spec("isaaclab") is not None
 _has_minigrid = importlib.util.find_spec("minigrid") is not None
 
 
@@ -803,6 +804,11 @@ def __call__(cls, *args, **kwargs):
                 VecGymEnvTransform,
             )
 
+            if _has_isaaclab:
+                from isaaclab.envs import ManagerBasedRLEnv
+                if isinstance(instance._env.unwrapped, ManagerBasedRLEnv):
+                    return TransformedEnv(instance, VecGymEnvTransform())
+
             if _has_sb3:
                 from stable_baselines3.common.vec_env.base_vec_env import VecEnv
 
@@ -1069,12 +1075,15 @@ def _post_init(self):
 
     @property
     def _is_batched(self):
+        tuple_of_classes = ()
         if _has_sb3:
             from stable_baselines3.common.vec_env.base_vec_env import VecEnv
 
-            tuple_of_classes = (VecEnv,)
-        else:
-            tuple_of_classes = ()
+            tuple_of_classes = tuple_of_classes + (VecEnv,)
+        if _has_isaaclab:
+            from isaaclab.envs import ManagerBasedRLEnv
+
+            tuple_of_classes = tuple_of_classes + (ManagerBasedRLEnv,)
         return isinstance(
             self._env, tuple_of_classes + (gym_backend("vector").VectorEnv,)
         )
diff --git a/torchrl/envs/libs/isaac_lab.py b/torchrl/envs/libs/isaac_lab.py
@@ -0,0 +1,116 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import torch
+from torchrl.envs.libs.gym import GymWrapper
+
+
+class IsaacLabWrapper(GymWrapper):
+    """A wrapper for IsaacLab environments.
+
+    Specializes :class:`~torchrl.envs.libs.gym.GymWrapper` by presetting the
+    constructor arguments IsaacLab needs and by adapting the 5-element step
+    output tuple (see :meth:`_output_transform`).
+
+    Args:
+        env (ManagerBasedRLEnv): the IsaacLab environment to wrap.
+        categorical_action_encoding (bool, optional): forwarded to
+            :class:`~torchrl.envs.libs.gym.GymWrapper`. Defaults to ``False``.
+        allow_done_after_reset (bool, optional): forwarded to
+            :class:`~torchrl.envs.libs.gym.GymWrapper`. Defaults to ``True``.
+        convert_actions_to_numpy (bool, optional): forwarded to
+            :class:`~torchrl.envs.libs.gym.GymWrapper`. Defaults to ``False``.
+        device (torch.device, optional): forwarded to
+            :class:`~torchrl.envs.libs.gym.GymWrapper`. Defaults to ``cuda:0``.
+        **kwargs: extra keyword arguments forwarded to
+            :class:`~torchrl.envs.libs.gym.GymWrapper`.
+    """
+
+    def __init__(
+        self,
+        env: "ManagerBasedRLEnv",
+        categorical_action_encoding=False,
+        allow_done_after_reset=True,
+        convert_actions_to_numpy=False,
+        device=torch.device("cuda:0"),
+        **kwargs,
+    ):
+        super().__init__(
+            env,
+            device=device,
+            categorical_action_encoding=categorical_action_encoding,
+            allow_done_after_reset=allow_done_after_reset,
+            convert_actions_to_numpy=convert_actions_to_numpy,
+            **kwargs,
+        )
+
+    def seed(self, seed: int | None):
+        """Seed the environment by delegating to ``_set_seed``."""
+        self._set_seed(seed)
+
+    def _output_transform(self, step_outputs_tuple):
+        """Add a ``done`` entry to a 5-element step output tuple.
+
+        Args:
+            step_outputs_tuple (tuple): ``(observations, reward, terminated,
+                truncated, info)``.
+
+        Returns:
+            tuple: ``(observations, reward, terminated, truncated, done, info)``
+            with ``done = terminated | truncated`` and a trailing singleton
+            dimension appended to ``reward``.
+        """
+        # The variable naming follows torchrl's convention here.
+        observations, reward, terminated, truncated, info = step_outputs_tuple
+        # `|` allocates a new tensor, so `done` needs no extra clone.
+        done = terminated | truncated
+        reward = reward.unsqueeze(-1)  # to get to (num_envs, 1)
+        # IsaacLab will modify the `terminated` and `truncated` tensors in-place.
+        # We clone them here to make sure data doesn't inadvertently get modified.
+        return (
+            observations,
+            reward,
+            terminated.clone(),
+            truncated.clone(),
+            done,
+            info,
+        )
+
+
+if __name__ == "__main__":
+    # Manual smoke test: run a random-action collector on Isaac-Ant-v0 and log
+    # the collected frame count with WandbLogger.
+    import argparse
+
+    from isaaclab.app import AppLauncher
+
+    parser = argparse.ArgumentParser(
+        description="Smoke-test IsaacLabWrapper with a random-action collector."
+    )
+    AppLauncher.add_app_launcher_args(parser)
+    args_cli, _ = parser.parse_known_args()
+
+    # Launch the simulation app before importing gymnasium / isaaclab_tasks.
+    app_launcher = AppLauncher(args_cli)
+    import gymnasium as gym
+    import isaaclab_tasks  # noqa: F401
+    import tqdm
+    from isaaclab_tasks.manager_based.classic.ant.ant_env_cfg import AntEnvCfg
+    from torchrl.collectors import SyncDataCollector
+    from torchrl.record.loggers.wandb import WandbLogger
+
+    env = gym.make("Isaac-Ant-v0", cfg=AntEnvCfg())
+    env = IsaacLabWrapper(env)
+
+    logger = WandbLogger(exp_name="test_isaac")
+    col = SyncDataCollector(
+        env, env.rand_action, frames_per_batch=1000, total_frames=100_000_000
+    )
+    try:
+        for _ in tqdm.tqdm(col):
+            logger.log_scalar("frames", col._frames)
+    finally:
+        col.shutdown()
diff --git a/torchrl/envs/utils.py b/torchrl/envs/utils.py
@@ -14,7 +14,7 @@
 import re
 import warnings
 from enum import Enum
-from typing import Any
+from typing import Any, Literal
 
 import torch
 
@@ -687,7 +687,7 @@ def check_env_specs(
     check_dtype=True,
     seed: int | None = None,
     tensordict: TensorDictBase | None = None,
-    break_when_any_done: bool | str = None,
+    break_when_any_done: bool | Literal["both"] = None,
 ):
     """Tests an environment specs against the results of short rollout.