diff --git a/source/isaaclab/isaaclab/envs/mdp/rewards.py b/source/isaaclab/isaaclab/envs/mdp/rewards.py index b1601ec44a..ff050c4917 100644 --- a/source/isaaclab/isaaclab/envs/mdp/rewards.py +++ b/source/isaaclab/isaaclab/envs/mdp/rewards.py @@ -279,6 +279,16 @@ def contact_forces(env: ManagerBasedRLEnv, threshold: float, sensor_cfg: SceneEn return torch.sum(violation.clip(min=0.0), dim=1) +def desired_contacts_all(env, sensor_cfg: SceneEntityCfg, threshold: float = 1.0) -> torch.Tensor: + """Penalize if not all of the desired contacts are present.""" + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + contacts = ( + contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > threshold + ) + zero_contact = (~contacts).all(dim=1) + return 1.0 * zero_contact + + """ Velocity-tracking rewards. """ diff --git a/source/isaaclab_assets/isaaclab_assets/robots/agility.py b/source/isaaclab_assets/isaaclab_assets/robots/agility.py new file mode 100644 index 0000000000..83e08889d0 --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/agility.py @@ -0,0 +1,39 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets.articulation import ArticulationCfg +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +LEG_JOINT_NAMES = [ + ".*_hip_roll", + ".*_hip_yaw", + ".*_hip_pitch", + ".*_knee", + ".*_toe_a", + ".*_toe_b", +] + +ARM_JOINT_NAMES = [".*_arm_.*"] + + +DIGIT_V4_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Agility/Digit/digit_v4.usd", + activate_contact_sensors=True, + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.0, 1.05), + ), + soft_joint_pos_limit_factor=0.9, + actuators={ + "all": ImplicitActuatorCfg( + joint_names_expr=".*", + stiffness=None, + damping=None, + ), + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/__init__.py new file mode 100644 index 0000000000..15b6d29060 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Locomotion environments for legged robots.""" + +from .tracking import * # noqa diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/__init__.py new file mode 100644 index 0000000000..e75ca2bc3f --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/__init__.py new file mode 100644 index 0000000000..e75ca2bc3f --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/__init__.py new file mode 100644 index 0000000000..4da47ae8f7 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/__init__.py @@ -0,0 +1,32 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## +gym.register( + id="Isaac-Tracking-LocoManip-Digit-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.loco_manip_env_cfg:DigitLocoManipEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:DigitLocoManipPPORunnerCfg", + }, +) + + +gym.register( + id="Isaac-Tracking-LocoManip-Digit-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.loco_manip_env_cfg:DigitLocoManipEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:DigitLocoManipPPORunnerCfg", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/__init__.py new file mode 100644 index 0000000000..e75ca2bc3f --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000000..543849a920 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + +from isaaclab.utils import configclass + + +@configclass +class DigitLocoManipPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 2000 + save_interval = 50 + experiment_name = "digit_loco_manip" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 128], + critic_hidden_dims=[256, 128, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.01, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/loco_manip_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/loco_manip_env_cfg.py new file mode 100644 index 0000000000..2e7168c8d2 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/loco_manip_env_cfg.py @@ -0,0 +1,250 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math + +from isaaclab_assets.robots.agility import ARM_JOINT_NAMES, LEG_JOINT_NAMES + +from isaaclab.managers import EventTermCfg +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils import configclass +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise + +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as mdp +import isaaclab_tasks.manager_based.manipulation.reach.mdp as manipulation_mdp +from isaaclab_tasks.manager_based.locomotion.velocity.config.digit.rough_env_cfg import DigitRewards, DigitRoughEnvCfg +from isaaclab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import EventCfg + + +@configclass +class DigitLocoManipRewards(DigitRewards): + joint_deviation_arms = None + + joint_vel_hip_yaw = RewTerm( + func=mdp.joint_vel_l2, + weight=-0.001, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_leg_hip_yaw"])}, + ) + + left_ee_pos_tracking = RewTerm( + func=manipulation_mdp.position_command_error, + weight=-2.0, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="left_arm_wrist_yaw"), + "command_name": "left_ee_pose", + }, + ) + + left_ee_pos_tracking_fine_grained = RewTerm( + func=manipulation_mdp.position_command_error_tanh, + weight=2.0, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="left_arm_wrist_yaw"), + "std": 0.05, + "command_name": "left_ee_pose", + }, + ) + + left_end_effector_orientation_tracking = RewTerm( + func=manipulation_mdp.orientation_command_error, + weight=-0.2, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="left_arm_wrist_yaw"), + "command_name": "left_ee_pose", + }, + ) + + right_ee_pos_tracking = RewTerm( + func=manipulation_mdp.position_command_error, + weight=-2.0, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="right_arm_wrist_yaw"), + "command_name": "right_ee_pose", + }, + ) + + right_ee_pos_tracking_fine_grained = RewTerm( + func=manipulation_mdp.position_command_error_tanh, + weight=2.0, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="right_arm_wrist_yaw"), + "std": 0.05, + "command_name": "right_ee_pose", + }, + ) + + right_end_effector_orientation_tracking = RewTerm( + func=manipulation_mdp.orientation_command_error, + weight=-0.2, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="right_arm_wrist_yaw"), + "command_name": "right_ee_pose", + }, + ) + + +@configclass +class DigitLocoManipObservations: + + @configclass + class PolicyCfg(ObsGroup): + base_lin_vel = ObsTerm( + func=mdp.base_lin_vel, + noise=Unoise(n_min=-0.1, n_max=0.1), + ) + base_ang_vel = ObsTerm( + func=mdp.base_ang_vel, + noise=Unoise(n_min=-0.2, n_max=0.2), + ) + projected_gravity = ObsTerm( + func=mdp.projected_gravity, + noise=Unoise(n_min=-0.05, n_max=0.05), + ) + velocity_commands = ObsTerm( + func=mdp.generated_commands, + params={"command_name": "base_velocity"}, + ) + left_ee_pose_command = ObsTerm( + func=mdp.generated_commands, + params={"command_name": "left_ee_pose"}, + ) + right_ee_pose_command = ObsTerm( + func=mdp.generated_commands, + params={"command_name": "right_ee_pose"}, + ) + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=LEG_JOINT_NAMES + ARM_JOINT_NAMES)}, + noise=Unoise(n_min=-0.01, n_max=0.01), + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=LEG_JOINT_NAMES + ARM_JOINT_NAMES)}, + noise=Unoise(n_min=-1.5, n_max=1.5), + ) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + policy = PolicyCfg() + + +@configclass +class DigitLocoManipCommands: + base_velocity = mdp.UniformVelocityCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + rel_standing_envs=0.25, + rel_heading_envs=1.0, + heading_command=True, + debug_vis=True, + ranges=mdp.UniformVelocityCommandCfg.Ranges( + lin_vel_x=(-1.0, 1.0), + lin_vel_y=(-1.0, 1.0), + ang_vel_z=(-1.0, 1.0), + heading=(-math.pi, math.pi), + ), + ) + + left_ee_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name="left_arm_wrist_yaw", + resampling_time_range=(1.0, 3.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.10, 0.50), + pos_y=(0.05, 0.50), + pos_z=(-0.20, 0.20), + roll=(-0.1, 0.1), + pitch=(-0.1, 0.1), + yaw=(math.pi / 2.0 - 0.1, math.pi / 2.0 + 0.1), + ), + ) + + right_ee_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name="right_arm_wrist_yaw", + resampling_time_range=(1.0, 3.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.10, 0.50), + pos_y=(-0.50, -0.05), + pos_z=(-0.20, 0.20), + roll=(-0.1, 0.1), + pitch=(-0.1, 0.1), + yaw=(-math.pi / 2.0 - 0.1, -math.pi / 2.0 + 0.1), + ), + ) + + +@configclass +class DigitEvents(EventCfg): + # Add an external force to simulate a payload being carried. + left_hand_force = EventTermCfg( + func=mdp.apply_external_force_torque, + mode="interval", + interval_range_s=(10.0, 15.0), + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="left_arm_wrist_yaw"), + "force_range": (-10.0, 10.0), + "torque_range": (-1.0, 1.0), + }, + ) + + right_hand_force = EventTermCfg( + func=mdp.apply_external_force_torque, + mode="interval", + interval_range_s=(10.0, 15.0), + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="right_arm_wrist_yaw"), + "force_range": (-10.0, 10.0), + "torque_range": (-1.0, 1.0), + }, + ) + + +@configclass +class DigitLocoManipEnvCfg(DigitRoughEnvCfg): + rewards: DigitLocoManipRewards = DigitLocoManipRewards() + observations: DigitLocoManipObservations = DigitLocoManipObservations() + commands: DigitLocoManipCommands = DigitLocoManipCommands() + + def __post_init__(self): + super().__post_init__() + + self.episode_length_s = 14.0 + + # Rewards: + self.rewards.flat_orientation_l2.weight = -10.5 + self.rewards.termination_penalty.weight = -100.0 + + # Change terrain to flat. + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + # Remove height scanner. + self.scene.height_scanner = None + self.observations.policy.height_scan = None + # Remove terrain curriculum. + self.curriculum.terrain_levels = None + + +class DigitLocoManipEnvCfg_PLAY(DigitLocoManipEnvCfg): + + def __post_init__(self) -> None: + super().__post_init__() + + # Make a smaller scene for play. + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # Disable randomization for play. + self.observations.policy.enable_corruption = False + # Remove random pushing. + self.randomization.base_external_force_torque = None + self.randomization.push_robot = None diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/__init__.py new file mode 100644 index 0000000000..5acc905c98 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/__init__.py @@ -0,0 +1,54 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## +gym.register( + id="Isaac-Velocity-Flat-Digit-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:DigitFlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:DigitFlatPPORunnerCfg", + }, +) + + +gym.register( + id="Isaac-Velocity-Flat-Digit-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:DigitFlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:DigitFlatPPORunnerCfg", + }, +) + + +gym.register( + id="Isaac-Velocity-Rough-Digit-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.rough_env_cfg:DigitRoughEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:DigitRoughPPORunnerCfg", + }, +) + + +gym.register( + id="Isaac-Velocity-Rough-Digit-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.rough_env_cfg:DigitRoughEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:DigitRoughPPORunnerCfg", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/__init__.py new file mode 100644 index 0000000000..e75ca2bc3f --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 0000000000..6070148167 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,49 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + +from isaaclab.utils import configclass + + +@configclass +class DigitRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 3000 + save_interval = 50 + experiment_name = "digit_rough" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[512, 256, 128], + critic_hidden_dims=[512, 256, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.01, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-3, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) + + +@configclass +class DigitFlatPPORunnerCfg(DigitRoughPPORunnerCfg): + def __post_init__(self): + super().__post_init__() + + self.max_iterations = 2000 + self.experiment_name = "digit_flat" + + self.policy.actor_hidden_dims = [128, 128, 128] + self.policy.critic_hidden_dims = [128, 128, 128] diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/flat_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/flat_env_cfg.py new file mode 100644 index 0000000000..4c3d9829b9 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/flat_env_cfg.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from .rough_env_cfg import DigitRoughEnvCfg + + +@configclass +class DigitFlatEnvCfg(DigitRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + + # Change terrain to flat. + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + # Remove height scanner. + self.scene.height_scanner = None + self.observations.policy.height_scan = None + # Remove terrain curriculum. + self.curriculum.terrain_levels = None + + +class DigitFlatEnvCfg_PLAY(DigitFlatEnvCfg): + def __post_init__(self) -> None: + super().__post_init__() + + # Make a smaller scene for play. + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # Disable randomization for play. + self.observations.policy.enable_corruption = False + # Remove random pushing. + self.randomization.base_external_force_torque = None + self.randomization.push_robot = None diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/rough_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/rough_env_cfg.py new file mode 100644 index 0000000000..4f08025ea8 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/rough_env_cfg.py @@ -0,0 +1,264 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math + +from isaaclab_assets.robots.agility import ARM_JOINT_NAMES, DIGIT_V4_CFG, LEG_JOINT_NAMES + +from isaaclab.managers import ObservationGroupCfg, ObservationTermCfg, RewardTermCfg, SceneEntityCfg, TerminationTermCfg +from isaaclab.utils import configclass +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise + +import isaaclab_tasks.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + + +@configclass +class DigitRewards: + termination_penalty = RewardTermCfg( + func=mdp.is_terminated, + weight=-100.0, + ) + track_lin_vel_xy_exp = RewardTermCfg( + func=mdp.track_lin_vel_xy_yaw_frame_exp, + weight=1.0, + params={"command_name": "base_velocity", "std": math.sqrt(0.25)}, + ) + track_ang_vel_z_exp = RewardTermCfg( + func=mdp.track_ang_vel_z_world_exp, + weight=1.0, + params={ + "command_name": "base_velocity", + "std": math.sqrt(0.25), + }, + ) + feet_air_time = RewardTermCfg( + func=mdp.feet_air_time_positive_biped, + weight=0.25, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_leg_toe_roll"), + "threshold": 0.8, + "command_name": "base_velocity", + }, + ) + feet_slide = RewardTermCfg( + func=mdp.feet_slide, + weight=-0.25, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_leg_toe_roll"), + "asset_cfg": SceneEntityCfg("robot", body_names=".*_leg_toe_roll"), + }, + ) + dof_torques_l2 = RewardTermCfg( + func=mdp.joint_torques_l2, + weight=-1.0e-6, + ) + dof_acc_l2 = RewardTermCfg( + func=mdp.joint_acc_l2, + weight=-2.0e-7, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=LEG_JOINT_NAMES + ARM_JOINT_NAMES)}, + ) + action_rate_l2 = RewardTermCfg( + func=mdp.action_rate_l2, + weight=-0.008, + ) + flat_orientation_l2 = RewardTermCfg( + func=mdp.flat_orientation_l2, + weight=-2.5, + ) + stand_still = RewardTermCfg( + func=mdp.stand_still_joint_deviation_l1, + weight=-0.4, + params={ + "command_name": "base_velocity", + "asset_cfg": SceneEntityCfg("robot", joint_names=LEG_JOINT_NAMES), + }, + ) + lin_vel_z_l2 = RewardTermCfg( + func=mdp.lin_vel_z_l2, + weight=-2.0, + ) + ang_vel_xy_l2 = RewardTermCfg( + func=mdp.ang_vel_xy_l2, + weight=-0.1, + ) + no_jumps = RewardTermCfg( + func=mdp.desired_contacts_all, + weight=-0.5, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=[".*_leg_toe_roll"])}, + ) + dof_pos_limits = RewardTermCfg( + func=mdp.joint_pos_limits, + weight=-1.0, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_leg_toe_roll", ".*_leg_toe_pitch", ".*_tarsus"])}, + ) + joint_deviation_hip_roll = RewardTermCfg( + func=mdp.joint_deviation_l1, + weight=-0.1, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_leg_hip_roll")}, + ) + joint_deviation_hip_yaw = RewardTermCfg( + func=mdp.joint_deviation_l1, + weight=-0.2, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_leg_hip_yaw")}, + ) + joint_deviation_knee = RewardTermCfg( + func=mdp.joint_deviation_l1, + weight=-0.2, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_tarsus")}, + ) + joint_deviation_feet = RewardTermCfg( + func=mdp.joint_deviation_l1, + weight=-0.1, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_toe_a", ".*_toe_b"])}, + ) + joint_deviation_arms = RewardTermCfg( + func=mdp.joint_deviation_l1, + weight=-0.2, + params={ + "asset_cfg": SceneEntityCfg("robot", joint_names=".*_arm_.*"), + }, + ) + + undesired_contacts = RewardTermCfg( + func=mdp.undesired_contacts, + weight=-0.1, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=[".*_rod", ".*_tarsus"]), + "threshold": 1.0, + }, + ) + + +@configclass +class DigitObservations: + @configclass + class PolicyCfg(ObservationGroupCfg): + base_lin_vel = ObservationTermCfg( + func=mdp.base_lin_vel, + noise=Unoise(n_min=-0.1, n_max=0.1), + ) + base_ang_vel = ObservationTermCfg( + func=mdp.base_ang_vel, + noise=Unoise(n_min=-0.2, n_max=0.2), + ) + projected_gravity = ObservationTermCfg( + func=mdp.projected_gravity, + noise=Unoise(n_min=-0.05, n_max=0.05), + ) + velocity_commands = ObservationTermCfg( + func=mdp.generated_commands, + params={"command_name": "base_velocity"}, + ) + joint_pos = ObservationTermCfg( + func=mdp.joint_pos_rel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=LEG_JOINT_NAMES + ARM_JOINT_NAMES)}, + noise=Unoise(n_min=-0.01, n_max=0.01), + ) + joint_vel = ObservationTermCfg( + func=mdp.joint_vel_rel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=LEG_JOINT_NAMES + ARM_JOINT_NAMES)}, + noise=Unoise(n_min=-1.5, n_max=1.5), + ) + actions = ObservationTermCfg(func=mdp.last_action) + height_scan = ObservationTermCfg( + func=mdp.height_scan, + params={"sensor_cfg": SceneEntityCfg("height_scanner")}, + noise=Unoise(n_min=-0.1, n_max=0.1), + clip=(-1.0, 1.0), + ) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # Observation groups: + policy: PolicyCfg = PolicyCfg() + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = TerminationTermCfg(func=mdp.time_out, time_out=True) + base_contact = TerminationTermCfg( + func=mdp.illegal_contact, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=["torso_base"]), + "threshold": 1.0, + }, + ) + base_orientation = TerminationTermCfg( + func=mdp.bad_orientation, + params={"limit_angle": 0.7}, + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + joint_pos = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=LEG_JOINT_NAMES + ARM_JOINT_NAMES, + scale=0.5, + use_default_offset=True, + ) + + +@configclass +class DigitRoughEnvCfg(LocomotionVelocityRoughEnvCfg): + rewards: DigitRewards = DigitRewards() + observations: DigitObservations = DigitObservations() + terminations: TerminationsCfg = TerminationsCfg() + actions: ActionsCfg = ActionsCfg() + + def __post_init__(self): + super().__post_init__() + self.decimation = 4 + self.sim.dt = 0.005 + + # Scene + self.scene.robot = DIGIT_V4_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene.height_scanner.prim_path = "{ENV_REGEX_NS}/Robot/torso_base" + self.scene.contact_forces.history_length = self.decimation + self.scene.contact_forces.update_period = self.sim.dt + self.scene.height_scanner.update_period = self.decimation * self.sim.dt + + # Events: + self.events.add_base_mass.params["asset_cfg"] = SceneEntityCfg("robot", body_names="torso_base") + self.events.base_external_force_torque.params["asset_cfg"] = SceneEntityCfg("robot", body_names="torso_base") + # Don't randomize the initial joint positions because we have closed loops. + self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0) + + # Commands + self.commands.base_velocity.ranges.lin_vel_x = (-0.8, 0.8) + self.commands.base_velocity.ranges.lin_vel_y = (-0.5, 0.5) + self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0) + self.commands.base_velocity.rel_standing_envs = 0.1 + self.commands.base_velocity.resampling_time_range = (3.0, 8.0) + + +@configclass +class DigitRoughEnvCfg_PLAY(DigitRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + + # Make a smaller scene for play. + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # Spawn the robot randomly in the grid (instead of their terrain levels). + self.scene.terrain.max_init_terrain_level = None + # Reduce the number of terrains to save memory. + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + + # Disable randomization for play. + self.observations.policy.enable_corruption = False + # Remove random pushing. + self.randomization.base_external_force_torque = None + self.randomization.push_robot = None diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/rewards.py index a4245a206f..a5b303d711 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/rewards.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/rewards.py @@ -14,6 +14,7 @@ import torch from typing import TYPE_CHECKING +from isaaclab.envs import mdp from isaaclab.managers import SceneEntityCfg from isaaclab.sensors import ContactSensor from isaaclab.utils.math import quat_rotate_inverse, yaw_quat @@ -104,3 +105,12 @@ def track_ang_vel_z_world_exp( asset = env.scene[asset_cfg.name] ang_vel_error = torch.square(env.command_manager.get_command(command_name)[:, 2] - asset.data.root_ang_vel_w[:, 2]) return torch.exp(-ang_vel_error / std**2) + + +def stand_still_joint_deviation_l1( + env, command_name: str, command_threshold: float = 0.06, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") +) -> torch.Tensor: + """Penalize offsets from the default joint positions when the command is very small.""" + command = env.command_manager.get_command(command_name) + # Penalize motion when command is nearly zero. + return mdp.joint_deviation_l1(env, asset_cfg) * (torch.norm(command[:, :2], dim=1) < command_threshold)