
Commit 1f2ec65

Elegant!
1 parent 7699823 commit 1f2ec65

File tree

10 files changed: +16 -157 lines changed

10 files changed

+16
-157
lines changed

examples/example_aip_battery_task.py

Lines changed: 0 additions & 73 deletions
This file was deleted.

scripts/reactive_tamp.py

Lines changed: 0 additions & 1 deletion
@@ -20,7 +20,6 @@
 
 class REACTIVE_TAMP:
     def __init__(self, cfg) -> None:
-
         self.sim = wrapper.IsaacGymWrapper(
             cfg.isaacgym,
             cfg.env_type,

scripts/sim.py

Lines changed: 0 additions & 2 deletions
@@ -17,7 +17,6 @@
 
 @hydra.main(version_base=None, config_path="../src/m3p2i_aip/config", config_name="config_point")
 def run_sim(cfg: ExampleConfig):
-
     sim = wrapper.IsaacGymWrapper(
         cfg.isaacgym,
         cfg.env_type,
@@ -43,7 +42,6 @@ def run_sim(cfg: ExampleConfig):
             planner.run_tamp(
                 torch_to_bytes(sim._dof_state), torch_to_bytes(sim._root_state))
         )
-        # print("task", cfg.task, "action", action)
         sim.set_dof_velocity_target_tensor(action)
 
         cfg.suction_active = bytes_to_torch(
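
Note: run_tamp exchanges state and action tensors with the planner process as raw bytes. A minimal sketch of how torch_to_bytes / bytes_to_torch helpers of this kind can be written (the names come from the calls above; the bodies are an illustrative assumption, not the repo's verbatim utilities):

import io
import torch

def torch_to_bytes(t: torch.Tensor) -> bytes:
    # Serialize a tensor into a byte string for inter-process transfer.
    buf = io.BytesIO()
    torch.save(t, buf)
    return buf.getvalue()

def bytes_to_torch(b: bytes) -> torch.Tensor:
    # Deserialize on the receiving side.
    return torch.load(io.BytesIO(b))

# Round trip preserves values exactly:
x = torch.randn(3, 4)
assert torch.equal(bytes_to_torch(torch_to_bytes(x)), x)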

src/m3p2i_aip/planners/motion_planner/cost_functions.py

Lines changed: 2 additions & 9 deletions
@@ -54,13 +54,10 @@ def get_push_cost(self, sim: wrapper, block_goal: torch.tensor):
         self.calculate_dist(sim, block_goal)
 
         # Force the robot behind block and goal, align_cost is actually cos(theta)+1
-        # align_cost = align_weight[robot] * (cos_theta + 1) * 5
         align_cost = torch.zeros(self.num_samples, device=self.device)
         align_cost[self.cos_theta>0] = self.cos_theta[self.cos_theta>0]
-        # print('push align', align_cost[:10])
-        # ori_cost = skill_utils.get_general_ori_cube2goal(block_quat, goal_quaternion)
 
-        return 3 * self.dist_cost + 1 * align_cost #+ 10 * ori_cost# [num_envs] 31
+        return 3 * self.dist_cost + 1 * align_cost
 
     def get_pull_cost(self, sim: wrapper, block_goal: torch.tensor):
         self.calculate_dist(sim, block_goal)
@@ -83,16 +80,13 @@ def get_pull_cost(self, sim: wrapper, block_goal: torch.tensor):
         # Force the robot to be in the middle between block and goal, align_cost is actually 1-cos(theta)
         align_cost = torch.zeros(self.num_samples, device=self.device)
         align_cost[self.cos_theta<0] = -self.cos_theta[self.cos_theta<0] # (1 - cos_theta)
-        # print('pull align', align_cost[-10:])
 
         # Add the cost when the robot is close to the block and moves towards the block
         vel_cost = torch.zeros(self.num_samples, device=self.device)
         robot_block_close = robot_to_block_dist <= 0.5
         vel_cost[flag_towards_block*robot_block_close] = 0.6
 
-        # ori_cost = skill_utils.get_general_ori_cube2goal(block_quat, goal_quaternion)
-
-        return 3 * self.dist_cost + 3 * vel_cost + 7 * align_cost #+ 10 * ori_cost # [num_envs] 315
+        return 3 * self.dist_cost + 3 * vel_cost + 7 * align_cost
 
     def get_panda_reach_cost(self, sim, pre_pick_goal):
         ee_l_state = sim.get_actor_link_by_name("panda", "panda_leftfinger")
@@ -110,7 +104,6 @@ def get_panda_reach_cost(self, sim, pre_pick_goal):
         pre_pick_goal_1[2] += self.pre_height_diff
         pre_pick_goal_2[0] -= self.pre_height_diff * self.tilt_cos_theta
         pre_pick_goal_2[2] += self.pre_height_diff * (1 - self.tilt_cos_theta ** 2) ** 0.5
-        # print("1", pre_pick_goal_1, "2", pre_pick_goal_2)
         pre_pick_goal[:self.half_samples, :] = pre_pick_goal_1
         pre_pick_goal[self.half_samples:, :] = pre_pick_goal_2
         reach_cost = torch.linalg.norm(ee_state[:,:3] - pre_pick_goal, axis = 1)
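
Note on the alignment terms above: both get_push_cost and get_pull_cost penalize one half-space of cos(theta) so that sampled rollouts keep the robot on the correct side of the block. A standalone sketch of the pattern (the direction vectors here are assumptions for illustration; in the repo, cos_theta is precomputed by calculate_dist):

import torch
import torch.nn.functional as F

num_samples = 8
robot_pos = torch.rand(num_samples, 2)        # sampled robot positions
block_pos = torch.rand(num_samples, 2)        # block position per rollout
goal_pos = torch.tensor([2.0, 0.0])           # block goal

cos_theta = F.cosine_similarity(block_pos - robot_pos,   # robot -> block
                                goal_pos - block_pos,    # block -> goal
                                dim=1)

# Penalize only one half-space, as in get_push_cost:
align_cost = torch.zeros(num_samples)
mask = cos_theta > 0
align_cost[mask] = cos_theta[mask]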

src/m3p2i_aip/planners/motion_planner/m3p2i.py

Lines changed: 1 addition & 5 deletions
@@ -54,18 +54,14 @@ def _multi_modal_exp_util(self, costs):
         total_costs_1 = traj_costs[:self.half_K] - torch.min(traj_costs[:self.half_K])
         total_costs_2 = traj_costs[self.half_K:] - torch.min(traj_costs[self.half_K:])
         total_costs = traj_costs - torch.min(traj_costs)
-        # print('1', total_costs_1)
-        # print('2', total_costs_2)
+
         eta_1, exp_1 = self.update_infinite_beta(total_costs_1, self.beta_1, 10, 3)
         eta_2, exp_2 = self.update_infinite_beta(total_costs_2, self.beta_2, 10, 3)
         eta, exp_ = self.update_infinite_beta(total_costs, self.beta, 10, 3)
-        # exp_ = torch.exp((-1.0/self.beta) * total_costs)
-        # eta = torch.sum(exp_)
 
         self.weights_1 = 1 / eta_1 * exp_1
         self.weights_2 = 1 / eta_2 * exp_2
         self.weights = 1 / eta * exp_
-        # print('weights', self.weights.size())
 
     def _update_multi_modal_distribution(self, costs, actions):
         """

src/m3p2i_aip/planners/motion_planner/mppi.py

Lines changed: 10 additions & 16 deletions
@@ -197,7 +197,7 @@ def __init__(self, cfg: MPPIConfig, dynamics: Callable, running_cost: Callable):
         self.eta_min = 0.01 # 1%
         self.lambda_mult = 0.1 # Update rate
 
-        # covariance update
+        # Covariance update
         self.update_cov = cfg.update_cov # !! weird if set to True
         self.step_size_cov = 0.7
         self.kappa = 0.005
@@ -233,7 +233,7 @@ def command(self, state):
             action = self.U[:self.u_per_command]
 
         elif self.mppi_mode == 'halton-spline':
-            # shift command 1 time step [T, nu]
+            # Shift command 1 time step [T, nu]
             self.mean_action = self._shift_action(self.mean_action)
             if self.multi_modal:
                 self.mean_action_1 = self._shift_action(self.mean_action_1)
@@ -283,7 +283,7 @@ def _compute_rollout_costs(self, perturbed_actions):
         cost_horizon = torch.zeros([K, T], **self.tensor_args)
         cost_samples = cost_total
 
-        # allow propagation of a sample of states (ex. to carry a distribution), or to start with a single state
+        # Allow propagation of a sample of states (ex. to carry a distribution), or to start with a single state
         if self.state.shape == (K, self.nx):
             state = self.state
         else:
@@ -314,13 +314,11 @@ def _compute_rollout_costs(self, perturbed_actions):
             ee_state = 'None' #(self.ee_l_state[:, :3] + self.ee_r_state[:, :3])/2 if self.ee_l_state != 'None' else 'None'
             ee_states.append(ee_state) if ee_state != 'None' else []
 
-        # Actions is K x T x nu
-        # States is K x T x nx
-        actions = torch.stack(actions, dim=-2)
-        states = torch.stack(states, dim=-2)
+        actions = torch.stack(actions, dim=-2) # [K, T, nu]
+        states = torch.stack(states, dim=-2) # [K, T, nx]
         ee_states = torch.stack(ee_states, dim=-2) if ee_states != [] else 'None'
 
-        # action perturbation cost
+        # Action perturbation cost
         if self.terminal_state_cost:
             c = self.terminal_state_cost(states, actions)
             cost_samples += c
@@ -405,7 +403,6 @@ def _compute_total_cost_batch_halton(self):
 
         # Scales action within bounds. act_seq is the same as perturbed actions
         act_seq = scale_ctrl(act_seq, self.u_min, self.u_max, squash_fn=self.squash_fn)
-        # print(act_seq.size())
 
         if self.multi_modal:
             act_seq[0, :, :] = self.best_traj_1
@@ -473,8 +470,7 @@ def get_samples(self, sample_shape, **kwargs):
                 device=self.device,
                 float_dtype=torch.float32)
 
-        # Sample splines from knot points:
-        # iteratre over action dimension:
+        # Sample splines from knot points, iterate over action dimension:
         knot_samples = self.knot_points.view(sample_shape, self.nu, self.n_knots) # n knots is T/knot_scale (30/4 = 7)
         self.samples = torch.zeros((sample_shape, self.T, self.nu), **self.tensor_args)
         for i in range(sample_shape):
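
Note: get_samples expands n_knots knot values per action dimension into a length-T action sequence. The repo interpolates the knots with splines; the sketch below substitutes plain linear interpolation to show only the shape bookkeeping (K rollouts, horizon T, and action dimension nu are toy values):

import torch
import torch.nn.functional as F

K, T, nu = 4, 30, 2
n_knots = T // 4                    # knot_scale = 4, so 30/4 -> 7 knots
knot_samples = torch.randn(K, nu, n_knots)

samples = torch.zeros(K, T, nu)
for i in range(K):
    for j in range(nu):
        # Interpolate each 7-knot sequence up to T timesteps.
        knots = knot_samples[i, j].view(1, 1, n_knots)   # [N, C, L] for interpolate
        samples[i, :, j] = F.interpolate(knots, size=T, mode="linear",
                                         align_corners=True)[0, 0]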
@@ -502,23 +498,21 @@ def _update_distribution(self, costs, actions):
         weighted_seq = self.weights.view(-1, 1, 1) * actions # [K, T, nu]
         new_mean = torch.sum(weighted_seq, dim=0)
 
-        # Gradient update for the mean
+        # Update for the mean
         self.mean_action = (1.0 - self.step_size_mean) * self.mean_action +\
             self.step_size_mean * new_mean
-        # print(self.mean_action.size()) # [T, nu]
 
         delta = actions - self.mean_action.unsqueeze(0)
 
-        #Update Covariance
+        # Update Covariance
         if self.update_cov:
-            #Diagonal covariance of size AxA
+            # Diagonal covariance of size AxA
             weighted_delta = self.weights * (delta ** 2).T
             # cov_update = torch.diag(torch.mean(torch.sum(weighted_delta.T, dim=0), dim=0))
             cov_update = torch.mean(torch.sum(weighted_delta.T, dim=0), dim=0)
 
             self.cov_action = (1.0 - self.step_size_cov) * self.cov_action + self.step_size_cov * cov_update
             self.cov_action += self.kappa #* self.init_cov_action
-            # self.cov_action[self.cov_action < 0.0005] = 0.0005
             self.scale_tril = torch.sqrt(self.cov_action)
 
         return delta
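
Note: _update_distribution is the standard MPPI distribution refit: a weighted average of the sampled action sequences blended into the running mean, plus an optional diagonal covariance update regularized by kappa. A self-contained sketch with toy shapes (not the repo's verbatim code; permute replaces the deprecated .T on 3-D tensors):

import torch

K, T, nu = 4, 5, 2
weights = torch.softmax(-torch.rand(K), dim=0)     # importance weights, sum to 1
actions = torch.randn(K, T, nu)                    # sampled action sequences
mean_action = torch.zeros(T, nu)
cov_action = torch.ones(nu)
step_size_mean, step_size_cov, kappa = 0.98, 0.7, 0.005

new_mean = torch.sum(weights.view(-1, 1, 1) * actions, dim=0)   # [T, nu]
mean_action = (1.0 - step_size_mean) * mean_action + step_size_mean * new_mean

delta = actions - mean_action.unsqueeze(0)                      # [K, T, nu]
weighted_delta = weights * (delta ** 2).permute(2, 1, 0)        # broadcast over K
cov_update = torch.mean(torch.sum(weighted_delta.permute(2, 1, 0), dim=0), dim=0)
cov_action = (1.0 - step_size_cov) * cov_action + step_size_cov * cov_update
cov_action += kappa                        # keeps exploration noise from collapsing
scale_tril = torch.sqrt(cov_action)        # per-dimension standard deviation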

src/m3p2i_aip/planners/task_planner/isaac_int_req_templates.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

src/m3p2i_aip/planners/task_planner/isaac_state_action_templates.py

Lines changed: 0 additions & 6 deletions
@@ -170,12 +170,6 @@ def __init__(self):
         self.B[:, :, 2] = np.array([[1, 1, 1], # place
                                     [0, 0, 0],
                                     [0, 0, 0]])
-        # self.B[:, :, 1] = np.array([[0, 0, 0], # pick
-        #                             [1, 1, 1],
-        #                             [0, 0, 0]])
-        # self.B[:, :, 2] = np.array([[0, 0, 0], # place
-        #                             [0, 0, 0],
-        #                             [1, 1, 1]])
         # # Preconditions of the actions above
         # ----------------------------------------------------------
         self.preconditions = [['cube_at_goal'], ['cube_at_table'], ['cube_at_hand']]
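
Note: B holds the transition matrices of the active inference task planner; B[:, :, a] maps a belief over symbolic states to the predicted belief after action a. A toy sketch for the three-state cube task (the state ordering and the pick matrix are assumptions; only the place matrix appears in this diff):

import numpy as np

n_states = 3                              # assumed order: [cube_at_goal, cube_at_table, cube_at_hand]
B = np.zeros((n_states, n_states, 3))
B[:, :, 0] = np.eye(n_states)             # idle: state unchanged (assumption)
B[:, :, 1] = np.array([[0, 0, 0],         # pick: any state -> cube_at_hand (assumption)
                       [0, 0, 0],
                       [1, 1, 1]])
B[:, :, 2] = np.array([[1, 1, 1],         # place: any state -> cube_at_goal (from the diff)
                       [0, 0, 0],
                       [0, 0, 0]])

belief = np.array([0.0, 1.0, 0.0])        # certain the cube is on the table
predicted = B[:, :, 1] @ belief           # -> [0, 0, 1]: cube_at_hand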

src/m3p2i_aip/planners/task_planner/task_planner.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 import numpy as np
 from m3p2i_aip.utils import skill_utils
 from m3p2i_aip.planners.task_planner import ai_agent, adaptive_action_selection
-from m3p2i_aip.planners.task_planner import isaac_int_req_templates, isaac_state_action_templates
+from m3p2i_aip.planners.task_planner import isaac_state_action_templates
 
 def set_task_planner(cfg):
     if cfg.env_type == "point_env":

src/m3p2i_aip/utils/isaacgym_utils/isaacgym_wrapper.py

Lines changed: 2 additions & 3 deletions
@@ -3,6 +3,7 @@
 from typing import List
 from dataclasses import dataclass, field
 import m3p2i_aip.utils.isaacgym_utils.actor_utils as actor_utils
+
 @dataclass
 class IsaacGymConfig():
     dt: float = 0.05 # 0.01
@@ -33,7 +34,6 @@ def parse_isaacgym_config(cfg: IsaacGymConfig, device: str = "cuda:0") -> gymapi
     # sim_params.physx.friction_offset_threshold = 0.01
     # sim_params.physx.friction_correlation_distance = 0.001
 
-    # return the configured params
     return sim_params
 
 class IsaacGymWrapper:
@@ -175,7 +175,6 @@ def get_actor_link_by_name(self, actor_name: str, link_name: str):
             ),
             device=self.device,
         )
-        # print(rigid_body_idx)
         return self.get_rigid_body_by_rigid_body_index(rigid_body_idx)
 
     def get_actor_contact_forces_by_name(self, actor_name: str, link_name: str):
@@ -221,7 +220,7 @@ def update_dyn_obs(self, i, period=100):
         )
 
     def set_initial_joint_pose(self):
-        # set initial joint poses
+        # Set initial joint poses
         robots = [a for a in self.env_cfg if a.type == "robot"]
         for robot in robots:
             dof_state = []
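
Note: IsaacGymConfig is a dataclass whose fields (dt, etc.) are translated into a gymapi.SimParams object by parse_isaacgym_config. A minimal sketch of the same dataclass-to-params pattern without the Isaac Gym dependency (the SimParams stand-in and every field besides dt are illustrative assumptions):

from dataclasses import dataclass

@dataclass
class SimConfig:
    dt: float = 0.05          # simulation step, as in IsaacGymConfig
    substeps: int = 2         # illustrative field

class SimParams:
    # Stand-in for gymapi.SimParams: a plain mutable parameter bag.
    dt: float = 0.0
    substeps: int = 0

def parse_sim_config(cfg: SimConfig) -> SimParams:
    # Copy validated config fields onto the engine's parameter object.
    params = SimParams()
    params.dt = cfg.dt
    params.substeps = cfg.substeps
    return params

params = parse_sim_config(SimConfig(dt=0.01))
assert params.dt == 0.01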
