Merge pull request #352 from google-deepmind/deepmind

thowell · web-flow · commit 9bd203749805 · 2024-11-15T05:51:31.000-08:00
Merge from `deepmind` to `main`.
diff --git a/mjpc/agent.cc b/mjpc/agent.cc
@@ -153,6 +153,19 @@ void Agent::Initialize(const mjModel* model) {
   planner_threads_ =
       std::max(1, NumAvailableHardwareThreads() - 3 - 2 * estimator_threads_);
 
+  // differentiable planning model
+  // by default gradient-based planners use a differentiable model
+  int gradient_planner = false;
+  if (planner_ == kGradientPlanner || planner_ == kILQGPlanner ||
+      planner_ == kILQSPlanner) {
+    gradient_planner = true;
+  }
+  differentiable_ =
+      GetNumberOrDefault(gradient_planner, model, "agent_differentiable");
+  jnt_solimp_.resize(model->njnt);
+  geom_solimp_.resize(model->ngeom);
+  pair_solimp_.resize(model->npair);
+
   // delete the previous model after all the planners have been updated to use
   // the new one.
   if (old_model) {
@@ -279,6 +292,22 @@ void Agent::PlanIteration(ThreadPool* pool) {
   steps_ =
       mju_max(mju_min(horizon_ / timestep_ + 1, kMaxTrajectoryHorizon), 1);
 
+  // make model differentiable
+  int differentiable = differentiable_;
+  if (differentiable) {
+    // cache solimp defaults
+    for (int i = 0; i < model_->njnt; i++) {
+      jnt_solimp_[i] = model_->jnt_solimp[mjNIMP * i];
+    }
+    for (int i = 0; i < model_->ngeom; i++) {
+      geom_solimp_[i] = model_->geom_solimp[mjNIMP * i];
+    }
+    for (int i = 0; i < model_->npair; i++) {
+      pair_solimp_[i] = model_->pair_solimp[mjNIMP * i];
+    }
+    MakeDifferentiable(model_);
+  }
+
   // plan
   if (!allocate_enabled) {
     // set state
@@ -312,6 +341,19 @@ void Agent::PlanIteration(ThreadPool* pool) {
     // release the planning residual function
     residual_fn_.reset();
   }
+
+  // restore solimp defaults
+  if (differentiable) {
+    for (int i = 0; i < model_->njnt; i++) {
+      model_->jnt_solimp[mjNIMP * i] = jnt_solimp_[i];
+    }
+    for (int i = 0; i < model_->ngeom; i++) {
+      model_->geom_solimp[mjNIMP * i] = geom_solimp_[i];
+    }
+    for (int i = 0; i < model_->npair; i++) {
+      model_->pair_solimp[mjNIMP * i] = pair_solimp_[i];
+    }
+  }
 }
 
 // call planner to update nominal policy
@@ -644,21 +686,23 @@ void Agent::GUI(mjUI& ui) {
   }
 
   // ----- agent ----- //
-  mjuiDef defAgent[] = {{mjITEM_SECTION, "Agent", 1, nullptr, "AP"},
-                        {mjITEM_BUTTON, "Reset", 2, nullptr, " #459"},
-                        {mjITEM_SELECT, "Planner", 2, &planner_, ""},
-                        {mjITEM_SELECT, "Estimator", 2, &estimator_, ""},
-                        {mjITEM_CHECKINT, "Plan", 2, &plan_enabled, ""},
-                        {mjITEM_CHECKINT, "Action", 2, &action_enabled, ""},
-                        {mjITEM_CHECKINT, "Plots", 2, &plot_enabled, ""},
-                        {mjITEM_CHECKINT, "Traces", 2, &visualize_enabled, ""},
-                        {mjITEM_SEPARATOR, "Agent Settings", 1},
-                        {mjITEM_SLIDERNUM, "Horizon", 2, &horizon_, "0 1"},
-                        {mjITEM_SLIDERNUM, "Timestep", 2, &timestep_, "0 1"},
-                        {mjITEM_SELECT, "Integrator", 2, &integrator_,
-                         "Euler\nRK4\nImplicit\nImplicitFast"},
-                        {mjITEM_SEPARATOR, "Planner Settings", 1},
-                        {mjITEM_END}};
+  mjuiDef defAgent[] = {
+      {mjITEM_SECTION, "Agent", 1, nullptr, "AP"},
+      {mjITEM_BUTTON, "Reset", 2, nullptr, " #459"},
+      {mjITEM_SELECT, "Planner", 2, &planner_, ""},
+      {mjITEM_SELECT, "Estimator", 2, &estimator_, ""},
+      {mjITEM_CHECKINT, "Plan", 2, &plan_enabled, ""},
+      {mjITEM_CHECKINT, "Action", 2, &action_enabled, ""},
+      {mjITEM_CHECKINT, "Plots", 2, &plot_enabled, ""},
+      {mjITEM_CHECKINT, "Traces", 2, &visualize_enabled, ""},
+      {mjITEM_SEPARATOR, "Agent Settings", 1},
+      {mjITEM_SLIDERNUM, "Horizon", 2, &horizon_, "0 1"},
+      {mjITEM_SLIDERNUM, "Timestep", 2, &timestep_, "0 1"},
+      {mjITEM_SELECT, "Integrator", 2, &integrator_,
+       "Euler\nRK4\nImplicit\nImplicitFast"},
+      {mjITEM_CHECKINT, "Differentiable", 2, &differentiable_, ""},
+      {mjITEM_SEPARATOR, "Planner Settings", 1},
+      {mjITEM_END}};
 
   // planner names
   mju::strcpy_arr(defAgent[2].other, planner_names_);
@@ -730,6 +774,14 @@ void Agent::AgentEvent(mjuiItem* it, mjData* data,
         this->PlotInitialize();
         this->PlotReset();
 
+        // by default gradient-based planners use a differentiable model
+        if (planner_ == kGradientPlanner || planner_ == kILQGPlanner ||
+            planner_ == kILQSPlanner) {
+          differentiable_ = true;
+        } else {
+          differentiable_ = false;
+        }
+
         // reset agent
         uiloadrequest.fetch_sub(1);
       }
diff --git a/mjpc/agent.h b/mjpc/agent.h
@@ -247,6 +247,12 @@ class Agent {
 
   // max threads for estimation
   int estimator_threads_;
+
+  // differentiable planning model
+  bool differentiable_;
+  std::vector<double> jnt_solimp_;
+  std::vector<double> geom_solimp_;
+  std::vector<double> pair_solimp_;
 };
 
 }  // namespace mjpc
diff --git a/mjpc/planners/cross_entropy/planner.cc b/mjpc/planners/cross_entropy/planner.cc
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <chrono>
 #include <cmath>
+#include <mutex>
 #include <shared_mutex>
 
 #include <absl/random/random.h>
@@ -54,8 +55,11 @@ void CrossEntropyPlanner::Initialize(mjModel* model, const Task& task) {
   // sampling noise
   std_initial_ =
       GetNumberOrDefault(0.1, model,
-                         "sampling_exploration");        // initial variance
-  std_min_ = GetNumberOrDefault(0.1, model, "std_min");  // minimum variance
+                         "sampling_exploration");         // initial variance
+  std_min_ = GetNumberOrDefault(0.01, model, "std_min");  // minimum variance
+  // fraction of the trajectories that will use full exploration noise
+  explore_fraction_ =
+      GetNumberOrDefault(0.0, model, "explore_fraction");
 
   // set number of trajectories to rollout
   num_trajectory_ = GetNumberOrDefault(10, model, "sampling_trajectories");
@@ -227,12 +231,13 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
   for (int i = 0; i < n_elite; i++) {
     // ordered trajectory index
     int idx = trajectory_order[i];
+    const TimeSpline& elite_plan = candidate_policy[idx].plan;
 
     // add parameters
-    for (int i = 0; i < num_spline_points; i++) {
-      TimeSpline::Node n = candidate_policy[idx].plan.NodeAt(i);
+    for (int t = 0; t < num_spline_points; t++) {
+      TimeSpline::ConstNode n = elite_plan.NodeAt(t);
       for (int j = 0; j < model->nu; j++) {
-        parameters_scratch[i * model->nu + j] += n.values()[j];
+        parameters_scratch[t * model->nu + j] += n.values()[j];
       }
     }
 
@@ -247,12 +252,15 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
 
   // loop over elites to compute variance
   std::fill(variance.begin(), variance.end(), 0.0);  // reset variance to zero
-  for (int t = 0; t < num_spline_points; t++) {
-    TimeSpline::Node n = candidate_policy[trajectory_order[0]].plan.NodeAt(t);
-    for (int j = 0; j < model->nu; j++) {
-      // average
-      double p_avg = parameters_scratch[t * model->nu + j];
-      for (int i = 0; i < n_elite; i++) {
+  for (int i = 0; i < n_elite; i++) {
+    int idx = trajectory_order[i];
+    const TimeSpline& elite_plan = candidate_policy[idx].plan;
+    for (int t = 0; t < num_spline_points; t++) {
+      TimeSpline::ConstNode n = elite_plan.NodeAt(t);
+      for (int j = 0; j < model->nu; j++) {
+        // average
+        double p_avg = parameters_scratch[t * model->nu + j];
+
         // candidate parameter
         double pi = n.values()[j];
         double diff = pi - p_avg;
@@ -263,7 +271,7 @@ void CrossEntropyPlanner::OptimizePolicy(int horizon, ThreadPool& pool) {
 
   // update
   {
-    const std::shared_lock<std::shared_mutex> lock(mtx_);
+    const std::unique_lock<std::shared_mutex> lock(mtx_);
     policy.plan.Clear();
     policy.plan.SetInterpolation(interpolation_);
     for (int t = 0; t < num_spline_points; t++) {
@@ -384,14 +392,21 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,
 
   // lock std_min
   double std_min = std_min_;
+  double std_initial = std_initial_;
 
   // random search
   int count_before = pool.GetCount();
   for (int i = 0; i < num_trajectory; i++) {
+    double std;
+    if (i < num_trajectory * explore_fraction_) {
+      std = std_initial;
+    } else {
+      std = std_min;
+    }
     pool.Schedule([&s = *this, &model = this->model, &task = this->task,
                    &state = this->state, &time = this->time,
                    &mocap = this->mocap, &userdata = this->userdata, horizon,
-                   std_min, i]() {
+                   std, i]() {
       // copy nominal policy and sample noise
       {
         const std::shared_lock<std::shared_mutex> lock(s.mtx_);
@@ -401,7 +416,7 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon,
             s.resampled_policy.plan.Interpolation());
 
         // sample noise
-        s.AddNoiseToPolicy(i, std_min);
+        s.AddNoiseToPolicy(i, std);
       }
 
       // ----- rollout sample policy ----- //
@@ -486,6 +501,7 @@ void CrossEntropyPlanner::GUI(mjUI& ui) {
       {mjITEM_SLIDERINT, "Spline Pts", 2, &policy.num_spline_points, "0 1"},
       {mjITEM_SLIDERNUM, "Init. Std", 2, &std_initial_, "0 1"},
       {mjITEM_SLIDERNUM, "Min. Std", 2, &std_min_, "0.01 0.5"},
+      {mjITEM_SLIDERNUM, "Explore", 2, &explore_fraction_, "0.0 1.0"},
       {mjITEM_SLIDERINT, "Elite", 2, &n_elite_, "2 128"},
       {mjITEM_END}};
 
diff --git a/mjpc/planners/cross_entropy/planner.h b/mjpc/planners/cross_entropy/planner.h
@@ -122,6 +122,8 @@ class CrossEntropyPlanner : public Planner {
   double std_initial_;  // standard deviation for sampling normal: N(0,
                         // std)
   double std_min_;      // the minimum allowable std
+  double explore_fraction_ = 0;  // fraction of trajectories that will use
+                                 // std_initial instead of the variance from CEM
   std::vector<double> noise;
   std::vector<double> variance;
 
diff --git a/mjpc/planners/ilqg/planner.cc b/mjpc/planners/ilqg/planner.cc
@@ -584,7 +584,19 @@ void iLQGPlanner::Iteration(int horizon, ThreadPool& pool) {
     std::cout << "  dV: " << expected << '\n';
     std::cout << "  dV[0]: " << backward_pass.dV[0] << '\n';
     std::cout << "  dV[1]: " << backward_pass.dV[1] << '\n';
-    std::cout << std::endl;
+
+    std::cout << "\niLQG Timing (ms)\n" << '\n';
+    std::cout << "  nominal: " << nominal_compute_time * 1.0e-3 << '\n';
+    std::cout << "  model derivative: "
+              << model_derivative_compute_time * 1.0e-3 << '\n';
+    std::cout << "  cost derivative: " << cost_derivative_compute_time * 1.0e-3
+              << '\n';
+    std::cout << "  backward pass: " << backward_pass_compute_time * 1.0e-3
+              << '\n';
+    std::cout << "  rollouts: " << rollouts_compute_time * 1.0e-3 << '\n';
+    std::cout << "  policy update: " << policy_update_compute_time * 1.0e-3
+              << '\n';
+    std::cout << "\n\n";
   }
 
   // stop timer
diff --git a/mjpc/planners/include.h b/mjpc/planners/include.h
@@ -22,6 +22,17 @@
 
 namespace mjpc {
 
+// planner types
+enum PlannerType : int {
+  kSamplingPlanner = 0,
+  kGradientPlanner,
+  kILQGPlanner,
+  kILQSPlanner,
+  kRobustPlanner,
+  kCrossEntropyPlanner,
+  kSampleGradientPlanner,
+};
+
 // Planner names, separated by '\n'.
 extern const char kPlannerNames[];
 
diff --git a/mjpc/tasks/quadruped/quadruped.cc b/mjpc/tasks/quadruped/quadruped.cc
@@ -182,16 +182,21 @@ void QuadrupedFlat::ResidualFn::Residual(const mjModel* model,
   if (current_mode_ == kModeBiped) {
     // loosen the "hands" in Biped mode
     bool handstand = ReinterpretAsInt(parameters_[biped_type_param_id_]);
+    double arm_posture = parameters_[arm_posture_param_id_];
     if (handstand) {
-      residual[counter + 4] *= 0.03;
-      residual[counter + 5] *= 0.03;
-      residual[counter + 10] *= 0.03;
-      residual[counter + 11] *= 0.03;
+      residual[counter + 6] *= arm_posture;
+      residual[counter + 7] *= arm_posture;
+      residual[counter + 8] *= arm_posture;
+      residual[counter + 9] *= arm_posture;
+      residual[counter + 10] *= arm_posture;
+      residual[counter + 11] *= arm_posture;
     } else {
-      residual[counter + 1] *= 0.03;
-      residual[counter + 2] *= 0.03;
-      residual[counter + 7] *= 0.03;
-      residual[counter + 8] *= 0.03;
+      residual[counter + 0] *= arm_posture;
+      residual[counter + 1] *= arm_posture;
+      residual[counter + 2] *= arm_posture;
+      residual[counter + 3] *= arm_posture;
+      residual[counter + 4] *= arm_posture;
+      residual[counter + 5] *= arm_posture;
     }
   }
   counter += model->nu;
@@ -521,6 +526,7 @@ void QuadrupedFlat::ResetLocked(const mjModel* model) {
   residual_.cadence_param_id_ = ParameterIndex(model, "Cadence");
   residual_.amplitude_param_id_ = ParameterIndex(model, "Amplitude");
   residual_.duty_param_id_ = ParameterIndex(model, "Duty ratio");
+  residual_.arm_posture_param_id_ = ParameterIndex(model, "Arm posture");
   residual_.balance_cost_id_ = CostTermByName(model, "Balance");
   residual_.upright_cost_id_ = CostTermByName(model, "Upright");
   residual_.height_cost_id_ = CostTermByName(model, "Height");
diff --git a/mjpc/tasks/quadruped/quadruped.h b/mjpc/tasks/quadruped/quadruped.h
@@ -203,6 +203,7 @@ class QuadrupedFlat : public Task {
     int cadence_param_id_     = -1;
     int amplitude_param_id_   = -1;
     int duty_param_id_        = -1;
+    int arm_posture_param_id_ = -1;
     int upright_cost_id_      = -1;
     int balance_cost_id_      = -1;
     int height_cost_id_       = -1;
diff --git a/mjpc/tasks/quadruped/task_flat.xml b/mjpc/tasks/quadruped/task_flat.xml
@@ -29,6 +29,7 @@
     <numeric name="residual_select_Biped type" data="0"/>
     <text name="residual_list_Biped type" data="Foot Stand|Hand Stand"/>
     <numeric name="residual_Heading" data="0 -3.14 3.14" />
+    <numeric name="residual_Arm posture" data=".03 0 1"/>
 
     <!-- estimator -->
     <numeric name="estimator" data="0" />
diff --git a/mjpc/tasks/shadow_reorient/task.xml b/mjpc/tasks/shadow_reorient/task.xml
@@ -4,13 +4,17 @@
   <size memory="1M"/>
 
   <custom>
-    <numeric name="agent_planner" data="0" />
+    <numeric name="agent_planner" data="5" />
     <numeric name="agent_horizon" data="0.25" />
     <numeric name="agent_timestep" data="0.01" />
     <numeric name="agent_policy_width" data="0.0035" />
     <numeric name="sampling_spline_points" data="5" />
-    <numeric name="sampling_exploration" data="0.1" />
+    <numeric name="sampling_exploration" data="0.2" />
     <numeric name="sampling_representation" data="0" />
+    <numeric name="sampling_trajectories" data="60" />
+    <numeric name="n_elite" data="8" />
+    <numeric name="explore_fraction" data="0.5" />
+
     <numeric name="robust_xfrc" data="0.004" />
   </custom>
 
diff --git a/mjpc/utilities.cc b/mjpc/utilities.cc
@@ -56,6 +56,24 @@ extern "C" {
 }
 
 namespace mjpc {
+// make model differentiable by setting solimp[0] to zero
+void MakeDifferentiable(mjModel* model) {
+  // joints
+  for (int i = 0; i < model->njnt; i++) {
+    model->jnt_solimp[mjNIMP * i] = 0.0;
+  }
+
+  // geoms
+  for (int i = 0; i < model->ngeom; i++) {
+    model->geom_solimp[mjNIMP * i] = 0.0;
+  }
+
+  // contact pairs
+  for (int i = 0; i < model->npair; i++) {
+    model->pair_solimp[mjNIMP * i] = 0.0;
+  }
+}
+
 // set mjData state
 void SetState(const mjModel* model, mjData* data, const double* state) {
   mju_copy(data->qpos, state, model->nq);
diff --git a/mjpc/utilities.h b/mjpc/utilities.h
@@ -33,6 +33,9 @@ namespace mjpc {
 // maximum number of traces that are visualized
 inline constexpr int kMaxTraces = 99;
 
+// make model differentiable by setting solimp[0] to zero
+void MakeDifferentiable(mjModel* model);
+
 // set mjData state
 void SetState(const mjModel* model, mjData* data, const double* state);