From b7e6478c1cfcbfe4a0ff5ea72ab29679a49d240d Mon Sep 17 00:00:00 2001
From: Prabhat Nagarajan
Date: Fri, 12 Apr 2024 17:57:32 -0600
Subject: [PATCH 1/5] detaches losses for records

---
 pfrl/agents/soft_actor_critic.py | 4 ++--
 pfrl/agents/td3.py               | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pfrl/agents/soft_actor_critic.py b/pfrl/agents/soft_actor_critic.py
index 75e8ce98a..096e76d51 100644
--- a/pfrl/agents/soft_actor_critic.py
+++ b/pfrl/agents/soft_actor_critic.py
@@ -246,8 +246,8 @@ def update_q_func(self, batch):
         # Update stats
         self.q1_record.extend(predict_q1.detach().cpu().numpy())
         self.q2_record.extend(predict_q2.detach().cpu().numpy())
-        self.q_func1_loss_record.append(float(loss1))
-        self.q_func2_loss_record.append(float(loss2))
+        self.q_func1_loss_record.append(float(loss1.detach().cpu().numpy()))
+        self.q_func2_loss_record.append(float(loss2.detach().cpu().numpy()))
 
         self.q_func1_optimizer.zero_grad()
         loss1.backward()
diff --git a/pfrl/agents/td3.py b/pfrl/agents/td3.py
index dc913f56d..52ebd5f63 100644
--- a/pfrl/agents/td3.py
+++ b/pfrl/agents/td3.py
@@ -213,8 +213,8 @@ def update_q_func(self, batch):
         # Update stats
         self.q1_record.extend(predict_q1.detach().cpu().numpy())
         self.q2_record.extend(predict_q2.detach().cpu().numpy())
-        self.q_func1_loss_record.append(float(loss1))
-        self.q_func2_loss_record.append(float(loss2))
+        self.q_func1_loss_record.append(float(loss1.detach().cpu().numpy()))
+        self.q_func2_loss_record.append(float(loss1.detach().cpu().numpy()))
 
         self.q_func1_optimizer.zero_grad()
         loss1.backward()
@@ -241,7 +241,7 @@ def update_policy(self, batch):
         # Since we want to maximize Q, loss is negation of Q
         loss = -torch.mean(q)
 
-        self.policy_loss_record.append(float(loss))
+        self.policy_loss_record.append(float(loss.detach().cpu().numpy()))
         self.policy_optimizer.zero_grad()
         loss.backward()
         if self.max_grad_norm is not None:

From b5b35d5f07843a373a033ea7e2fbf6298b60286e Mon Sep 17 00:00:00 2001
From: Prabhat Nagarajan
Date: Fri, 12 Apr 2024 18:26:21 -0600
Subject: [PATCH 2/5] reverts numpy version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 33a2babf4..6a270e3e1 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 install_requires = [
     'torch>=1.3.0',
     'gym>=0.9.7',
-    'numpy>=1.11.0',
+    'numpy>=1.10.4',
     'pillow',
     'filelock',
 ]

From 999fe744c45451b958d1a98d19651a4c73a2694f Mon Sep 17 00:00:00 2001
From: Prabhat Nagarajan
Date: Sun, 7 Jul 2024 12:25:58 -0400
Subject: [PATCH 3/5] Uses tensor.item()

---
 pfrl/agents/ddpg.py              |  6 +++---
 pfrl/agents/soft_actor_critic.py | 12 ++++++------
 pfrl/agents/td3.py               | 10 +++++-----
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/pfrl/agents/ddpg.py b/pfrl/agents/ddpg.py
index 08c0748da..df8d48b89 100644
--- a/pfrl/agents/ddpg.py
+++ b/pfrl/agents/ddpg.py
@@ -168,7 +168,7 @@ def compute_critic_loss(self, batch):
         loss = F.mse_loss(target_q, predict_q)
 
         # Update stats
-        self.critic_loss_record.append(float(loss.detach().cpu().numpy()))
+        self.critic_loss_record.append(loss.item())
 
         return loss
 
@@ -181,8 +181,8 @@ def compute_actor_loss(self, batch):
         loss = -q.mean()
 
         # Update stats
-        self.q_record.extend(q.detach().cpu().numpy())
-        self.actor_loss_record.append(float(loss.detach().cpu().numpy()))
+        self.q_record.extend(q.item())
+        self.actor_loss_record.append(loss.item())
 
         return loss
 
diff --git a/pfrl/agents/soft_actor_critic.py b/pfrl/agents/soft_actor_critic.py
index 096e76d51..47ccb14e4 100644
--- a/pfrl/agents/soft_actor_critic.py
+++ b/pfrl/agents/soft_actor_critic.py
@@ -244,10 +244,10 @@ def update_q_func(self, batch):
         loss2 = 0.5 * F.mse_loss(target_q, predict_q2)
 
         # Update stats
-        self.q1_record.extend(predict_q1.detach().cpu().numpy())
-        self.q2_record.extend(predict_q2.detach().cpu().numpy())
-        self.q_func1_loss_record.append(float(loss1.detach().cpu().numpy()))
-        self.q_func2_loss_record.append(float(loss2.detach().cpu().numpy()))
+        self.q1_record.extend(predict_q1.item())
+        self.q2_record.extend(predict_q2.item())
+        self.q_func1_loss_record.append(loss1.item())
+        self.q_func2_loss_record.append(loss2.item())
 
         self.q_func1_optimizer.zero_grad()
         loss1.backward()
@@ -301,11 +301,11 @@ def update_policy_and_temperature(self, batch):
         with torch.no_grad():
             try:
                 self.entropy_record.extend(
-                    action_distrib.entropy().detach().cpu().numpy()
+                    action_distrib.entropy().item()
                 )
             except NotImplementedError:
                 # Record - log p(x) instead
-                self.entropy_record.extend(-log_prob.detach().cpu().numpy())
+                self.entropy_record.extend(-log_prob.item())
 
     def update(self, experiences, errors_out=None):
         """Update the model from experiences"""
diff --git a/pfrl/agents/td3.py b/pfrl/agents/td3.py
index 52ebd5f63..288bb1932 100644
--- a/pfrl/agents/td3.py
+++ b/pfrl/agents/td3.py
@@ -211,10 +211,10 @@ def update_q_func(self, batch):
         loss2 = F.mse_loss(target_q, predict_q2)
 
         # Update stats
-        self.q1_record.extend(predict_q1.detach().cpu().numpy())
-        self.q2_record.extend(predict_q2.detach().cpu().numpy())
-        self.q_func1_loss_record.append(float(loss1.detach().cpu().numpy()))
-        self.q_func2_loss_record.append(float(loss1.detach().cpu().numpy()))
+        self.q1_record.extend(predict_q1.item())
+        self.q2_record.extend(predict_q2.item())
+        self.q_func1_loss_record.append(loss1.item())
+        self.q_func2_loss_record.append(loss2.item())
 
         self.q_func1_optimizer.zero_grad()
         loss1.backward()
@@ -241,7 +241,7 @@ def update_policy(self, batch):
         # Since we want to maximize Q, loss is negation of Q
         loss = -torch.mean(q)
 
-        self.policy_loss_record.append(float(loss.detach().cpu().numpy()))
+        self.policy_loss_record.append(loss.item())
         self.policy_optimizer.zero_grad()
         loss.backward()
         if self.max_grad_norm is not None:

From 50bd939225e890d55e1bcbe6c32aca4354b183cf Mon Sep 17 00:00:00 2001
From: Prabhat Nagarajan
Date: Sun, 7 Jul 2024 15:07:32 -0400
Subject: [PATCH 4/5] reverts q record computation

---
 pfrl/agents/ddpg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pfrl/agents/ddpg.py b/pfrl/agents/ddpg.py
index df8d48b89..9319f5475 100644
--- a/pfrl/agents/ddpg.py
+++ b/pfrl/agents/ddpg.py
@@ -181,7 +181,7 @@ def compute_actor_loss(self, batch):
         loss = -q.mean()
 
         # Update stats
-        self.q_record.extend(q.item())
+        self.q_record.extend(q.detach().cpu().numpy())
         self.actor_loss_record.append(loss.item())
 
         return loss

From 41c0e924046253529d25090f50a16a4a2871b59c Mon Sep 17 00:00:00 2001
From: Prabhat Nagarajan
Date: Sun, 7 Jul 2024 15:13:14 -0400
Subject: [PATCH 5/5] for extends, undoes things

---
 pfrl/agents/soft_actor_critic.py | 8 ++++----
 pfrl/agents/td3.py               | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pfrl/agents/soft_actor_critic.py b/pfrl/agents/soft_actor_critic.py
index 47ccb14e4..57f76ae94 100644
--- a/pfrl/agents/soft_actor_critic.py
+++ b/pfrl/agents/soft_actor_critic.py
@@ -244,8 +244,8 @@ def update_q_func(self, batch):
         loss2 = 0.5 * F.mse_loss(target_q, predict_q2)
 
         # Update stats
-        self.q1_record.extend(predict_q1.item())
-        self.q2_record.extend(predict_q2.item())
+        self.q1_record.extend(predict_q1.detach().cpu().numpy())
+        self.q2_record.extend(predict_q2.detach().cpu().numpy())
         self.q_func1_loss_record.append(loss1.item())
         self.q_func2_loss_record.append(loss2.item())
 
@@ -301,11 +301,11 @@ def update_policy_and_temperature(self, batch):
         with torch.no_grad():
             try:
                 self.entropy_record.extend(
-                    action_distrib.entropy().item()
+                    action_distrib.entropy().detach().cpu().numpy()
                 )
             except NotImplementedError:
                 # Record - log p(x) instead
-                self.entropy_record.extend(-log_prob.item())
+                self.entropy_record.extend(-log_prob.detach().cpu().numpy())
 
     def update(self, experiences, errors_out=None):
         """Update the model from experiences"""
diff --git a/pfrl/agents/td3.py b/pfrl/agents/td3.py
index 288bb1932..e70e7e98b 100644
--- a/pfrl/agents/td3.py
+++ b/pfrl/agents/td3.py
@@ -211,8 +211,8 @@ def update_q_func(self, batch):
         loss2 = F.mse_loss(target_q, predict_q2)
 
         # Update stats
-        self.q1_record.extend(predict_q1.item())
-        self.q2_record.extend(predict_q2.item())
+        self.q1_record.extend(predict_q1.detach().cpu().numpy())
+        self.q2_record.extend(predict_q2.detach().cpu().numpy())
         self.q_func1_loss_record.append(loss1.item())
         self.q_func2_loss_record.append(loss2.item())
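
Note on the recording pattern this series converges on (a minimal standalone sketch, not part of the patches; the deque names and random tensors below are illustrative stand-ins for the agents' record attributes): batched statistics such as per-sample Q values are detached and moved to CPU/NumPy before being extended into the record, while scalar losses are recorded with tensor.item(), which already returns a plain Python float. Either way, the records end up holding numbers rather than tensors that keep the autograd graph (and GPU memory) alive.

import collections

import torch
import torch.nn.functional as F

# Illustrative stand-ins for the deque-based record attributes kept by the agents.
q1_record = collections.deque(maxlen=1000)
q_func1_loss_record = collections.deque(maxlen=100)

# A fake batch of Q predictions and targets, just to exercise the pattern.
predict_q1 = torch.randn(32, requires_grad=True)
target_q = torch.randn(32)
loss1 = F.mse_loss(predict_q1, target_q)

# Batched stats: detach, move to CPU, and convert to NumPy so the record holds
# plain floats rather than tensors that reference the autograd graph.
q1_record.extend(predict_q1.detach().cpu().numpy())

# Scalar stats: .item() copies the single value out as a Python float, which
# likewise detaches it from the graph.
q_func1_loss_record.append(loss1.item())

# The backward pass is unaffected; only the recorded copies were detached.
loss1.backward()

Because .item() only works on single-element tensors, patches 4 and 5 restore .detach().cpu().numpy() for the batched Q-value and entropy records while keeping .item() for the scalar loss records.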