Commit

Merge remote-tracking branch 'upstream/master'
prabhatnagarajan committed Dec 28, 2023
2 parents 776bb8c + b29533b commit 83459b4
Showing 55 changed files with 24 additions and 96 deletions.
2 changes: 1 addition & 1 deletion .pfnci/run.sh
@@ -75,7 +75,7 @@ main() {
# pytest does not run with attrs==19.2.0 (https://github.com/pytest-dev/pytest/issues/3280) # NOQA
"${PYTHON}" -m pip install \
'pytest==4.1.1' 'attrs==19.1.0' 'pytest-xdist==1.26.1' \
'gym[atari,classic_control]==0.19.0' 'optuna' 'zipp==1.0.0' 'pybullet==2.8.1' 'jupyterlab==2.1.5' 'traitlets==5.1.1'
'gym[atari,classic_control]==0.19.0' 'optuna' 'zipp==1.0.0' 'pybullet==2.8.1' 'jupyterlab==2.1.5' 'traitlets==5.1.1' 'pyglet==1.5.27'

git config --global user.email "you@example.com"
git config --global user.name "Your Name"
2 changes: 0 additions & 2 deletions examples/atari/reproduction/a3c/train_a3c.py
@@ -16,7 +16,6 @@


def main():

parser = argparse.ArgumentParser()
parser.add_argument("--processes", type=int, default=16)
parser.add_argument("--env", type=str, default="BreakoutNoFrameskip-v4")
@@ -176,7 +175,6 @@ def phi(x):
)
)
else:

# Linearly decay the learning rate to zero
def lr_setter(env, agent, value):
for pg in agent.optimizer.param_groups:
2 changes: 0 additions & 2 deletions examples/atari/train_acer_ale.py
@@ -19,7 +19,6 @@


def main():

parser = argparse.ArgumentParser()
parser.add_argument("processes", type=int)
parser.add_argument("--env", type=str, default="BreakoutNoFrameskip-v4")
@@ -185,7 +184,6 @@ def make_env(process_idx, test):
)
)
else:

# Linearly decay the learning rate to zero
def lr_setter(env, agent, value):
for pg in agent.optimizer.param_groups:
1 change: 0 additions & 1 deletion examples/atlas/train_soft_actor_critic_atlas.py
@@ -45,7 +45,6 @@ def make_env(args, seed, test):


def main():

parser = argparse.ArgumentParser()
parser.add_argument(
"--outdir",
1 change: 0 additions & 1 deletion examples/gym/train_dqn_gym.py
@@ -210,7 +210,6 @@ def make_env(idx=0, test=False):
)

elif not args.actor_learner:

print(
"WARNING: Since https://github.com/pfnet/pfrl/pull/112 we have started"
" setting `eval_during_episode=True` in this script, which affects the"
1 change: 0 additions & 1 deletion examples/mujoco/reproduction/ddpg/train_ddpg.py
@@ -22,7 +22,6 @@


def main():

parser = argparse.ArgumentParser()
parser.add_argument(
"--outdir",
@@ -21,7 +21,6 @@


def main():

parser = argparse.ArgumentParser()
parser.add_argument(
"--outdir",
1 change: 0 additions & 1 deletion examples/mujoco/reproduction/td3/train_td3.py
@@ -19,7 +19,6 @@


def main():

parser = argparse.ArgumentParser()
parser.add_argument(
"--outdir",
2 changes: 0 additions & 2 deletions examples/mujoco/reproduction/trpo/train_trpo.py
@@ -16,7 +16,6 @@


def main():

parser = argparse.ArgumentParser()
parser.add_argument(
"--gpu", type=int, default=0, help="GPU device ID. Set to -1 to use CPUs only."
@@ -215,7 +214,6 @@ def ortho_init(layer, gain):
with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f:
json.dump(eval_stats, f)
else:

pfrl.experiments.train_agent_with_evaluation(
agent=agent,
env=env,
1 change: 0 additions & 1 deletion pfrl/agents/a2c.py
@@ -71,7 +71,6 @@ def __init__(
average_value_decay=0.999,
batch_states=batch_states,
):

self.model = model
if gpu is not None and gpu >= 0:
assert torch.cuda.is_available()
2 changes: 0 additions & 2 deletions pfrl/agents/a3c.py
@@ -64,7 +64,6 @@ def __init__(
average_value_decay=0.999,
batch_states=batch_states,
):

# Globally shared model
self.shared_model = model

@@ -241,7 +240,6 @@ def observe(self, obs, reward, done, reset):
self._observe_eval(obs, reward, done, reset)

def _act_train(self, obs):

self.past_obs[self.t] = obs

with torch.no_grad():
5 changes: 0 additions & 5 deletions pfrl/agents/acer.py
@@ -332,7 +332,6 @@ def __init__(
average_kl_decay=0.999,
logger=None,
):

# Globally shared model
self.shared_model = model

@@ -472,7 +471,6 @@ def compute_loss(
action_distribs_mu,
avg_action_distribs,
):

assert np.isscalar(R)
pi_loss = 0
Q_loss = 0
@@ -566,7 +564,6 @@ def update(
action_distribs_mu,
avg_action_distribs,
):

assert np.isscalar(R)
self.assert_shared_memory()

@@ -595,7 +592,6 @@ def update(
self.sync_parameters()

def update_from_replay(self):

if self.replay_buffer is None:
return

@@ -715,7 +711,6 @@ def observe(self, obs, reward, done, reset):
self._observe_eval(obs, reward, done, reset)

def _act_train(self, obs):

statevar = batch_states([obs], self.device, self.phi)

if self.recurrent:
1 change: 0 additions & 1 deletion pfrl/agents/al.py
@@ -21,7 +21,6 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def _compute_y_and_t(self, exp_batch):

batch_state = exp_batch["state"]
batch_size = len(exp_batch["reward"])

3 changes: 0 additions & 3 deletions pfrl/agents/ddpg.py
@@ -81,7 +81,6 @@ def __init__(
batch_states=batch_states,
burnin_action_func=None,
):

self.model = nn.ModuleList([policy, q_func])
if gpu is not None and gpu >= 0:
assert torch.cuda.is_available()
@@ -223,7 +222,6 @@ def update_from_episodes(self, episodes, errors_out=None):
batches.append(batch)

with self.model.state_reset(), self.target_model.state_reset():

# Since the target model is evaluated one-step ahead,
# its internal states need to be updated
self.target_q_function.update_state(
@@ -238,7 +236,6 @@ def update_from_episodes(self, episodes, errors_out=None):
self.critic_optimizer.update(lambda: critic_loss / max_epi_len)

with self.model.state_reset():

# Update actor through time
actor_loss = 0
for batch in batches:
1 change: 0 additions & 1 deletion pfrl/agents/double_dqn.py
@@ -10,7 +10,6 @@ class DoubleDQN(dqn.DQN):
"""

def _compute_target_values(self, exp_batch):

batch_next_state = exp_batch["next_state"]

with evaluating(self.model):
1 change: 0 additions & 1 deletion pfrl/agents/double_pal.py
@@ -6,7 +6,6 @@

class DoublePAL(pal.PAL):
def _compute_y_and_t(self, exp_batch):

batch_state = exp_batch["state"]
batch_size = len(exp_batch["reward"])

2 changes: 0 additions & 2 deletions pfrl/agents/dpp.py
@@ -17,7 +17,6 @@ def _l_operator(self, qout):
raise NotImplementedError()

def _compute_target_values(self, exp_batch):

batch_next_state = exp_batch["next_state"]

if self.recurrent:
@@ -38,7 +37,6 @@ def _compute_target_values(self, exp_batch):
)

def _compute_y_and_t(self, exp_batch):

batch_state = exp_batch["state"]
batch_size = len(exp_batch["reward"])

23 changes: 21 additions & 2 deletions pfrl/agents/dqn.py
@@ -3,7 +3,9 @@
import ctypes
import multiprocessing as mp
import multiprocessing.synchronize
import os
import time
import typing
from logging import Logger, getLogger
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple

@@ -36,7 +38,7 @@

def _mean_or_nan(xs: Sequence[float]) -> float:
"""Return its mean a non-empty sequence, numpy.nan for a empty one."""
return np.mean(xs) if xs else np.nan
return typing.cast(float, np.mean(xs)) if xs else np.nan


def compute_value_loss(
@@ -511,7 +513,6 @@ def _batch_observe_train(
batch_done: Sequence[bool],
batch_reset: Sequence[bool],
) -> None:

for i in range(len(batch_obs)):
self.t += 1
self._cumulative_steps += 1
@@ -790,6 +791,24 @@ def stop_episode(self) -> None:
if self.recurrent:
self.test_recurrent_states = None

def save_snapshot(self, dirname: str) -> None:
self.save(dirname)
torch.save(self.t, os.path.join(dirname, "t.pt"))
torch.save(self.optim_t, os.path.join(dirname, "optim_t.pt"))
torch.save(
self._cumulative_steps, os.path.join(dirname, "_cumulative_steps.pt")
)
self.replay_buffer.save(os.path.join(dirname, "replay_buffer.pkl"))

def load_snapshot(self, dirname: str) -> None:
self.load(dirname)
self.t = torch.load(os.path.join(dirname, "t.pt"))
self.optim_t = torch.load(os.path.join(dirname, "optim_t.pt"))
self._cumulative_steps = torch.load(
os.path.join(dirname, "_cumulative_steps.pt")
)
self.replay_buffer.load(os.path.join(dirname, "replay_buffer.pkl"))

def get_statistics(self):
return [
("average_q", _mean_or_nan(self.q_record)),
1 change: 0 additions & 1 deletion pfrl/agents/pal.py
@@ -21,7 +21,6 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def _compute_y_and_t(self, exp_batch):

batch_state = exp_batch["state"]
batch_size = len(exp_batch["reward"])

3 changes: 0 additions & 3 deletions pfrl/agents/ppo.py
@@ -115,7 +115,6 @@ def _add_log_prob_and_value_to_episodes(
obs_normalizer,
device,
):

dataset = list(itertools.chain.from_iterable(episodes))

# Compute v_pred and next_v_pred
@@ -533,7 +532,6 @@ def _update(self, dataset):
self.n_updates += 1

def _update_once_recurrent(self, episodes, mean_advs, std_advs):

assert std_advs is None or std_advs > 0

device = self.device
@@ -636,7 +634,6 @@ def _update_recurrent(self, dataset):
def _lossfun(
self, entropy, vs_pred, log_probs, vs_pred_old, log_probs_old, advs, vs_teacher
):

prob_ratio = torch.exp(log_probs - log_probs_old)

loss_policy = -torch.mean(
2 changes: 0 additions & 2 deletions pfrl/agents/reinforce.py
@@ -57,7 +57,6 @@ def __init__(
max_grad_norm=None,
logger=None,
):

self.model = model
if gpu is not None and gpu >= 0:
assert torch.cuda.is_available()
@@ -103,7 +102,6 @@ def observe(self, obs, reward, done, reset):
self._observe_eval(obs, reward, done, reset)

def _act_train(self, obs):

batch_obs = self.batch_states([obs], self.device, self.phi)
if self.recurrent:
action_distrib, self.train_recurrent_states = one_step_forward(
1 change: 0 additions & 1 deletion pfrl/agents/soft_actor_critic.py
@@ -119,7 +119,6 @@ def __init__(
temperature_optimizer_lr=None,
act_deterministically=True,
):

self.policy = policy
self.q_func1 = q_func1
self.q_func2 = q_func2
1 change: 0 additions & 1 deletion pfrl/agents/td3.py
@@ -101,7 +101,6 @@ def __init__(
policy_update_delay=2,
target_policy_smoothing_func=default_target_policy_smoothing_func,
):

self.policy = policy
self.q_func1 = q_func1
self.q_func2 = q_func2
3 changes: 0 additions & 3 deletions pfrl/agents/trpo.py
@@ -193,7 +193,6 @@ def __init__(
policy_step_size_stats_window=100,
logger=getLogger(__name__),
):

self.policy = policy
self.vf = vf
self.vf_optimizer = vf_optimizer
@@ -335,7 +334,6 @@ def _update_recurrent(self, dataset):
self._update_vf_recurrent(dataset)

def _update_vf_recurrent(self, dataset):

for epoch in range(self.vf_epochs):
random.shuffle(dataset)
for (
@@ -346,7 +344,6 @@ def _update_vf_recurrent(self, dataset):
self._update_vf_once_recurrent(minibatch)

def _update_vf_once_recurrent(self, episodes):

# Sort episodes desc by length for pack_sequence
episodes = sorted(episodes, key=len, reverse=True)

2 changes: 0 additions & 2 deletions pfrl/experiments/train_agent.py
@@ -35,7 +35,6 @@ def train_agent(
eval_during_episode=False,
logger=None,
):

logger = logger or logging.getLogger(__name__)

episode_r = 0
@@ -52,7 +51,6 @@ def train_agent(
episode_len = 0
try:
while t < steps:

# a_t
action = agent.act(obs)
# o_{t+1}, r_{t+1}