Skip to content

Commit

Permalink
Merge pull request #33 from prabhatnagarajan/pretrained_models
Browse files Browse the repository at this point in the history
Pretrained models
  • Loading branch information
muupan authored Dec 16, 2020
2 parents 322fa45 + 719dfce commit 4e45b8d
Show file tree
Hide file tree
Showing 24 changed files with 874 additions and 32 deletions.
7 changes: 7 additions & 0 deletions .pfnci/config.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ configs {
seconds: 1200
}
environment_variables { key: "GPU" value: "1" }
environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
command: "bash .pfnci/script.sh gpu"
}
}
Expand All @@ -27,6 +28,7 @@ configs {
}
environment_variables { key: "GPU" value: "1" }
environment_variables { key: "SLOW" value: "1" }
environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
command: "bash .pfnci/script.sh gpu"
}
}
Expand All @@ -44,6 +46,7 @@ configs {
}
environment_variables { key: "GPU" value: "1" }
environment_variables { key: "TEST_EXAMPLES" value: "1" }
environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
command: "bash .pfnci/script.sh gpu"
}
}
Expand All @@ -59,6 +62,7 @@ configs {
seconds: 2400
}
environment_variables { key: "SLOW" value: "1" }
environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
command: "bash .pfnci/script.sh cpu"
}
}
Expand All @@ -74,6 +78,7 @@ configs {
time_limit {
seconds: 1200
}
environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
command: "bash .pfnci/script.sh cpu"
}
}
Expand All @@ -88,6 +93,7 @@ configs {
time_limit {
seconds: 1200
}
environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
environment_variables { key: "TEST_EXAMPLES" value: "1" }
command: "bash .pfnci/script.sh cpu"
}
Expand All @@ -104,6 +110,7 @@ configs {
time_limit {
seconds: 1200
}
environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
command: "bash .pfnci/lint.sh"
}
}
1 change: 0 additions & 1 deletion .pfnci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ main() {
marker+=' and gpu'
bucket="${GPU}"
fi
marker+=' and not download_model'


UBUNTU_VERSION_ID=$(grep DISTRIB_RELEASE /etc/lsb-release | cut -d "=" -f2)
Expand Down
10 changes: 9 additions & 1 deletion .pfnci/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ main() {
wait

# Prepare docker args.
docker_args=(docker run --rm --volume="$(pwd):/src:ro")
docker_args=(docker run --rm --volume="$(pwd):/src:ro" --volume="/root/.pfrl:/root/.pfrl/")
if [ "${GPU:-0}" != '0' ]; then
docker_args+=(--ipc=host --privileged --env="GPU=${GPU}" --runtime=nvidia)
fi
Expand All @@ -50,6 +50,14 @@ main() {
docker_image=pytorch/pytorch:1.5.1-cuda10.1-cudnn7-runtime
docker_args+=(--env="SLOW=${SLOW:-0}")

for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip
do
gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} .
mkdir -p ~/.pfrl/models/
unzip ${ZIP} -d ~/.pfrl/models/
rm ${ZIP}
done

run "${docker_args[@]}" "${docker_image}" bash /src/.pfnci/run.sh "${TARGET}"
}

Expand Down
8 changes: 7 additions & 1 deletion examples/atari/reproduction/a3c/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,19 @@ To run the training example:
python train_a3c.py [options]
```

We have already trained models from this script for all the domains listed in the [results](#Results). To load a pretrained model:
```
python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best
```

### Useful Options
- `--env`. Specifies the environment.
- `--render`. Add this option to render the states in a GUI window.
- `--seed`. This option specifies the random seed used.
- `--outdir` This option specifies the output directory to which the results are written.
- `--demo`. Runs an evaluation, instead of training the agent.
- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

To view the full list of options, either view the code or run the example with the `--help` option.

Expand Down
19 changes: 14 additions & 5 deletions examples/atari/reproduction/a3c/train_a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ def main():
parser.add_argument("--eval-n-steps", type=int, default=125000)
parser.add_argument("--demo", action="store_true", default=False)
parser.add_argument("--load-pretrained", action="store_true", default=False)
parser.add_argument(
"--pretrained-type", type=str, default="best", choices=["best", "final"]
)
parser.add_argument("--load", type=str, default="")
parser.add_argument(
"--log-level",
Expand Down Expand Up @@ -144,11 +147,17 @@ def phi(x):
max_grad_norm=40.0,
)

if args.load_pretrained:
raise Exception("Pretrained models are currently unsupported.")

if args.load:
agent.load(args.load)
if args.load or args.load_pretrained:
# either load or load_pretrained must be false
assert not args.load or not args.load_pretrained
if args.load:
agent.load(args.load)
else:
agent.load(
utils.download_model("A3C", args.env, model_type=args.pretrained_type)[
0
]
)

if args.demo:
env = make_env(0, True)
Expand Down
8 changes: 7 additions & 1 deletion examples/atari/reproduction/dqn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@ This example trains a DQN agent, from the following paper: [Human-level control

- atari_py>=0.1.1
- opencv-python
- filelock

## Running the Example

To run the training example:
```
python train_dqn.py [options]
```
We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance of the [results](#Results). To load a pretrained model:

```
python train_dqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`.
Expand All @@ -20,7 +26,7 @@ python train_dqn.py [options]
- `--seed`. This option specifies the random seed used.
- `--outdir` This option specifies the output directory to which the results are written.
- `--demo`. Runs an evaluation, instead of training the agent.
- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

To view the full list of options, either view the code or run the example with the `--help` option.
Expand Down
2 changes: 0 additions & 2 deletions examples/atari/reproduction/dqn/train_dqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,6 @@ def phi(x):
)

if args.load or args.load_pretrained:
if args.load_pretrained:
raise Exception("Pretrained models are currently unsupported.")
# either load or load_pretrained must be false
assert not args.load or not args.load_pretrained
if args.load:
Expand Down
8 changes: 7 additions & 1 deletion examples/atari/reproduction/iqn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,20 @@ To run the training example:
python train_iqn.py [options]
```

We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance of the [results](#Results). To load a pretrained model:

```
python train_iqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_iqn.py --gpu -1`.
- `--env`. Specifies the environment.
- `--render`. Add this option to render the states in a GUI window.
- `--seed`. This option specifies the random seed used.
- `--outdir` This option specifies the output directory to which the results are written.
- `--demo`. Runs an evaluation, instead of training the agent.
- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

To view the full list of options, either view the code or run the example with the `--help` option.
Expand Down
2 changes: 0 additions & 2 deletions examples/atari/reproduction/iqn/train_iqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,6 @@ def phi(x):
)

if args.load or args.load_pretrained:
if args.load_pretrained:
raise Exception("Pretrained models are currently unsupported.")
# either load or load_pretrained must be false
assert not args.load or not args.load_pretrained
if args.load:
Expand Down
8 changes: 7 additions & 1 deletion examples/atari/reproduction/rainbow/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,20 @@ To run the training example:
python train_rainbow.py [options]
```

We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_rainbow.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_rainbow.py --gpu -1`.
- `--env`. Specifies the environment.
- `--render`. Add this option to render the states in a GUI window.
- `--seed`. This option specifies the random seed used.
- `--outdir` This option specifies the output directory to which the results are written.
- `--demo`. Runs an evaluation, instead of training the agent.
- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

To view the full list of options, either view the code or run the example with the `--help` option.
Expand Down
2 changes: 0 additions & 2 deletions examples/atari/reproduction/rainbow/train_rainbow.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,6 @@ def phi(x):
)

if args.load or args.load_pretrained:
if args.load_pretrained:
raise Exception("Pretrained models are currently unsupported.")
# either load or load_pretrained must be false
assert not args.load or not args.load_pretrained
if args.load:
Expand Down
7 changes: 6 additions & 1 deletion examples/mujoco/reproduction/ddpg/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ To run the training example:
```
python train_ddpg.py [options]
```
We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_ddpg.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1
```

### Useful Options

Expand All @@ -23,7 +28,7 @@ python train_ddpg.py [options]
- `--seed`. This option specifies the random seed used.
- `--outdir` This option specifies the output directory to which the results are written.
- `--demo`. Runs an evaluation, instead of training the agent.
- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).


Expand Down
7 changes: 5 additions & 2 deletions examples/mujoco/reproduction/ddpg/train_ddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,6 @@ def burnin_action_func():
)

if len(args.load) > 0 or args.load_pretrained:
if args.load_pretrained:
raise Exception("Pretrained models are currently unsupported.")
# either load or load_pretrained must be false
assert not len(args.load) > 0 or not args.load_pretrained
if len(args.load) > 0:
Expand Down Expand Up @@ -205,6 +203,11 @@ def burnin_action_func():
eval_stats["stdev"],
)
)
import json
import os

with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f:
json.dump(eval_stats, f)
else:
experiments.train_agent_with_evaluation(
agent=agent,
Expand Down
8 changes: 7 additions & 1 deletion examples/mujoco/reproduction/ppo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ To run the training example:
python train_ppo.py [options]
```

We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_ppo.py --demo --load-pretrained --env HalfCheetah-v2 --gpu -1
```

### Useful Options

- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_ppo.py --gpu -1`.
Expand All @@ -24,7 +30,7 @@ python train_ppo.py [options]
- `--seed`. This option specifies the random seed used.
- `--outdir` This option specifies the output directory to which the results are written.
- `--demo`. Runs an evaluation, instead of training the agent.
- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.

To view the full list of options, either view the code or run the example with the `--help` option.

Expand Down
7 changes: 5 additions & 2 deletions examples/mujoco/reproduction/ppo/train_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,6 @@ def ortho_init(layer, gain):
)

if args.load or args.load_pretrained:
if args.load_pretrained:
raise Exception("Pretrained models are currently unsupported.")
# either load or load_pretrained must be false
assert not args.load or not args.load_pretrained
if args.load:
Expand All @@ -232,6 +230,11 @@ def ortho_init(layer, gain):
eval_stats["stdev"],
)
)
import json
import os

with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f:
json.dump(eval_stats, f)
else:
experiments.train_agent_batch_with_evaluation(
agent=agent,
Expand Down
8 changes: 7 additions & 1 deletion examples/mujoco/reproduction/soft_actor_critic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ To run the training example:
python train_soft_actor_critic.py [options]
```

We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_soft_actor_critic.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1
```

### Useful Options

- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_soft_actor_critic.py --gpu -1`.
Expand All @@ -24,7 +30,7 @@ python train_soft_actor_critic.py [options]
- `--outdir` This option specifies the output directory to which the results are written.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

To view the full list of options, either view the code or run the example with the `--help` option.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,6 @@ def burnin_action_func():
)

if len(args.load) > 0 or args.load_pretrained:
if args.load_pretrained:
raise Exception("Pretrained models are currently unsupported.")
# either load or load_pretrained must be false
assert not len(args.load) > 0 or not args.load_pretrained
if len(args.load) > 0:
Expand Down Expand Up @@ -255,6 +253,11 @@ def burnin_action_func():
eval_stats["stdev"],
)
)
import json
import os

with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f:
json.dump(eval_stats, f)
else:
experiments.train_agent_batch_with_evaluation(
agent=agent,
Expand Down
9 changes: 8 additions & 1 deletion examples/mujoco/reproduction/td3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ To run the training example:
python train_td3.py [options]
```

We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_td3.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1
```


### Useful Options

- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_td3.py --gpu -1`.
Expand All @@ -23,7 +30,7 @@ python train_td3.py [options]
- `--outdir` This option specifies the output directory to which the results are written.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

To view the full list of options, either view the code or run the example with the `--help` option.

Expand Down
Loading

0 comments on commit 4e45b8d

Please sign in to comment.