diff --git a/.pfnci/config.pbtxt b/.pfnci/config.pbtxt index 9a6fa5842..ed7e4c891 100644 --- a/.pfnci/config.pbtxt +++ b/.pfnci/config.pbtxt @@ -10,6 +10,7 @@ configs { seconds: 1200 } environment_variables { key: "GPU" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -27,6 +28,7 @@ configs { } environment_variables { key: "GPU" value: "1" } environment_variables { key: "SLOW" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -44,6 +46,7 @@ configs { } environment_variables { key: "GPU" value: "1" } environment_variables { key: "TEST_EXAMPLES" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -59,6 +62,7 @@ configs { seconds: 2400 } environment_variables { key: "SLOW" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh cpu" } } @@ -74,6 +78,7 @@ configs { time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh cpu" } } @@ -88,6 +93,7 @@ configs { time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } environment_variables { key: "TEST_EXAMPLES" value: "1" } command: "bash .pfnci/script.sh cpu" } @@ -104,6 +110,7 @@ configs { time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/lint.sh" } } diff --git a/.pfnci/run.sh b/.pfnci/run.sh index 61bf4cf20..dbd78af73 100644 --- a/.pfnci/run.sh +++ b/.pfnci/run.sh @@ -52,7 +52,6 @@ main() { marker+=' and gpu' bucket="${GPU}" fi - marker+=' and not download_model' UBUNTU_VERSION_ID=$(grep DISTRIB_RELEASE /etc/lsb-release | cut -d "=" -f2) diff --git a/.pfnci/script.sh b/.pfnci/script.sh index f845e754b..e74f7f9ec 100644 --- a/.pfnci/script.sh +++ b/.pfnci/script.sh @@ -34,7 +34,7 @@ main() { wait # Prepare docker args. - docker_args=(docker run --rm --volume="$(pwd):/src:ro") + docker_args=(docker run --rm --volume="$(pwd):/src:ro" --volume="/root/.pfrl:/root/.pfrl/") if [ "${GPU:-0}" != '0' ]; then docker_args+=(--ipc=host --privileged --env="GPU=${GPU}" --runtime=nvidia) fi @@ -50,6 +50,14 @@ main() { docker_image=pytorch/pytorch:1.5.1-cuda10.1-cudnn7-runtime docker_args+=(--env="SLOW=${SLOW:-0}") + for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip + do + gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} . + mkdir -p ~/.pfrl/models/ + unzip ${ZIP} -d ~/.pfrl/models/ + rm ${ZIP} + done + run "${docker_args[@]}" "${docker_image}" bash /src/.pfnci/run.sh "${TARGET}" } diff --git a/examples/atari/reproduction/a3c/README.md b/examples/atari/reproduction/a3c/README.md index 064a3a328..cae67f45c 100644 --- a/examples/atari/reproduction/a3c/README.md +++ b/examples/atari/reproduction/a3c/README.md @@ -13,13 +13,19 @@ To run the training example: python train_a3c.py [options] ``` +We have already trained models from this script for all the domains listed in the [results](#Results). To load a pretrained model: +``` +python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best +``` + ### Useful Options - `--env`. 
Specifies the environment.
 - `--render`. Add this option to render the states in a GUI window.
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.
 
diff --git a/examples/atari/reproduction/a3c/train_a3c.py b/examples/atari/reproduction/a3c/train_a3c.py
index 4e7a1887c..3913f1196 100644
--- a/examples/atari/reproduction/a3c/train_a3c.py
+++ b/examples/atari/reproduction/a3c/train_a3c.py
@@ -45,6 +45,9 @@ def main():
     parser.add_argument("--eval-n-steps", type=int, default=125000)
     parser.add_argument("--demo", action="store_true", default=False)
     parser.add_argument("--load-pretrained", action="store_true", default=False)
+    parser.add_argument(
+        "--pretrained-type", type=str, default="best", choices=["best", "final"]
+    )
     parser.add_argument("--load", type=str, default="")
     parser.add_argument(
         "--log-level",
@@ -144,11 +147,17 @@ def phi(x):
         max_grad_norm=40.0,
     )
 
-    if args.load_pretrained:
-        raise Exception("Pretrained models are currently unsupported.")
-
-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(
+                utils.download_model("A3C", args.env, model_type=args.pretrained_type)[
+                    0
+                ]
+            )
 
     if args.demo:
         env = make_env(0, True)
diff --git a/examples/atari/reproduction/dqn/README.md b/examples/atari/reproduction/dqn/README.md
index fcd8e65b2..c87d5f423 100644
--- a/examples/atari/reproduction/dqn/README.md
+++ b/examples/atari/reproduction/dqn/README.md
@@ -5,6 +5,7 @@ This example trains a DQN agent, from the following paper: [Human-level control
 
 - atari_py>=0.1.1
 - opencv-python
+- filelock
 
 ## Running the Example
 
@@ -12,6 +13,11 @@ To run the training example:
 ```
 python train_dqn.py [options]
 ```
+We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance of the [results](#Results). To load a pretrained model:
+
+```
+python train_dqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
+```
 
 ### Useful Options
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`.
@@ -20,7 +26,7 @@ python train_dqn.py [options]
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.
diff --git a/examples/atari/reproduction/dqn/train_dqn.py b/examples/atari/reproduction/dqn/train_dqn.py
index 1677109e6..72c210ad5 100644
--- a/examples/atari/reproduction/dqn/train_dqn.py
+++ b/examples/atari/reproduction/dqn/train_dqn.py
@@ -163,8 +163,6 @@ def phi(x):
     )
 
     if args.load or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not args.load or not args.load_pretrained
         if args.load:
diff --git a/examples/atari/reproduction/iqn/README.md b/examples/atari/reproduction/iqn/README.md
index 128289a1b..a8ae62fae 100644
--- a/examples/atari/reproduction/iqn/README.md
+++ b/examples/atari/reproduction/iqn/README.md
@@ -13,6 +13,12 @@ To run the training example:
 python train_iqn.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance of the [results](#Results). To load a pretrained model:
+
+```
+python train_iqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
+```
+
 ### Useful Options
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`.
 - `--env`. Specifies the environment.
@@ -20,7 +26,7 @@ python train_iqn.py [options]
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.
diff --git a/examples/atari/reproduction/iqn/train_iqn.py b/examples/atari/reproduction/iqn/train_iqn.py
index 0e605467d..d4789f13a 100644
--- a/examples/atari/reproduction/iqn/train_iqn.py
+++ b/examples/atari/reproduction/iqn/train_iqn.py
@@ -162,8 +162,6 @@ def phi(x):
     )
 
     if args.load or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not args.load or not args.load_pretrained
         if args.load:
diff --git a/examples/atari/reproduction/rainbow/README.md b/examples/atari/reproduction/rainbow/README.md
index 8939771ce..d23d04022 100644
--- a/examples/atari/reproduction/rainbow/README.md
+++ b/examples/atari/reproduction/rainbow/README.md
@@ -13,6 +13,12 @@ To run the training example:
 python train_rainbow.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results) section.
To load a pretrained model: + +``` +python train_rainbow.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_rainbow.py --gpu -1`. - `--env`. Specifies the environment. @@ -20,7 +26,7 @@ python train_rainbow.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/atari/reproduction/rainbow/train_rainbow.py b/examples/atari/reproduction/rainbow/train_rainbow.py index 5edbacc4b..609ea9261 100644 --- a/examples/atari/reproduction/rainbow/train_rainbow.py +++ b/examples/atari/reproduction/rainbow/train_rainbow.py @@ -154,8 +154,6 @@ def phi(x): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load_ or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: diff --git a/examples/mujoco/reproduction/ddpg/README.md b/examples/mujoco/reproduction/ddpg/README.md index 4386dbf0a..bdc824806 100644 --- a/examples/mujoco/reproduction/ddpg/README.md +++ b/examples/mujoco/reproduction/ddpg/README.md @@ -14,6 +14,11 @@ To run the training example: ``` python train_ddpg.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_ddpg.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` ### Useful Options @@ -23,7 +28,7 @@ python train_ddpg.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). 
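The MuJoCo scripts patched below resolve pretrained weights the same way as the Atari scripts above. As a rough sketch (the `resolve_model_dir` helper is hypothetical and only illustrates the pattern; `pfrl.utils.download_model` is the utility added in `pfrl/utils/pretrained_models.py`):

```
# Sketch only: mirrors the --load / --load-pretrained handling added to the
# training scripts in this patch. "resolve_model_dir" is a hypothetical helper.
from pfrl import utils


def resolve_model_dir(load, load_pretrained, env, pretrained_type):
    # --load and --load-pretrained are mutually exclusive
    assert not load or not load_pretrained
    if load:
        return load
    # download_model returns (path, is_cached); agent.load() only needs the path
    return utils.download_model("DDPG", env, model_type=pretrained_type)[0]


# e.g. agent.load(resolve_model_dir("", True, "HalfCheetah-v2", "best"))
```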
diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py b/examples/mujoco/reproduction/ddpg/train_ddpg.py index d8dca96a6..45614ead9 100644 --- a/examples/mujoco/reproduction/ddpg/train_ddpg.py +++ b/examples/mujoco/reproduction/ddpg/train_ddpg.py @@ -175,8 +175,6 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not args.load_pretrained if len(args.load) > 0: @@ -205,6 +203,11 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/ppo/README.md b/examples/mujoco/reproduction/ppo/README.md index 89b59b0f1..7170455c4 100644 --- a/examples/mujoco/reproduction/ppo/README.md +++ b/examples/mujoco/reproduction/ppo/README.md @@ -16,6 +16,12 @@ To run the training example: python train_ppo.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_ppo.py --demo --load-pretrained --env HalfCheetah-v2 --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_ppo.py --gpu -1`. @@ -24,7 +30,7 @@ python train_ppo.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py index 4ef24e37c..8bf7fbe5f 100644 --- a/examples/mujoco/reproduction/ppo/train_ppo.py +++ b/examples/mujoco/reproduction/ppo/train_ppo.py @@ -206,8 +206,6 @@ def ortho_init(layer, gain): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: @@ -232,6 +230,11 @@ def ortho_init(layer, gain): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_batch_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/soft_actor_critic/README.md b/examples/mujoco/reproduction/soft_actor_critic/README.md index 02659d528..319fdd0c0 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/README.md +++ b/examples/mujoco/reproduction/soft_actor_critic/README.md @@ -15,6 +15,12 @@ To run the training example: python train_soft_actor_critic.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. 
To load a pretrained model: + +``` +python train_soft_actor_critic.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_soft_actor_critic.py --gpu -1`. @@ -24,7 +30,7 @@ python train_soft_actor_critic.py [options] - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. -- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). +- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py index 91be09af6..929cb2925 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py +++ b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py @@ -226,8 +226,6 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not args.load_pretrained if len(args.load) > 0: @@ -255,6 +253,11 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_batch_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/td3/README.md b/examples/mujoco/reproduction/td3/README.md index cc37ebc26..a9503b03c 100644 --- a/examples/mujoco/reproduction/td3/README.md +++ b/examples/mujoco/reproduction/td3/README.md @@ -14,6 +14,13 @@ To run the training example: python train_td3.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_td3.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_td3.py --gpu -1`. @@ -23,7 +30,7 @@ python train_td3.py [options] - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. -- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). +- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. 
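The `--demo` branches patched in the DDPG, PPO, and SAC scripts above (and in the TD3 and TRPO scripts below) now also write the evaluation statistics to `demo_scores.json` under `--outdir`. A minimal sketch for reading that file back, with a placeholder output directory:

```
# Reads the demo_scores.json written by a --demo run; "results/demo" stands in
# for whatever --outdir was used.
import json
import os

outdir = "results/demo"
with open(os.path.join(outdir, "demo_scores.json")) as f:
    scores = json.load(f)

# Keys follow the eval_stats dict printed by the scripts (episodes, mean, median, stdev).
print(scores["mean"], scores["median"], scores["stdev"])
```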
diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index 64d978c1d..2ca26a44a 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -175,8 +175,6 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not args.load_pretrained if len(args.load) > 0: @@ -205,6 +203,11 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/trpo/README.md b/examples/mujoco/reproduction/trpo/README.md index 6ecde7dff..1841ee7e4 100644 --- a/examples/mujoco/reproduction/trpo/README.md +++ b/examples/mujoco/reproduction/trpo/README.md @@ -16,6 +16,12 @@ To run the training example: python train_trpo.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_trpo.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_trpo.py --gpu -1`. diff --git a/examples/mujoco/reproduction/trpo/train_trpo.py b/examples/mujoco/reproduction/trpo/train_trpo.py index 53e95c616..f9c88c79f 100644 --- a/examples/mujoco/reproduction/trpo/train_trpo.py +++ b/examples/mujoco/reproduction/trpo/train_trpo.py @@ -181,8 +181,6 @@ def ortho_init(layer, gain): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: @@ -211,6 +209,11 @@ def ortho_init(layer, gain): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: pfrl.experiments.train_agent_with_evaluation( diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index ee470f70a..bbd266931 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -1,3 +1,162 @@ +"""This file is a fork from ChainerCV, an MIT-licensed project, +https://github.com/chainer/chainercv/blob/master/chainercv/utils/download.py +""" + +import hashlib +import os +import posixpath +import shutil +import sys +import tempfile +import time +import zipfile + +import filelock +from six.moves.urllib import request + +_models_root = os.environ.get( + "PFRL_MODELS_ROOT", os.path.join(os.path.expanduser("~"), ".pfrl", "models") +) + + +MODELS = { + "DQN": ["best", "final"], + "IQN": ["best", "final"], + "Rainbow": ["best", "final"], + "A3C": ["best", "final"], + "DDPG": ["best", "final"], + "TRPO": ["best", "final"], + "PPO": ["final"], + "TD3": ["best", "final"], + "SAC": ["best", "final"], +} + +download_url = "https://chainer-assets.preferred.jp/pfrl/" + + +def _get_model_directory(model_name, create_directory=True): + """Gets the path to the directory of given model. 
+ + The generated path is just a concatenation of the global root directory + and the model name. This function forked from Chainer, an MIT-licensed project, + https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L43 + Args: + model_name (str): Name of the model. + create_directory (bool): If True (default), this function also creates + the directory at the first time. If the directory already exists, + then this option is ignored. + Returns: + str: Path to the dataset directory. + """ + path = os.path.join(_models_root, model_name) + if create_directory: + try: + os.makedirs(path) + except OSError: + if not os.path.isdir(path): + raise + return path + + +def _reporthook(count, block_size, total_size): + global start_time + if count == 0: + start_time = time.time() + print(" % Total Recv Speed Time left") + return + duration = time.time() - start_time + progress_size = count * block_size + try: + speed = progress_size / duration + except ZeroDivisionError: + speed = float("inf") + percent = progress_size / total_size * 100 + eta = int((total_size - progress_size) / speed) + sys.stdout.write( + "\r{:3.0f} {:4.0f}MiB {:4.0f}MiB {:6.0f}KiB/s {:4d}:{:02d}:{:02d}".format( + percent, + total_size / (1 << 20), + progress_size / (1 << 20), + speed / (1 << 10), + eta // 60 // 60, + (eta // 60) % 60, + eta % 60, + ) + ) + sys.stdout.flush() + + +def cached_download(url): + """Downloads a file and caches it. + + It downloads a file from the URL if there is no corresponding cache. + If there is already a cache for the given URL, it just returns the + path to the cache without downloading the same file. + This function forked from Chainer, an MIT-licensed project, + https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L70 + Args: + url (string): URL to download from. + Returns: + string: Path to the downloaded file. + """ + cache_root = os.path.join(_models_root, "_dl_cache") + try: + os.makedirs(cache_root) + except OSError: + if not os.path.exists(cache_root): + raise + lock_path = os.path.join(cache_root, "_dl_lock") + urlhash = hashlib.md5(url.encode("utf-8")).hexdigest() + cache_path = os.path.join(cache_root, urlhash) + + with filelock.FileLock(lock_path): + if os.path.exists(cache_path): + return cache_path + temp_root = tempfile.mkdtemp(dir=cache_root) + try: + temp_path = os.path.join(temp_root, "dl") + print("Downloading ...") + print("From: {:s}".format(url)) + print("To: {:s}".format(cache_path)) + request.urlretrieve(url, temp_path, _reporthook) + with filelock.FileLock(lock_path): + shutil.move(temp_path, cache_path) + finally: + shutil.rmtree(temp_root) + + return cache_path + + +def download_and_store_model(alg, url, env, model_type): + """Downloads a model file and puts it under model directory. + + It downloads a file from the URL and puts it under model directory. + If there is already a file at the destination path, + it just returns the path without downloading the same file. + Args: + alg (string): String representation of algorithm used in MODELS dict. + url (string): URL to download from. + env (string): Environment in which pretrained model was trained. + model_type (string): Either `best` or `final`. + Returns: + string: Path to the downloaded file. + bool: whether the model was already cached. 
+ """ + lock = os.path.join(_get_model_directory(".lock"), "models.lock") + with filelock.FileLock(lock): + root = _get_model_directory(os.path.join(alg, env)) + url_basepath = posixpath.join(url, alg, env) + file = model_type + ".zip" + path = os.path.join(root, file) + is_cached = os.path.exists(path) + if not is_cached: + cache_path = cached_download(posixpath.join(url_basepath, file)) + os.rename(cache_path, path) + with zipfile.ZipFile(path, "r") as zip_ref: + zip_ref.extractall(root) + return os.path.join(root, model_type), is_cached + + def download_model(alg, env, model_type="best"): """Downloads and returns pretrained model. @@ -9,4 +168,10 @@ def download_model(alg, env, model_type="best"): str: Path to the downloaded file. bool: whether the model was already cached. """ - raise NotImplementedError() + assert alg in MODELS, "No pretrained models for " + alg + "." + assert model_type in MODELS[alg], ( + 'Model type "' + model_type + '" is not supported.' + ) + env = env.replace("NoFrameskip-v4", "") + model_path, is_cached = download_and_store_model(alg, download_url, env, model_type) + return model_path, is_cached diff --git a/requirements.txt b/requirements.txt index 43bb94eed..45b6e8b0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ torch>=1.3.0 gym>=0.9.7 numpy>=1.10.4 +filelock pillow diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py new file mode 100644 index 000000000..e1ed17553 --- /dev/null +++ b/tests/utils_tests/test_pretrained_models.py @@ -0,0 +1,590 @@ +import os + +import numpy as np +import pytest +import torch +from torch import nn + +import pfrl +import pfrl.nn as pnn +from pfrl import agents, explorers, replay_buffers +from pfrl.initializers import init_chainer_default +from pfrl.utils import download_model + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadDQN: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_dqn(self, gpu): + from pfrl.q_functions import DiscreteActionValueHead + + n_actions = 4 + q_func = nn.Sequential( + pnn.LargeAtariCNN(), + init_chainer_default(nn.Linear(512, n_actions)), + DiscreteActionValueHead(), + ) + + # Use the same hyperparameters as the Nature paper + + opt = pfrl.optimizers.RMSpropEpsInsideSqrt( + q_func.parameters(), + lr=2.5e-4, + alpha=0.95, + momentum=0.0, + eps=1e-2, + centered=True, + ) + + rbuf = replay_buffers.ReplayBuffer(100) + + explorer = explorers.LinearDecayEpsilonGreedy( + start_epsilon=1.0, + end_epsilon=0.1, + decay_steps=10 ** 6, + random_action_func=lambda: np.random.randint(4), + ) + + agent = agents.DQN( + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + replay_start_size=50, + target_update_interval=10 ** 4, + clip_delta=True, + update_interval=4, + batch_accumulator="sum", + phi=lambda x: x, + ) + + downloaded_model, exists = download_model( + "DQN", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_dqn(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_dqn(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadIQN: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_iqn(self, gpu): + n_actions = 4 + q_func = 
pfrl.agents.iqn.ImplicitQuantileQFunction( + psi=nn.Sequential( + nn.Conv2d(4, 32, 8, stride=4), + nn.ReLU(), + nn.Conv2d(32, 64, 4, stride=2), + nn.ReLU(), + nn.Conv2d(64, 64, 3, stride=1), + nn.ReLU(), + nn.Flatten(), + ), + phi=nn.Sequential(pfrl.agents.iqn.CosineBasisLinear(64, 3136), nn.ReLU(),), + f=nn.Sequential( + nn.Linear(3136, 512), nn.ReLU(), nn.Linear(512, n_actions), + ), + ) + + # Use the same hyper parameters as https://arxiv.org/abs/1710.10044 + opt = torch.optim.Adam(q_func.parameters(), lr=5e-5, eps=1e-2 / 32) + + rbuf = replay_buffers.ReplayBuffer(100) + + explorer = explorers.LinearDecayEpsilonGreedy( + start_epsilon=1.0, + end_epsilon=0.1, + decay_steps=10 ** 6, + random_action_func=lambda: np.random.randint(4), + ) + + agent = agents.IQN( + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + replay_start_size=50, + target_update_interval=10 ** 4, + update_interval=4, + batch_accumulator="mean", + phi=lambda x: x, + quantile_thresholds_N=64, + quantile_thresholds_N_prime=64, + quantile_thresholds_K=32, + ) + + downloaded_model, exists = download_model( + "IQN", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_iqn(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_iqn(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadRainbow: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_rainbow(self, gpu): + from pfrl.q_functions import DistributionalDuelingDQN + + q_func = DistributionalDuelingDQN(4, 51, -10, 10) + pnn.to_factorized_noisy(q_func, sigma_scale=0.5) + explorer = explorers.Greedy() + opt = torch.optim.Adam(q_func.parameters(), 6.25e-5, eps=1.5 * 10 ** -4) + rbuf = replay_buffers.ReplayBuffer(100) + agent = agents.CategoricalDoubleDQN( + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + minibatch_size=32, + replay_start_size=50, + target_update_interval=32000, + update_interval=4, + batch_accumulator="mean", + phi=lambda x: x, + ) + + downloaded_model, exists = download_model( + "Rainbow", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_rainbow(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_rainbow(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadA3C: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def test_load_a3c(self): + from pfrl.policies import SoftmaxCategoricalHead + + obs_size = 4 + n_actions = 4 + a3c_model = nn.Sequential( + nn.Conv2d(obs_size, 16, 8, stride=4), + nn.ReLU(), + nn.Conv2d(16, 32, 4, stride=2), + nn.ReLU(), + nn.Flatten(), + nn.Linear(2592, 256), + nn.ReLU(), + pfrl.nn.Branched( + nn.Sequential(nn.Linear(256, n_actions), SoftmaxCategoricalHead(),), + nn.Linear(256, 1), + ), + ) + from pfrl.optimizers import SharedRMSpropEpsInsideSqrt + + opt = SharedRMSpropEpsInsideSqrt( + a3c_model.parameters(), lr=7e-4, eps=1e-1, alpha=0.99 + ) + agent = agents.A3C( + a3c_model, opt, t_max=5, gamma=0.99, beta=1e-2, phi=lambda x: x + ) + downloaded_model, exists = download_model( + "A3C", "BreakoutNoFrameskip-v4", 
model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadDDPG: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_ddpg(self, gpu): + + obs_size = 11 + action_size = 3 + from pfrl.nn import ConcatObsAndAction + + q_func = nn.Sequential( + ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, 1), + ) + from pfrl.nn import BoundByTanh + from pfrl.policies import DeterministicHead + + policy = nn.Sequential( + nn.Linear(obs_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, action_size), + BoundByTanh(low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0]), + DeterministicHead(), + ) + + opt_a = torch.optim.Adam(policy.parameters()) + opt_c = torch.optim.Adam(q_func.parameters()) + + explorer = explorers.AdditiveGaussian( + scale=0.1, low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0] + ) + + agent = agents.DDPG( + policy, + q_func, + opt_a, + opt_c, + replay_buffers.ReplayBuffer(100), + gamma=0.99, + explorer=explorer, + replay_start_size=1000, + target_update_method="soft", + target_update_interval=1, + update_interval=1, + soft_update_tau=5e-3, + n_times_update=1, + gpu=gpu, + minibatch_size=100, + burnin_action_func=None, + ) + + downloaded_model, exists = download_model( + "DDPG", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_ddpg(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_ddpg(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadTRPO: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_trpo(self, gpu): + obs_size = 11 + action_size = 3 + + policy = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, action_size), + pfrl.policies.GaussianHeadWithStateIndependentCovariance( + action_size=action_size, + var_type="diagonal", + var_func=lambda x: torch.exp(2 * x), # Parameterize log std + var_param_init=0, # log std = 0 => std = 1 + ), + ) + + vf = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, 1), + ) + vf_opt = torch.optim.Adam(vf.parameters()) + + agent = agents.TRPO( + policy=policy, + vf=vf, + vf_optimizer=vf_opt, + gpu=gpu, + update_interval=5000, + max_kl=0.01, + conjugate_gradient_max_iter=20, + conjugate_gradient_damping=1e-1, + gamma=0.995, + lambd=0.97, + vf_epochs=5, + entropy_coef=0, + ) + + downloaded_model, exists = download_model( + "TRPO", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_trpo(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_trpo(gpu=0) + + +class TestLoadPPO: + def _test_load_ppo(self, gpu): + obs_size = 11 + action_size = 3 + from pfrl.policies import GaussianHeadWithStateIndependentCovariance + + policy = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, action_size), + 
GaussianHeadWithStateIndependentCovariance( + action_size=action_size, + var_type="diagonal", + var_func=lambda x: torch.exp(2 * x), # Parameterize log std + var_param_init=0, # log std = 0 => std = 1 + ), + ) + + vf = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, 1), + ) + + model = pnn.Branched(policy, vf) + opt = torch.optim.Adam(model.parameters(), lr=3e-4, eps=1e-5) + + agent = agents.PPO( + model, + opt, + obs_normalizer=None, + gpu=gpu, + update_interval=2048, + minibatch_size=64, + epochs=10, + clip_eps_vf=None, + entropy_coef=0, + standardize_advantages=True, + gamma=0.995, + lambd=0.97, + ) + + downloaded_model, exists = download_model( + "PPO", "Hopper-v2", model_type="final" + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_ppo(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_ppo(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadTD3: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_td3(self, gpu): + + obs_size = 11 + action_size = 3 + + def make_q_func_with_optimizer(): + q_func = nn.Sequential( + pnn.ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, 1), + ) + q_func_optimizer = torch.optim.Adam(q_func.parameters()) + return q_func, q_func_optimizer + + q_func1, q_func1_optimizer = make_q_func_with_optimizer() + q_func2, q_func2_optimizer = make_q_func_with_optimizer() + + policy = nn.Sequential( + nn.Linear(obs_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, action_size), + nn.Tanh(), + pfrl.policies.DeterministicHead(), + ) + policy_optimizer = torch.optim.Adam(policy.parameters()) + + rbuf = replay_buffers.ReplayBuffer(100) + explorer = explorers.AdditiveGaussian( + scale=0.1, low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0] + ) + + agent = agents.TD3( + policy, + q_func1, + q_func2, + policy_optimizer, + q_func1_optimizer, + q_func2_optimizer, + rbuf, + gamma=0.99, + soft_update_tau=5e-3, + explorer=explorer, + replay_start_size=1000, + gpu=gpu, + minibatch_size=100, + burnin_action_func=None, + ) + + downloaded_model, exists = download_model( + "TD3", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_td3(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_td3(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadSAC: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_sac(self, gpu): + obs_size = 11 + action_size = 3 + + def squashed_diagonal_gaussian_head(x): + assert x.shape[-1] == action_size * 2 + mean, log_scale = torch.chunk(x, 2, dim=1) + log_scale = torch.clamp(log_scale, -20.0, 2.0) + var = torch.exp(log_scale * 2) + from torch import distributions + + base_distribution = distributions.Independent( + distributions.Normal(loc=mean, scale=torch.sqrt(var)), 1 + ) + # cache_size=1 is required for numerical stability + return distributions.transformed_distribution.TransformedDistribution( + base_distribution, + [distributions.transforms.TanhTransform(cache_size=1)], + ) + + from 
pfrl.nn.lmbda import Lambda + + policy = nn.Sequential( + nn.Linear(obs_size, 256), + nn.ReLU(), + nn.Linear(256, 256), + nn.ReLU(), + nn.Linear(256, action_size * 2), + Lambda(squashed_diagonal_gaussian_head), + ) + policy_optimizer = torch.optim.Adam(policy.parameters(), lr=3e-4) + + def make_q_func_with_optimizer(): + q_func = nn.Sequential( + pfrl.nn.ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 256), + nn.ReLU(), + nn.Linear(256, 256), + nn.ReLU(), + nn.Linear(256, 1), + ) + torch.nn.init.xavier_uniform_(q_func[1].weight) + torch.nn.init.xavier_uniform_(q_func[3].weight) + torch.nn.init.xavier_uniform_(q_func[5].weight) + q_func_optimizer = torch.optim.Adam(q_func.parameters(), lr=3e-4) + return q_func, q_func_optimizer + + q_func1, q_func1_optimizer = make_q_func_with_optimizer() + q_func2, q_func2_optimizer = make_q_func_with_optimizer() + + agent = agents.SoftActorCritic( + policy, + q_func1, + q_func2, + policy_optimizer, + q_func1_optimizer, + q_func2_optimizer, + replay_buffers.ReplayBuffer(100), + gamma=0.99, + replay_start_size=1000, + gpu=gpu, + minibatch_size=256, + burnin_action_func=None, + entropy_target=-3, + temperature_optimizer_lr=3e-4, + ) + + downloaded_model, exists = download_model( + "SAC", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_sac(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_sac(gpu=0)
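For reference, the utility added in `pfrl/utils/pretrained_models.py` can also be called directly, as the tests above do. A minimal sketch, assuming network access on the first call (later calls return the cached copy under `~/.pfrl/models` or `$PFRL_MODELS_ROOT`):

```
# Sketch of direct usage of pfrl.utils.download_model.
import os

from pfrl.utils import download_model

# Returns (path to the extracted model directory, whether it was already cached).
model_dir, was_cached = download_model(
    "DQN", "BreakoutNoFrameskip-v4", model_type="best"
)
print(model_dir)  # pass this to agent.load(), as in the tests above

# The CI configs set PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED=1 after prefetching the
# result zips with gsutil, so a test fails if the model was not served from the cache.
if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"):
    assert was_cached
```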