Merge pull request #33 from prabhatnagarajan/pretrained_models

Pretrained models
pfnet · Dec 16, 2020 · 4e45b8d · 4e45b8d
2 parents 322fa45 + 719dfce
commit 4e45b8d
Show file tree

Hide file tree

Showing 24 changed files with 874 additions and 32 deletions.
diff --git a/.pfnci/config.pbtxt b/.pfnci/config.pbtxt
@@ -10,6 +10,7 @@ configs {
       seconds: 1200
     }
     environment_variables { key: "GPU" value: "1" }
+    environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
     command: "bash .pfnci/script.sh gpu"
   }
 }
@@ -27,6 +28,7 @@ configs {
     }
     environment_variables { key: "GPU" value: "1" }
     environment_variables { key: "SLOW" value: "1" }
+    environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
     command: "bash .pfnci/script.sh gpu"
   }
 }
@@ -44,6 +46,7 @@ configs {
     }
     environment_variables { key: "GPU" value: "1" }
     environment_variables { key: "TEST_EXAMPLES" value: "1" }
+    environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
     command: "bash .pfnci/script.sh gpu"
   }
 }
@@ -59,6 +62,7 @@ configs {
       seconds: 2400
     }
     environment_variables { key: "SLOW" value: "1" }
+    environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
     command: "bash .pfnci/script.sh cpu"
   }
 }
@@ -74,6 +78,7 @@ configs {
     time_limit {
       seconds: 1200
     }
+    environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
     command: "bash .pfnci/script.sh cpu"
   }
 }
@@ -88,6 +93,7 @@ configs {
     time_limit {
       seconds: 1200
     }
+    environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
     environment_variables { key: "TEST_EXAMPLES" value: "1" }
     command: "bash .pfnci/script.sh cpu"
   }
@@ -104,6 +110,7 @@ configs {
     time_limit {
       seconds: 1200
     }
+    environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" }
     command: "bash .pfnci/lint.sh"
   }
 }
diff --git a/.pfnci/run.sh b/.pfnci/run.sh
@@ -52,7 +52,6 @@ main() {
     marker+=' and gpu'
     bucket="${GPU}"
   fi
-  marker+=' and not download_model'
 
 
   UBUNTU_VERSION_ID=$(grep DISTRIB_RELEASE /etc/lsb-release | cut -d "=" -f2)

diff --git a/.pfnci/script.sh b/.pfnci/script.sh
@@ -34,7 +34,7 @@ main() {
   wait
 
   # Prepare docker args.
-  docker_args=(docker run  --rm --volume="$(pwd):/src:ro") 
+  docker_args=(docker run  --rm --volume="$(pwd):/src:ro" --volume="/root/.pfrl:/root/.pfrl/") 
   if [ "${GPU:-0}" != '0' ]; then
     docker_args+=(--ipc=host --privileged --env="GPU=${GPU}" --runtime=nvidia)
   fi
@@ -50,6 +50,14 @@ main() {
   docker_image=pytorch/pytorch:1.5.1-cuda10.1-cudnn7-runtime
   docker_args+=(--env="SLOW=${SLOW:-0}")
 
+  for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip
+  do
+      gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} .
+      mkdir -p ~/.pfrl/models/
+      unzip ${ZIP} -d ~/.pfrl/models/
+      rm ${ZIP}
+  done
+
   run "${docker_args[@]}" "${docker_image}" bash /src/.pfnci/run.sh "${TARGET}"
 }
 

diff --git a/examples/atari/reproduction/a3c/README.md b/examples/atari/reproduction/a3c/README.md
@@ -13,13 +13,19 @@ To run the training example:
 python train_a3c.py [options]
 ```
 
+We have already trained models from this script for all the domains listed in the [results](#Results). To load a pretrained model:
+```
+python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best
+```
+
 ### Useful Options
 - `--env`. Specifies the environment. 
 - `--render`. Add this option to render the states in a GUI window.
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.
 

diff --git a/examples/atari/reproduction/a3c/train_a3c.py b/examples/atari/reproduction/a3c/train_a3c.py
@@ -45,6 +45,9 @@ def main():
     parser.add_argument("--eval-n-steps", type=int, default=125000)
     parser.add_argument("--demo", action="store_true", default=False)
     parser.add_argument("--load-pretrained", action="store_true", default=False)
+    parser.add_argument(
+        "--pretrained-type", type=str, default="best", choices=["best", "final"]
+    )
     parser.add_argument("--load", type=str, default="")
     parser.add_argument(
         "--log-level",
@@ -144,11 +147,17 @@ def phi(x):
         max_grad_norm=40.0,
     )
 
-    if args.load_pretrained:
-        raise Exception("Pretrained models are currently unsupported.")
-
-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(
+                utils.download_model("A3C", args.env, model_type=args.pretrained_type)[
+                    0
+                ]
+            )
 
     if args.demo:
         env = make_env(0, True)

diff --git a/examples/atari/reproduction/dqn/README.md b/examples/atari/reproduction/dqn/README.md
@@ -5,13 +5,19 @@ This example trains a DQN agent, from the following paper: [Human-level control
 
 - atari_py>=0.1.1
 - opencv-python
+- filelock
 
 ## Running the Example
 
 To run the training example:
 ```
 python train_dqn.py [options]
 ```
+We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance reported in the [results](#Results). To load a pretrained model:
+
+```
+python train_dqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
+```
 
 ### Useful Options
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`.
@@ -20,7 +26,7 @@ python train_dqn.py [options]
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.

diff --git a/examples/atari/reproduction/dqn/train_dqn.py b/examples/atari/reproduction/dqn/train_dqn.py
@@ -163,8 +163,6 @@ def phi(x):
     )
 
     if args.load or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not args.load or not args.load_pretrained
         if args.load:

diff --git a/examples/atari/reproduction/iqn/README.md b/examples/atari/reproduction/iqn/README.md
@@ -13,14 +13,20 @@ To run the training example:
 python train_iqn.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance reported in the [results](#Results). To load a pretrained model:
+
+```
+python train_iqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
+```
+
 ### Useful Options
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_iqn.py --gpu -1`.
 - `--env`. Specifies the environment. 
 - `--render`. Add this option to render the states in a GUI window.
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.

diff --git a/examples/atari/reproduction/iqn/train_iqn.py b/examples/atari/reproduction/iqn/train_iqn.py
@@ -162,8 +162,6 @@ def phi(x):
     )
 
     if args.load or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not args.load or not args.load_pretrained
         if args.load:

diff --git a/examples/atari/reproduction/rainbow/README.md b/examples/atari/reproduction/rainbow/README.md
@@ -13,14 +13,20 @@ To run the training example:
 python train_rainbow.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:
+
+```
+python train_rainbow.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
+```
+
 ### Useful Options
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_rainbow.py --gpu -1`.
 - `--env`. Specifies the environment. 
 - `--render`. Add this option to render the states in a GUI window.
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.

diff --git a/examples/atari/reproduction/rainbow/train_rainbow.py b/examples/atari/reproduction/rainbow/train_rainbow.py
@@ -154,8 +154,6 @@ def phi(x):
     )
 
     if args.load or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load_ or load_pretrained must be false
         assert not args.load or not args.load_pretrained
         if args.load:

diff --git a/examples/mujoco/reproduction/ddpg/README.md b/examples/mujoco/reproduction/ddpg/README.md
@@ -14,6 +14,11 @@ To run the training example:
 ```
 python train_ddpg.py [options]
 ```
+We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:
+
+```
+python train_ddpg.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1
+```
 
 ### Useful Options
 
@@ -23,7 +28,7 @@ python train_ddpg.py [options]
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 

diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py b/examples/mujoco/reproduction/ddpg/train_ddpg.py
@@ -175,8 +175,6 @@ def burnin_action_func():
     )
 
     if len(args.load) > 0 or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not len(args.load) > 0 or not args.load_pretrained
         if len(args.load) > 0:
@@ -205,6 +203,11 @@ def burnin_action_func():
                 eval_stats["stdev"],
             )
         )
+        import json
+        import os
+
+        with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f:
+            json.dump(eval_stats, f)
     else:
         experiments.train_agent_with_evaluation(
             agent=agent,

diff --git a/examples/mujoco/reproduction/ppo/README.md b/examples/mujoco/reproduction/ppo/README.md
@@ -16,6 +16,12 @@ To run the training example:
 python train_ppo.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:
+
+```
+python train_ppo.py --demo --load-pretrained --env HalfCheetah-v2 --gpu -1
+```
+
 ### Useful Options
 
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_ppo.py --gpu -1`.
@@ -24,7 +30,7 @@ python train_ppo.py [options]
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 
 To view the full list of options, either view the code or run the example with the `--help` option.
 

diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py
@@ -206,8 +206,6 @@ def ortho_init(layer, gain):
     )
 
     if args.load or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not args.load or not args.load_pretrained
         if args.load:
@@ -232,6 +230,11 @@ def ortho_init(layer, gain):
                 eval_stats["stdev"],
             )
         )
+        import json
+        import os
+
+        with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f:
+            json.dump(eval_stats, f)
     else:
         experiments.train_agent_batch_with_evaluation(
             agent=agent,

diff --git a/examples/mujoco/reproduction/soft_actor_critic/README.md b/examples/mujoco/reproduction/soft_actor_critic/README.md
@@ -15,6 +15,12 @@ To run the training example:
 python train_soft_actor_critic.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:
+
+```
+python train_soft_actor_critic.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1
+```
+
 ### Useful Options
 
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_soft_actor_critic.py --gpu -1`.
@@ -24,7 +30,7 @@ python train_soft_actor_critic.py [options]
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
 - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
-- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
+- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.
 

diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py
@@ -226,8 +226,6 @@ def burnin_action_func():
     )
 
     if len(args.load) > 0 or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not len(args.load) > 0 or not args.load_pretrained
         if len(args.load) > 0:
@@ -255,6 +253,11 @@ def burnin_action_func():
                 eval_stats["stdev"],
             )
         )
+        import json
+        import os
+
+        with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f:
+            json.dump(eval_stats, f)
     else:
         experiments.train_agent_batch_with_evaluation(
             agent=agent,

diff --git a/examples/mujoco/reproduction/td3/README.md b/examples/mujoco/reproduction/td3/README.md
@@ -14,6 +14,13 @@ To run the training example:
 python train_td3.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:
+
+```
+python train_td3.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1
+```
+
+
 ### Useful Options
 
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_td3.py --gpu -1`.
@@ -23,7 +30,7 @@ python train_td3.py [options]
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
 - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
-- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
+- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.