diff --git a/.pfnci/config.pbtxt b/.pfnci/config.pbtxt index 9a6fa5842..ed7e4c891 100644 --- a/.pfnci/config.pbtxt +++ b/.pfnci/config.pbtxt @@ -10,6 +10,7 @@ configs { seconds: 1200 } environment_variables { key: "GPU" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -27,6 +28,7 @@ configs { } environment_variables { key: "GPU" value: "1" } environment_variables { key: "SLOW" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -44,6 +46,7 @@ configs { } environment_variables { key: "GPU" value: "1" } environment_variables { key: "TEST_EXAMPLES" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -59,6 +62,7 @@ configs { seconds: 2400 } environment_variables { key: "SLOW" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh cpu" } } @@ -74,6 +78,7 @@ configs { time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh cpu" } } @@ -88,6 +93,7 @@ configs { time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } environment_variables { key: "TEST_EXAMPLES" value: "1" } command: "bash .pfnci/script.sh cpu" } @@ -104,6 +110,7 @@ configs { time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/lint.sh" } } diff --git a/.pfnci/run.sh b/.pfnci/run.sh index 61bf4cf20..dbd78af73 100644 --- a/.pfnci/run.sh +++ b/.pfnci/run.sh @@ -52,7 +52,6 @@ main() { marker+=' and gpu' bucket="${GPU}" fi - marker+=' and not download_model' UBUNTU_VERSION_ID=$(grep DISTRIB_RELEASE /etc/lsb-release | cut -d "=" -f2) diff --git a/.pfnci/script.sh b/.pfnci/script.sh index f845e754b..e74f7f9ec 100644 --- a/.pfnci/script.sh +++ b/.pfnci/script.sh @@ -34,7 +34,7 @@ main() { wait # Prepare docker args. - docker_args=(docker run --rm --volume="$(pwd):/src:ro") + docker_args=(docker run --rm --volume="$(pwd):/src:ro" --volume="/root/.pfrl:/root/.pfrl/") if [ "${GPU:-0}" != '0' ]; then docker_args+=(--ipc=host --privileged --env="GPU=${GPU}" --runtime=nvidia) fi @@ -50,6 +50,14 @@ main() { docker_image=pytorch/pytorch:1.5.1-cuda10.1-cudnn7-runtime docker_args+=(--env="SLOW=${SLOW:-0}") + for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip + do + gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} . + mkdir -p ~/.pfrl/models/ + unzip ${ZIP} -d ~/.pfrl/models/ + rm ${ZIP} + done + run "${docker_args[@]}" "${docker_image}" bash /src/.pfnci/run.sh "${TARGET}" } diff --git a/examples/atari/reproduction/a3c/README.md b/examples/atari/reproduction/a3c/README.md index 064a3a328..cae67f45c 100644 --- a/examples/atari/reproduction/a3c/README.md +++ b/examples/atari/reproduction/a3c/README.md @@ -13,13 +13,19 @@ To run the training example: python train_a3c.py [options] ``` +We have already trained models from this script for all the domains listed in the [results](#Results). To load a pretrained model: +``` +python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best +``` + ### Useful Options - `--env`. 
Specifies the environment.
 - `--render`. Add this option to render the states in a GUI window.
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.
 
diff --git a/examples/atari/reproduction/a3c/train_a3c.py b/examples/atari/reproduction/a3c/train_a3c.py
index 4e7a1887c..3913f1196 100644
--- a/examples/atari/reproduction/a3c/train_a3c.py
+++ b/examples/atari/reproduction/a3c/train_a3c.py
@@ -45,6 +45,9 @@ def main():
     parser.add_argument("--eval-n-steps", type=int, default=125000)
     parser.add_argument("--demo", action="store_true", default=False)
     parser.add_argument("--load-pretrained", action="store_true", default=False)
+    parser.add_argument(
+        "--pretrained-type", type=str, default="best", choices=["best", "final"]
+    )
     parser.add_argument("--load", type=str, default="")
     parser.add_argument(
         "--log-level",
@@ -144,11 +147,17 @@ def phi(x):
         max_grad_norm=40.0,
     )
 
-    if args.load_pretrained:
-        raise Exception("Pretrained models are currently unsupported.")
-
-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(
+                utils.download_model("A3C", args.env, model_type=args.pretrained_type)[
+                    0
+                ]
+            )
 
     if args.demo:
         env = make_env(0, True)
diff --git a/examples/atari/reproduction/dqn/README.md b/examples/atari/reproduction/dqn/README.md
index fcd8e65b2..c87d5f423 100644
--- a/examples/atari/reproduction/dqn/README.md
+++ b/examples/atari/reproduction/dqn/README.md
@@ -5,6 +5,7 @@ This example trains a DQN agent, from the following paper: [Human-level control
 
 - atari_py>=0.1.1
 - opencv-python
+- filelock
 
 ## Running the Example
 
@@ -12,6 +13,11 @@ To run the training example:
 ```
 python train_dqn.py [options]
 ```
+We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance of the [results](#Results). To load a pretrained model:
+
+```
+python train_dqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
+```
 
 ### Useful Options
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`.
@@ -20,7 +26,7 @@ python train_dqn.py [options]
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.
diff --git a/examples/atari/reproduction/dqn/train_dqn.py b/examples/atari/reproduction/dqn/train_dqn.py
index 1677109e6..72c210ad5 100644
--- a/examples/atari/reproduction/dqn/train_dqn.py
+++ b/examples/atari/reproduction/dqn/train_dqn.py
@@ -163,8 +163,6 @@ def phi(x):
     )
 
     if args.load or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not args.load or not args.load_pretrained
         if args.load:
diff --git a/examples/atari/reproduction/iqn/README.md b/examples/atari/reproduction/iqn/README.md
index 128289a1b..a8ae62fae 100644
--- a/examples/atari/reproduction/iqn/README.md
+++ b/examples/atari/reproduction/iqn/README.md
@@ -13,6 +13,12 @@ To run the training example:
 python train_iqn.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance of the [results](#Results). To load a pretrained model:
+
+```
+python train_iqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
+```
+
 ### Useful Options
 - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`.
 - `--env`. Specifies the environment.
@@ -20,7 +26,7 @@ python train_iqn.py [options]
 - `--seed`. This option specifies the random seed used.
 - `--outdir` This option specifies the output directory to which the results are written.
 - `--demo`. Runs an evaluation, instead of training the agent.
-- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
+- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together.
 - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).
 
 To view the full list of options, either view the code or run the example with the `--help` option.
diff --git a/examples/atari/reproduction/iqn/train_iqn.py b/examples/atari/reproduction/iqn/train_iqn.py
index 0e605467d..d4789f13a 100644
--- a/examples/atari/reproduction/iqn/train_iqn.py
+++ b/examples/atari/reproduction/iqn/train_iqn.py
@@ -162,8 +162,6 @@ def phi(x):
     )
 
     if args.load or args.load_pretrained:
-        if args.load_pretrained:
-            raise Exception("Pretrained models are currently unsupported.")
         # either load or load_pretrained must be false
         assert not args.load or not args.load_pretrained
         if args.load:
diff --git a/examples/atari/reproduction/rainbow/README.md b/examples/atari/reproduction/rainbow/README.md
index 8939771ce..d23d04022 100644
--- a/examples/atari/reproduction/rainbow/README.md
+++ b/examples/atari/reproduction/rainbow/README.md
@@ -13,6 +13,12 @@ To run the training example:
 python train_rainbow.py [options]
 ```
 
+We have already pretrained models from this script for all the domains listed in the [results](#Results) section.
To load a pretrained model: + +``` +python train_rainbow.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_rainbow.py --gpu -1`. - `--env`. Specifies the environment. @@ -20,7 +26,7 @@ python train_rainbow.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/atari/reproduction/rainbow/train_rainbow.py b/examples/atari/reproduction/rainbow/train_rainbow.py index 5edbacc4b..609ea9261 100644 --- a/examples/atari/reproduction/rainbow/train_rainbow.py +++ b/examples/atari/reproduction/rainbow/train_rainbow.py @@ -154,8 +154,6 @@ def phi(x): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load_ or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: diff --git a/examples/mujoco/reproduction/ddpg/README.md b/examples/mujoco/reproduction/ddpg/README.md index 4386dbf0a..bdc824806 100644 --- a/examples/mujoco/reproduction/ddpg/README.md +++ b/examples/mujoco/reproduction/ddpg/README.md @@ -14,6 +14,11 @@ To run the training example: ``` python train_ddpg.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_ddpg.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` ### Useful Options @@ -23,7 +28,7 @@ python train_ddpg.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). 
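The MuJoCo scripts patched below resolve pretrained weights the same way as the Atari scripts above. As a rough sketch (the `resolve_model_dir` helper is hypothetical and only illustrates the pattern; `pfrl.utils.download_model` is the utility added in `pfrl/utils/pretrained_models.py`):

```
# Sketch only: mirrors the --load / --load-pretrained handling added to the
# training scripts in this patch. "resolve_model_dir" is a hypothetical helper.
from pfrl import utils


def resolve_model_dir(load, load_pretrained, env, pretrained_type):
    # --load and --load-pretrained are mutually exclusive
    assert not load or not load_pretrained
    if load:
        return load
    # download_model returns (path, is_cached); agent.load() only needs the path
    return utils.download_model("DDPG", env, model_type=pretrained_type)[0]


# e.g. agent.load(resolve_model_dir("", True, "HalfCheetah-v2", "best"))
```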
diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py b/examples/mujoco/reproduction/ddpg/train_ddpg.py index d8dca96a6..45614ead9 100644 --- a/examples/mujoco/reproduction/ddpg/train_ddpg.py +++ b/examples/mujoco/reproduction/ddpg/train_ddpg.py @@ -175,8 +175,6 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not args.load_pretrained if len(args.load) > 0: @@ -205,6 +203,11 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/ppo/README.md b/examples/mujoco/reproduction/ppo/README.md index 89b59b0f1..7170455c4 100644 --- a/examples/mujoco/reproduction/ppo/README.md +++ b/examples/mujoco/reproduction/ppo/README.md @@ -16,6 +16,12 @@ To run the training example: python train_ppo.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_ppo.py --demo --load-pretrained --env HalfCheetah-v2 --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_ppo.py --gpu -1`. @@ -24,7 +30,7 @@ python train_ppo.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py index 4ef24e37c..8bf7fbe5f 100644 --- a/examples/mujoco/reproduction/ppo/train_ppo.py +++ b/examples/mujoco/reproduction/ppo/train_ppo.py @@ -206,8 +206,6 @@ def ortho_init(layer, gain): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: @@ -232,6 +230,11 @@ def ortho_init(layer, gain): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_batch_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/soft_actor_critic/README.md b/examples/mujoco/reproduction/soft_actor_critic/README.md index 02659d528..319fdd0c0 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/README.md +++ b/examples/mujoco/reproduction/soft_actor_critic/README.md @@ -15,6 +15,12 @@ To run the training example: python train_soft_actor_critic.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. 
To load a pretrained model: + +``` +python train_soft_actor_critic.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_soft_actor_critic.py --gpu -1`. @@ -24,7 +30,7 @@ python train_soft_actor_critic.py [options] - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. -- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). +- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py index 91be09af6..929cb2925 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py +++ b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py @@ -226,8 +226,6 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not args.load_pretrained if len(args.load) > 0: @@ -255,6 +253,11 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_batch_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/td3/README.md b/examples/mujoco/reproduction/td3/README.md index cc37ebc26..a9503b03c 100644 --- a/examples/mujoco/reproduction/td3/README.md +++ b/examples/mujoco/reproduction/td3/README.md @@ -14,6 +14,13 @@ To run the training example: python train_td3.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_td3.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_td3.py --gpu -1`. @@ -23,7 +30,7 @@ python train_td3.py [options] - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. -- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). +- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. 
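The `--demo` branches patched in the DDPG, PPO, and SAC scripts above (and in the TD3 and TRPO scripts below) now also write the evaluation statistics to `demo_scores.json` under `--outdir`. A minimal sketch for reading that file back, with a placeholder output directory:

```
# Reads the demo_scores.json written by a --demo run; "results/demo" stands in
# for whatever --outdir was used.
import json
import os

outdir = "results/demo"
with open(os.path.join(outdir, "demo_scores.json")) as f:
    scores = json.load(f)

# Keys follow the eval_stats dict printed by the scripts (episodes, mean, median, stdev).
print(scores["mean"], scores["median"], scores["stdev"])
```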
diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index 64d978c1d..2ca26a44a 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -175,8 +175,6 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not args.load_pretrained if len(args.load) > 0: @@ -205,6 +203,11 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/trpo/README.md b/examples/mujoco/reproduction/trpo/README.md index 6ecde7dff..1841ee7e4 100644 --- a/examples/mujoco/reproduction/trpo/README.md +++ b/examples/mujoco/reproduction/trpo/README.md @@ -16,6 +16,12 @@ To run the training example: python train_trpo.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_trpo.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_trpo.py --gpu -1`. diff --git a/examples/mujoco/reproduction/trpo/train_trpo.py b/examples/mujoco/reproduction/trpo/train_trpo.py index 53e95c616..f9c88c79f 100644 --- a/examples/mujoco/reproduction/trpo/train_trpo.py +++ b/examples/mujoco/reproduction/trpo/train_trpo.py @@ -181,8 +181,6 @@ def ortho_init(layer, gain): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: @@ -211,6 +209,11 @@ def ortho_init(layer, gain): eval_stats["stdev"], ) ) + import json + import os + + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: pfrl.experiments.train_agent_with_evaluation( diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index ee470f70a..bbd266931 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -1,3 +1,162 @@ +"""This file is a fork from ChainerCV, an MIT-licensed project, +https://github.com/chainer/chainercv/blob/master/chainercv/utils/download.py +""" + +import hashlib +import os +import posixpath +import shutil +import sys +import tempfile +import time +import zipfile + +import filelock +from six.moves.urllib import request + +_models_root = os.environ.get( + "PFRL_MODELS_ROOT", os.path.join(os.path.expanduser("~"), ".pfrl", "models") +) + + +MODELS = { + "DQN": ["best", "final"], + "IQN": ["best", "final"], + "Rainbow": ["best", "final"], + "A3C": ["best", "final"], + "DDPG": ["best", "final"], + "TRPO": ["best", "final"], + "PPO": ["final"], + "TD3": ["best", "final"], + "SAC": ["best", "final"], +} + +download_url = "https://chainer-assets.preferred.jp/pfrl/" + + +def _get_model_directory(model_name, create_directory=True): + """Gets the path to the directory of given model. 
+ + The generated path is just a concatenation of the global root directory + and the model name. This function forked from Chainer, an MIT-licensed project, + https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L43 + Args: + model_name (str): Name of the model. + create_directory (bool): If True (default), this function also creates + the directory at the first time. If the directory already exists, + then this option is ignored. + Returns: + str: Path to the dataset directory. + """ + path = os.path.join(_models_root, model_name) + if create_directory: + try: + os.makedirs(path) + except OSError: + if not os.path.isdir(path): + raise + return path + + +def _reporthook(count, block_size, total_size): + global start_time + if count == 0: + start_time = time.time() + print(" % Total Recv Speed Time left") + return + duration = time.time() - start_time + progress_size = count * block_size + try: + speed = progress_size / duration + except ZeroDivisionError: + speed = float("inf") + percent = progress_size / total_size * 100 + eta = int((total_size - progress_size) / speed) + sys.stdout.write( + "\r{:3.0f} {:4.0f}MiB {:4.0f}MiB {:6.0f}KiB/s {:4d}:{:02d}:{:02d}".format( + percent, + total_size / (1 << 20), + progress_size / (1 << 20), + speed / (1 << 10), + eta // 60 // 60, + (eta // 60) % 60, + eta % 60, + ) + ) + sys.stdout.flush() + + +def cached_download(url): + """Downloads a file and caches it. + + It downloads a file from the URL if there is no corresponding cache. + If there is already a cache for the given URL, it just returns the + path to the cache without downloading the same file. + This function forked from Chainer, an MIT-licensed project, + https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L70 + Args: + url (string): URL to download from. + Returns: + string: Path to the downloaded file. + """ + cache_root = os.path.join(_models_root, "_dl_cache") + try: + os.makedirs(cache_root) + except OSError: + if not os.path.exists(cache_root): + raise + lock_path = os.path.join(cache_root, "_dl_lock") + urlhash = hashlib.md5(url.encode("utf-8")).hexdigest() + cache_path = os.path.join(cache_root, urlhash) + + with filelock.FileLock(lock_path): + if os.path.exists(cache_path): + return cache_path + temp_root = tempfile.mkdtemp(dir=cache_root) + try: + temp_path = os.path.join(temp_root, "dl") + print("Downloading ...") + print("From: {:s}".format(url)) + print("To: {:s}".format(cache_path)) + request.urlretrieve(url, temp_path, _reporthook) + with filelock.FileLock(lock_path): + shutil.move(temp_path, cache_path) + finally: + shutil.rmtree(temp_root) + + return cache_path + + +def download_and_store_model(alg, url, env, model_type): + """Downloads a model file and puts it under model directory. + + It downloads a file from the URL and puts it under model directory. + If there is already a file at the destination path, + it just returns the path without downloading the same file. + Args: + alg (string): String representation of algorithm used in MODELS dict. + url (string): URL to download from. + env (string): Environment in which pretrained model was trained. + model_type (string): Either `best` or `final`. + Returns: + string: Path to the downloaded file. + bool: whether the model was already cached. 
+ """ + lock = os.path.join(_get_model_directory(".lock"), "models.lock") + with filelock.FileLock(lock): + root = _get_model_directory(os.path.join(alg, env)) + url_basepath = posixpath.join(url, alg, env) + file = model_type + ".zip" + path = os.path.join(root, file) + is_cached = os.path.exists(path) + if not is_cached: + cache_path = cached_download(posixpath.join(url_basepath, file)) + os.rename(cache_path, path) + with zipfile.ZipFile(path, "r") as zip_ref: + zip_ref.extractall(root) + return os.path.join(root, model_type), is_cached + + def download_model(alg, env, model_type="best"): """Downloads and returns pretrained model. @@ -9,4 +168,10 @@ def download_model(alg, env, model_type="best"): str: Path to the downloaded file. bool: whether the model was already cached. """ - raise NotImplementedError() + assert alg in MODELS, "No pretrained models for " + alg + "." + assert model_type in MODELS[alg], ( + 'Model type "' + model_type + '" is not supported.' + ) + env = env.replace("NoFrameskip-v4", "") + model_path, is_cached = download_and_store_model(alg, download_url, env, model_type) + return model_path, is_cached diff --git a/requirements.txt b/requirements.txt index 43bb94eed..45b6e8b0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ torch>=1.3.0 gym>=0.9.7 numpy>=1.10.4 +filelock pillow diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py new file mode 100644 index 000000000..e1ed17553 --- /dev/null +++ b/tests/utils_tests/test_pretrained_models.py @@ -0,0 +1,590 @@ +import os + +import numpy as np +import pytest +import torch +from torch import nn + +import pfrl +import pfrl.nn as pnn +from pfrl import agents, explorers, replay_buffers +from pfrl.initializers import init_chainer_default +from pfrl.utils import download_model + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadDQN: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_dqn(self, gpu): + from pfrl.q_functions import DiscreteActionValueHead + + n_actions = 4 + q_func = nn.Sequential( + pnn.LargeAtariCNN(), + init_chainer_default(nn.Linear(512, n_actions)), + DiscreteActionValueHead(), + ) + + # Use the same hyperparameters as the Nature paper + + opt = pfrl.optimizers.RMSpropEpsInsideSqrt( + q_func.parameters(), + lr=2.5e-4, + alpha=0.95, + momentum=0.0, + eps=1e-2, + centered=True, + ) + + rbuf = replay_buffers.ReplayBuffer(100) + + explorer = explorers.LinearDecayEpsilonGreedy( + start_epsilon=1.0, + end_epsilon=0.1, + decay_steps=10 ** 6, + random_action_func=lambda: np.random.randint(4), + ) + + agent = agents.DQN( + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + replay_start_size=50, + target_update_interval=10 ** 4, + clip_delta=True, + update_interval=4, + batch_accumulator="sum", + phi=lambda x: x, + ) + + downloaded_model, exists = download_model( + "DQN", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_dqn(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_dqn(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadIQN: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_iqn(self, gpu): + n_actions = 4 + q_func = 
pfrl.agents.iqn.ImplicitQuantileQFunction( + psi=nn.Sequential( + nn.Conv2d(4, 32, 8, stride=4), + nn.ReLU(), + nn.Conv2d(32, 64, 4, stride=2), + nn.ReLU(), + nn.Conv2d(64, 64, 3, stride=1), + nn.ReLU(), + nn.Flatten(), + ), + phi=nn.Sequential(pfrl.agents.iqn.CosineBasisLinear(64, 3136), nn.ReLU(),), + f=nn.Sequential( + nn.Linear(3136, 512), nn.ReLU(), nn.Linear(512, n_actions), + ), + ) + + # Use the same hyper parameters as https://arxiv.org/abs/1710.10044 + opt = torch.optim.Adam(q_func.parameters(), lr=5e-5, eps=1e-2 / 32) + + rbuf = replay_buffers.ReplayBuffer(100) + + explorer = explorers.LinearDecayEpsilonGreedy( + start_epsilon=1.0, + end_epsilon=0.1, + decay_steps=10 ** 6, + random_action_func=lambda: np.random.randint(4), + ) + + agent = agents.IQN( + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + replay_start_size=50, + target_update_interval=10 ** 4, + update_interval=4, + batch_accumulator="mean", + phi=lambda x: x, + quantile_thresholds_N=64, + quantile_thresholds_N_prime=64, + quantile_thresholds_K=32, + ) + + downloaded_model, exists = download_model( + "IQN", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_iqn(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_iqn(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadRainbow: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_rainbow(self, gpu): + from pfrl.q_functions import DistributionalDuelingDQN + + q_func = DistributionalDuelingDQN(4, 51, -10, 10) + pnn.to_factorized_noisy(q_func, sigma_scale=0.5) + explorer = explorers.Greedy() + opt = torch.optim.Adam(q_func.parameters(), 6.25e-5, eps=1.5 * 10 ** -4) + rbuf = replay_buffers.ReplayBuffer(100) + agent = agents.CategoricalDoubleDQN( + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + minibatch_size=32, + replay_start_size=50, + target_update_interval=32000, + update_interval=4, + batch_accumulator="mean", + phi=lambda x: x, + ) + + downloaded_model, exists = download_model( + "Rainbow", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_rainbow(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_rainbow(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadA3C: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def test_load_a3c(self): + from pfrl.policies import SoftmaxCategoricalHead + + obs_size = 4 + n_actions = 4 + a3c_model = nn.Sequential( + nn.Conv2d(obs_size, 16, 8, stride=4), + nn.ReLU(), + nn.Conv2d(16, 32, 4, stride=2), + nn.ReLU(), + nn.Flatten(), + nn.Linear(2592, 256), + nn.ReLU(), + pfrl.nn.Branched( + nn.Sequential(nn.Linear(256, n_actions), SoftmaxCategoricalHead(),), + nn.Linear(256, 1), + ), + ) + from pfrl.optimizers import SharedRMSpropEpsInsideSqrt + + opt = SharedRMSpropEpsInsideSqrt( + a3c_model.parameters(), lr=7e-4, eps=1e-1, alpha=0.99 + ) + agent = agents.A3C( + a3c_model, opt, t_max=5, gamma=0.99, beta=1e-2, phi=lambda x: x + ) + downloaded_model, exists = download_model( + "A3C", "BreakoutNoFrameskip-v4", 
model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadDDPG: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_ddpg(self, gpu): + + obs_size = 11 + action_size = 3 + from pfrl.nn import ConcatObsAndAction + + q_func = nn.Sequential( + ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, 1), + ) + from pfrl.nn import BoundByTanh + from pfrl.policies import DeterministicHead + + policy = nn.Sequential( + nn.Linear(obs_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, action_size), + BoundByTanh(low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0]), + DeterministicHead(), + ) + + opt_a = torch.optim.Adam(policy.parameters()) + opt_c = torch.optim.Adam(q_func.parameters()) + + explorer = explorers.AdditiveGaussian( + scale=0.1, low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0] + ) + + agent = agents.DDPG( + policy, + q_func, + opt_a, + opt_c, + replay_buffers.ReplayBuffer(100), + gamma=0.99, + explorer=explorer, + replay_start_size=1000, + target_update_method="soft", + target_update_interval=1, + update_interval=1, + soft_update_tau=5e-3, + n_times_update=1, + gpu=gpu, + minibatch_size=100, + burnin_action_func=None, + ) + + downloaded_model, exists = download_model( + "DDPG", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_ddpg(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_ddpg(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadTRPO: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_trpo(self, gpu): + obs_size = 11 + action_size = 3 + + policy = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, action_size), + pfrl.policies.GaussianHeadWithStateIndependentCovariance( + action_size=action_size, + var_type="diagonal", + var_func=lambda x: torch.exp(2 * x), # Parameterize log std + var_param_init=0, # log std = 0 => std = 1 + ), + ) + + vf = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, 1), + ) + vf_opt = torch.optim.Adam(vf.parameters()) + + agent = agents.TRPO( + policy=policy, + vf=vf, + vf_optimizer=vf_opt, + gpu=gpu, + update_interval=5000, + max_kl=0.01, + conjugate_gradient_max_iter=20, + conjugate_gradient_damping=1e-1, + gamma=0.995, + lambd=0.97, + vf_epochs=5, + entropy_coef=0, + ) + + downloaded_model, exists = download_model( + "TRPO", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_trpo(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_trpo(gpu=0) + + +class TestLoadPPO: + def _test_load_ppo(self, gpu): + obs_size = 11 + action_size = 3 + from pfrl.policies import GaussianHeadWithStateIndependentCovariance + + policy = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, action_size), + 
GaussianHeadWithStateIndependentCovariance( + action_size=action_size, + var_type="diagonal", + var_func=lambda x: torch.exp(2 * x), # Parameterize log std + var_param_init=0, # log std = 0 => std = 1 + ), + ) + + vf = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, 1), + ) + + model = pnn.Branched(policy, vf) + opt = torch.optim.Adam(model.parameters(), lr=3e-4, eps=1e-5) + + agent = agents.PPO( + model, + opt, + obs_normalizer=None, + gpu=gpu, + update_interval=2048, + minibatch_size=64, + epochs=10, + clip_eps_vf=None, + entropy_coef=0, + standardize_advantages=True, + gamma=0.995, + lambd=0.97, + ) + + downloaded_model, exists = download_model( + "PPO", "Hopper-v2", model_type="final" + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_ppo(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_ppo(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadTD3: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_td3(self, gpu): + + obs_size = 11 + action_size = 3 + + def make_q_func_with_optimizer(): + q_func = nn.Sequential( + pnn.ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, 1), + ) + q_func_optimizer = torch.optim.Adam(q_func.parameters()) + return q_func, q_func_optimizer + + q_func1, q_func1_optimizer = make_q_func_with_optimizer() + q_func2, q_func2_optimizer = make_q_func_with_optimizer() + + policy = nn.Sequential( + nn.Linear(obs_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, action_size), + nn.Tanh(), + pfrl.policies.DeterministicHead(), + ) + policy_optimizer = torch.optim.Adam(policy.parameters()) + + rbuf = replay_buffers.ReplayBuffer(100) + explorer = explorers.AdditiveGaussian( + scale=0.1, low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0] + ) + + agent = agents.TD3( + policy, + q_func1, + q_func2, + policy_optimizer, + q_func1_optimizer, + q_func2_optimizer, + rbuf, + gamma=0.99, + soft_update_tau=5e-3, + explorer=explorer, + replay_start_size=1000, + gpu=gpu, + minibatch_size=100, + burnin_action_func=None, + ) + + downloaded_model, exists = download_model( + "TD3", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_td3(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_td3(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadSAC: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_sac(self, gpu): + obs_size = 11 + action_size = 3 + + def squashed_diagonal_gaussian_head(x): + assert x.shape[-1] == action_size * 2 + mean, log_scale = torch.chunk(x, 2, dim=1) + log_scale = torch.clamp(log_scale, -20.0, 2.0) + var = torch.exp(log_scale * 2) + from torch import distributions + + base_distribution = distributions.Independent( + distributions.Normal(loc=mean, scale=torch.sqrt(var)), 1 + ) + # cache_size=1 is required for numerical stability + return distributions.transformed_distribution.TransformedDistribution( + base_distribution, + [distributions.transforms.TanhTransform(cache_size=1)], + ) + + from 
pfrl.nn.lmbda import Lambda + + policy = nn.Sequential( + nn.Linear(obs_size, 256), + nn.ReLU(), + nn.Linear(256, 256), + nn.ReLU(), + nn.Linear(256, action_size * 2), + Lambda(squashed_diagonal_gaussian_head), + ) + policy_optimizer = torch.optim.Adam(policy.parameters(), lr=3e-4) + + def make_q_func_with_optimizer(): + q_func = nn.Sequential( + pfrl.nn.ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 256), + nn.ReLU(), + nn.Linear(256, 256), + nn.ReLU(), + nn.Linear(256, 1), + ) + torch.nn.init.xavier_uniform_(q_func[1].weight) + torch.nn.init.xavier_uniform_(q_func[3].weight) + torch.nn.init.xavier_uniform_(q_func[5].weight) + q_func_optimizer = torch.optim.Adam(q_func.parameters(), lr=3e-4) + return q_func, q_func_optimizer + + q_func1, q_func1_optimizer = make_q_func_with_optimizer() + q_func2, q_func2_optimizer = make_q_func_with_optimizer() + + agent = agents.SoftActorCritic( + policy, + q_func1, + q_func2, + policy_optimizer, + q_func1_optimizer, + q_func2_optimizer, + replay_buffers.ReplayBuffer(100), + gamma=0.99, + replay_start_size=1000, + gpu=gpu, + minibatch_size=256, + burnin_action_func=None, + entropy_target=-3, + temperature_optimizer_lr=3e-4, + ) + + downloaded_model, exists = download_model( + "SAC", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_sac(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_sac(gpu=0)
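For reference, the utility added in `pfrl/utils/pretrained_models.py` can also be called directly, as the tests above do. A minimal sketch, assuming network access on the first call (later calls return the cached copy under `~/.pfrl/models` or `$PFRL_MODELS_ROOT`):

```
# Sketch of direct usage of pfrl.utils.download_model.
import os

from pfrl.utils import download_model

# Returns (path to the extracted model directory, whether it was already cached).
model_dir, was_cached = download_model(
    "DQN", "BreakoutNoFrameskip-v4", model_type="best"
)
print(model_dir)  # pass this to agent.load(), as in the tests above

# The CI configs set PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED=1 after prefetching the
# result zips with gsutil, so a test fails if the model was not served from the cache.
if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"):
    assert was_cached
```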