-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
train_ppo_agent.py
26 lines (19 loc) · 797 Bytes
/
train_ppo_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import os
from environment import RacerEnvironment
from utils import SaveOnBestTrainingRewardCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
# Train a PPO agent on the racer environment, resuming from a saved
# checkpoint when one is already present on disk.

# Directory handed to both the Monitor wrapper and the training callback.
log_dir = "models/trained_ppo_agent/"
# Derive the checkpoint path from log_dir so the existence check and the load
# call can never point at different files.
model_path = os.path.join(log_dir, "racer.zip")

os.makedirs(log_dir, exist_ok=True)

# NOTE(review): render=True is passed while training; presumably this enables
# on-screen rendering and slows training down -- confirm it is intended.
env = Monitor(RacerEnvironment(render=True), log_dir)
# Optional sanity check of the environment (left disabled, as in the original):
# check_env(env, warn=True)

try:
    if os.path.exists(model_path):
        print("Loading existing model")
        model = PPO.load(model_path, env=env)
    else:
        print("Training new model")
        model = PPO("MlpPolicy", env, verbose=1)

    # NOTE(review): judging by its name, the callback checkpoints the model
    # when the training reward improves; verify the details in utils.
    callback = SaveOnBestTrainingRewardCallback(check_freq=2048, log_dir=log_dir)
    model.learn(total_timesteps=600000, callback=callback)
finally:
    # Always release the environment (monitor log handle, render resources),
    # even if training raises or is interrupted with Ctrl+C.
    env.close()