diff --git a/.gitignore b/.gitignore
index d4665fd..a7cdca7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,10 @@
 # Created by https://www.toptal.com/developers/gitignore/api/pycharm+all,linux,windows,python
 # Edit at https://www.toptal.com/developers/gitignore?templates=pycharm+all,linux,windows,python
 
+### Project ###
+models/
+plots/
+
 ### Linux ###
 *~
 
diff --git a/agents/main.py b/agents/main.py
index 0f8d0ea..f9cdf37 100644
--- a/agents/main.py
+++ b/agents/main.py
@@ -7,7 +7,7 @@
 import torch
 
 env = gym.make('WimblepongVisualSimpleAI-v0')
-train_episodes = 50
+train_episodes = 500
 
 # TODO: Change when using a convolutional layer
 observation_space_dim = env.observation_space.shape[0] * env.observation_space.shape[1]
@@ -24,13 +24,15 @@
 # Arrays to keep track of rewards
 reward_history, timestep_history = [], []
 average_reward_history = []
-win_rate = []
-average_win_rate = []
+win_rate_history = []
+average_win_rate_history = []
+number_of_wins = 0
 
 # The training loop is run per episode
 for episode in range(train_episodes):
     reward_sum, timesteps = 0, 0
     done = False
+    has_won = False
 
     # the environment is reset each episode.
     observation, previous_observation = env.reset(), None
@@ -51,15 +53,26 @@
         reward_sum += reward
         timesteps += 1
 
-    print(f"Episode {episode} finished | total reward -> {reward_sum}")
-
-    # TODO: implement win counter.
+        if done and reward == 10:
+            number_of_wins += 1
 
     # Keeping records for future plots
-    reward_history.append(reward_sum)
     timestep_history.append(timesteps)
-    avg = np.mean(reward_history[-100:] if episode > 100 else reward_history)
-    average_reward_history.append(avg)
+
+    reward_history.append(reward_sum)
+    avg_reward = np.mean(reward_history[-100:] if episode > 100 else reward_history)
+    average_reward_history.append(avg_reward)
+
+    win_rate = number_of_wins / episode if episode != 0 else 0.0
+    win_rate_history.append(win_rate)
+    avg_win_rate = np.mean(win_rate_history[-100:] if episode > 100 else win_rate_history)
+    average_win_rate_history.append(avg_win_rate)
+
+    # Printing section.
+
+    if episode % 5 == 0:
+        print(f"Episode {episode} finished | total reward -> {np.mean(reward_history)} | win rate -> {win_rate}")
+        torch.save(agent.policy.state_dict(), f"models/model_{episode}.mdl")
 
     agent.update_policy(episode)
 
@@ -70,6 +83,13 @@
 plt.savefig("plots/reward-history.png")
 plt.show()
 
+plt.plot(win_rate_history)
+plt.plot(average_win_rate_history)
+plt.legend(["Win rate", "100-episode win rate average"])
+plt.title("Win rate history")
+plt.savefig("plots/win-rate-history.png")
+plt.show()
+
 torch.save(agent.policy.state_dict(), "model_%s_%d.mdl")
 
 if __name__ == "__main__":
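
For reference, here is a minimal, standalone sketch of the win-rate and moving-average bookkeeping that the diff above introduces. It assumes, as in the diff, that a win is signalled by a terminal reward of +10; the fake_final_rewards list is purely illustrative and not part of the project. Note that this sketch divides by episode + 1 (the number of finished episodes), whereas the diff divides by episode.

import numpy as np

# Running statistics, mirroring the arrays added in agents/main.py.
reward_history, average_reward_history = [], []
win_rate_history, average_win_rate_history = [], []
number_of_wins = 0

# Hypothetical terminal rewards, one per episode (+10 = win, -10 = loss).
fake_final_rewards = [10, -10, 10, 10, -10]

for episode, final_reward in enumerate(fake_final_rewards):
    if final_reward == 10:
        number_of_wins += 1

    reward_history.append(final_reward)
    # Slicing with [-100:] already handles the "fewer than 100 episodes" case.
    average_reward_history.append(np.mean(reward_history[-100:]))

    # Fraction of episodes won so far; episode is 0-indexed, hence the +1.
    win_rate = number_of_wins / (episode + 1)
    win_rate_history.append(win_rate)
    average_win_rate_history.append(np.mean(win_rate_history[-100:]))

print(win_rate_history)        # [1.0, 0.5, 0.666..., 0.75, 0.6]
print(average_reward_history)  # running mean of the terminal rewards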