
Commit

First PPO training
CristianAbrante committed Dec 4, 2020
1 parent 3d3107a commit dad7377
Showing 2 changed files with 33 additions and 9 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -2,6 +2,10 @@
# Created by https://www.toptal.com/developers/gitignore/api/pycharm+all,linux,windows,python
# Edit at https://www.toptal.com/developers/gitignore?templates=pycharm+all,linux,windows,python

### Project ###
models/
plots/

### Linux ###
*~

38 changes: 29 additions & 9 deletions agents/main.py
@@ -7,7 +7,7 @@
import torch

env = gym.make('WimblepongVisualSimpleAI-v0')
train_episodes = 50
train_episodes = 500

# TODO: Change when using a convolutional layer
observation_space_dim = env.observation_space.shape[0] * env.observation_space.shape[1]
@@ -24,13 +24,15 @@
# Arrays to keep track of rewards
reward_history, timestep_history = [], []
average_reward_history = []
win_rate = []
average_win_rate = []
win_rate_history = []
average_win_rate_history = []
number_of_wins = 0

# The training loop is run per episode
for episode in range(train_episodes):
    reward_sum, timesteps = 0, 0
    done = False
    has_won = False

    # the environment is reset each episode.
    observation, previous_observation = env.reset(), None
@@ -51,15 +53,26 @@
        reward_sum += reward
        timesteps += 1

    print(f"Episode {episode} finished | total reward -> {reward_sum}")

        # TODO: implement win counter.
        if done and reward == 10:
            number_of_wins += 1

    # Keeping records for future plots
    reward_history.append(reward_sum)
    timestep_history.append(timesteps)
    avg = np.mean(reward_history[-100:] if episode > 100 else reward_history)
    average_reward_history.append(avg)

    reward_history.append(reward_sum)
    avg_reward = np.mean(reward_history[-100:] if episode > 100 else reward_history)
    average_reward_history.append(avg_reward)

    win_rate = number_of_wins / episode if episode != 0 else 0.0
    win_rate_history.append(win_rate)
    avg_win_rate = np.mean(win_rate_history[-100:] if episode > 100 else win_rate_history)
    average_win_rate_history.append(avg_win_rate)

    # Printing section.

    if episode % 5 == 0:
        print(f"Episode {episode} finished | total reward -> {np.mean(reward_history)} | win rate -> {win_rate}")
        torch.save(agent.policy.state_dict(), f"models/model_{episode}.mdl")

    agent.update_policy(episode)

@@ -70,6 +83,13 @@
plt.savefig("plots/reward-history.png")
plt.show()

plt.plot(win_rate_history)
plt.plot(average_win_rate_history)
plt.legend(["Win rate", "100-episode win rate average"])
plt.title("Win rate history")
plt.savefig("plots/win-rate-history.png")
plt.show()

torch.save(agent.policy.state_dict(), "model_%s_%d.mdl")

if __name__ == "__main__":
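
For reference, a minimal standalone sketch of the bookkeeping pattern this commit introduces: a per-episode reward total, 100-episode moving averages, and a cumulative win rate fed by a win counter. It is not the project's code — run_episode is a hypothetical stand-in for the env.reset()/env.step() loop in agents/main.py, and the win-rate denominator uses episode + 1 instead of the special-cased division above.

import numpy as np

def run_episode():
    # Placeholder for the real step loop: a full implementation would reset the
    # Wimblepong env, step until done, and report the summed reward plus whether
    # the final reward was +10 (a win).
    total_reward = float(np.random.choice([-10.0, 10.0]))
    return total_reward, total_reward == 10.0

train_episodes = 500
reward_history, average_reward_history = [], []
win_rate_history, average_win_rate_history = [], []
number_of_wins = 0

for episode in range(train_episodes):
    reward_sum, has_won = run_episode()
    if has_won:
        number_of_wins += 1

    reward_history.append(reward_sum)
    average_reward_history.append(np.mean(reward_history[-100:]))

    # episode + 1 keeps the first episode from dividing by zero.
    win_rate = number_of_wins / (episode + 1)
    win_rate_history.append(win_rate)
    average_win_rate_history.append(np.mean(win_rate_history[-100:]))

    if episode % 5 == 0:
        print(f"Episode {episode} | avg reward {average_reward_history[-1]:.2f} | win rate {win_rate:.2f}")

The two 100-element slices are what drive the average curves plotted at the end of the script.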
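
The torch.save calls above store only a state_dict, so evaluating a saved checkpoint later means rebuilding the module first and then loading the weights into it. A rough sketch of that round trip, using a throwaway nn.Sequential in place of the project's actual policy network (whose class is not shown in this diff):

import os
import torch
import torch.nn as nn

# Stand-in module; the real policy class is defined elsewhere in the repo.
policy = nn.Sequential(nn.Linear(4, 32), nn.ReLU(), nn.Linear(32, 3))

# The training script writes into models/, so the directory has to exist.
os.makedirs("models", exist_ok=True)
torch.save(policy.state_dict(), "models/model_0.mdl")

# Loading requires a freshly built module with the same architecture.
restored = nn.Sequential(nn.Linear(4, 32), nn.ReLU(), nn.Linear(32, 3))
restored.load_state_dict(torch.load("models/model_0.mdl", map_location="cpu"))
restored.eval()

Saving the whole module object would also work, but state_dict checkpoints stay loadable even if unrelated parts of the policy class change later.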
