-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
107 lines (84 loc) · 3.16 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import sys
import gym
import numpy as np
import argparse
from NAFAgent import NAFAgent
from QLAgent import QLAgent
from RandomAgent import RandomAgent
def main():
    """Run an OpenAI Gym environment with the selected learning agent.

    Parses command-line options, simulates the requested number of
    episodes, prints per-episode scores, and — when ``--report`` is
    given — records the run with the (legacy) gym monitor and uploads
    it with ``gym.upload``.

    Raises:
        ValueError: if ``--agent`` is not one of QL, NAF, or Random.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--environment', type=str, default='CartPole-v0',
                        help='OpenAI Gym environment to run.')
    parser.add_argument('-p', '--episodes', type=int, default=1000,
                        help='Number of episodes to simulate.')
    parser.add_argument('-g', '--goal', type=int, default=195,
                        help='Goal score for the environment.')
    parser.add_argument('-t', '--time', type=int, default=200,
                        help='Time steps for each episode.')
    parser.add_argument('-a', '--agent', type=str, default='QL',
                        help='Learning agent type (QL or NAF).')
    parser.add_argument('--report', action='store_true', help='Report results.')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='Print max values at each time-step.')
    args = parser.parse_args()
    print(args)

    env = gym.make(args.environment)

    # Select the agent implementation. Fail fast on an unknown name:
    # the original fell through and crashed later with a NameError.
    if args.agent == 'QL':
        agent = QLAgent(env)
    elif args.agent == 'NAF':
        agent = NAFAgent(env)
    elif args.agent == 'Random':
        agent = RandomAgent(env)
    else:
        raise ValueError('Unknown agent type: {0}'.format(args.agent))

    scores = []
    if args.report:
        filename = 'tmp/gym-report'
        env.monitor.start(filename, force=True)

    report = args.debug  # loop-invariant; hoisted out of the episode loops
    for i_episode in range(args.episodes):
        agent.reset()
        observation = env.reset()  # initial observation for the episode
        score = 0      # raw environment return for the episode
        alt_score = 0  # shaped return: reward + observation[0] per step
        # Run at most args.time steps per episode.
        for t in range(args.time):
            prev_state = observation  # remember state before acting
            next_action = agent.get_action(observation, report)
            observation, reward, done, info = env.step(next_action)
            score += reward
            # Reward shaping: add the first observation component
            # (presumably cart position — confirm) before training.
            reward += observation[0]
            if report:
                print(reward)
            alt_score += reward
            agent.update(prev_state, next_action, reward, observation, done)
            if done or t == args.time - 1:
                print(i_episode + 1, score, alt_score, t, done)
                scores.append(score)
                running_avg = np.average(scores[-100:])
                if (i_episode + 1) % 50 == 0:
                    print('{0} average score at {1}'.format(running_avg,
                                                            i_episode + 1))
                break

    if args.report:
        env.monitor.close()
        # Read the API key with a context manager (the original leaked
        # the file handle) and strip the trailing newline readline()
        # keeps (the original uploaded the key newline included).
        with open('api.key', 'r') as key_file:
            gym_key = key_file.readline().strip()
        if args.agent == 'NAF':
            algo_id = 'alg_xjVArtUxQXqfSq5q89dRjQ'
        else:
            algo_id = 'alg_sbIxfyjIRUSBrBA1IOFg'
        gym.upload(filename, api_key=gym_key, algorithm_id=algo_id)


if __name__ == "__main__":
    main()