# pole.py -- a Deep Q-learning agent for the CartPole-v1 Gym environment.
import gym
import random
import os
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

class Agent():
    def __init__(self, state_size, action_size):
        self.weight_backup = "cartpole_weight.h5"
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)    # replay buffer of recent transitions
        self.learning_rate = 0.001
        self.gamma = 0.95                   # discount factor for future rewards
        self.exploration_rate = 1.0         # epsilon for epsilon-greedy action selection
        self.exploration_min = 0.01
        self.exploration_decay = 0.995
        self.brain = self._build_model()

    def _build_model(self):
        # Neural net for the Deep Q-learning model: state in, one Q-value per action out.
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        # Resume from saved weights if a previous run left a backup file.
        if os.path.isfile(self.weight_backup):
            model.load_weights(self.weight_backup)
            self.exploration_rate = self.exploration_min
        return model

    def save_model(self):
        self.brain.save(self.weight_backup)

    def act(self, state):
        # Epsilon-greedy policy: explore with probability exploration_rate,
        # otherwise take the action with the highest predicted Q-value.
        if np.random.rand() <= self.exploration_rate:
            return random.randrange(self.action_size)
        act_values = self.brain.predict(state)
        return np.argmax(act_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, sample_batch_size):
        if len(self.memory) < sample_batch_size:
            return
        sample_batch = random.sample(self.memory, sample_batch_size)
        for state, action, reward, next_state, done in sample_batch:
            # Q-learning target: r + gamma * max_a' Q(s', a') for non-terminal transitions.
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0])
            target_f = self.brain.predict(state)
            target_f[0][action] = target
            self.brain.fit(state, target_f, epochs=1, verbose=0)
        # Decay epsilon so the agent gradually shifts from exploring to exploiting.
        if self.exploration_rate > self.exploration_min:
            self.exploration_rate *= self.exploration_decay

class CartPole:
    def __init__(self):
        self.sample_batch_size = 32
        self.episodes = 10000
        self.env = gym.make('CartPole-v1')
        self.state_size = self.env.observation_space.shape[0]
        self.action_size = self.env.action_space.n
        self.agent = Agent(self.state_size, self.action_size)

    def run(self):
        try:
            for index_episode in range(self.episodes):
                state = self.env.reset()
                state = np.reshape(state, [1, self.state_size])
                done = False
                index = 0
                while not done:
                    self.env.render()
                    action = self.agent.act(state)
                    next_state, reward, done, _ = self.env.step(action)
                    next_state = np.reshape(next_state, [1, self.state_size])
                    self.agent.remember(state, action, reward, next_state, done)
                    state = next_state
                    index += 1
                print("Episode {}# Score: {}".format(index_episode, index + 1))
                # Train on a random minibatch from the replay buffer after each episode.
                self.agent.replay(self.sample_batch_size)
        finally:
            # Persist the learned weights even if training is interrupted.
            self.agent.save_model()
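
# A minimal evaluation sketch (not part of the original script): it assumes a
# "cartpole_weight.h5" file was saved by a previous training run and reuses the
# Agent class above with exploration disabled, so actions are purely greedy.
# The `evaluate` name and the episode count are illustrative only.
def evaluate(num_episodes=5):
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    agent = Agent(state_size, env.action_space.n)  # _build_model() loads the saved weights if present
    agent.exploration_rate = 0.0                   # always pick the argmax of the predicted Q-values
    for episode in range(num_episodes):
        state = np.reshape(env.reset(), [1, state_size])
        done = False
        score = 0
        while not done:
            env.render()
            state, reward, done, _ = env.step(agent.act(state))
            state = np.reshape(state, [1, state_size])
            score += reward
        print("Evaluation episode {}: score {}".format(episode, score))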

if __name__ == "__main__":
    cartpole = CartPole()
    cartpole.run()