
Commit 9cb6b28

First draft CNN code
1 parent 61f39ab commit 9cb6b28


66 files changed, 11003 additions and 0 deletions.

Custom_model/3.ttm (427 KB): binary file not shown.

Custom_model/Pioneer_p3dx.ttm (494 KB): binary file not shown.

Five additional binary model files (414 KB to 427 KB each, names not listed in this view): binary files not shown.

DQNAgent.py

118 additions, 0 deletions.
@@ -0,0 +1,118 @@
import random
import numpy as np
from collections import deque

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras import optimizers


class DQNAgent:
    def __init__(self, state_size, action_size, hiddenLayers, act, file_count=0):
        # file_count selects which saved checkpoint (./save_model/ep<file_count>.h5)
        # is loaded when load_model is True
        self.load_model = True

        # get size of state and action
        self.state_size = state_size
        self.action_size = action_size

        # These are hyperparameters for the DQN
        self.hiddenLayers = hiddenLayers
        self.activationType = act
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_decay = 0.9992
        self.epsilon_min = 0.01
        self.batch_size = 32
        self.train_start = 1000
        # create replay memory using deque
        self.memory = deque(maxlen=2000)

        # create main model and target model
        self.model = self.build_model(self.hiddenLayers, self.activationType)
        self.target_model = self.build_model(self.hiddenLayers, self.activationType)

        # initialize target model
        self.update_target_model()

        if self.load_model:
            self.model.load_weights("./save_model/ep" + str(file_count) + ".h5")

    # approximate Q function using a neural network:
    # the state is the input and the Q value of each action is the output
    def build_model(self, hiddenLayers, activationType):
        model = Sequential()
        if len(hiddenLayers) == 0:
            # no hidden layers: a single linear layer from state to Q values
            model.add(Dense(self.action_size, input_dim=self.state_size))
            model.add(Activation("linear"))
        else:
            model.add(Dense(hiddenLayers[0], input_dim=self.state_size))
            model.add(Activation(activationType))

            for index in range(1, len(hiddenLayers)):
                layerSize = hiddenLayers[index]
                model.add(Dense(layerSize))
                model.add(Activation(activationType))

            model.add(Dense(self.action_size))
            model.add(Activation("linear"))

        # optimizer = optimizers.RMSprop(lr=self.learning_rate, rho=0.9, epsilon=1e-06)
        optimizer = optimizers.SGD(lr=self.learning_rate, clipnorm=1.)
        # optimizer = optimizers.Adam(lr=self.learning_rate)

        model.summary()

        model.compile(loss="mse", optimizer=optimizer)
        return model

    # after some time interval, update the target model to match the main model
    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())

    # get action from model using epsilon-greedy policy
    def get_action(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_value = self.model.predict(state)
            return np.argmax(q_value[0])

    # save sample <s, a, r, s'> to the replay memory and decay epsilon
    def append_sample(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    # pick samples randomly from replay memory (with batch_size) and fit the main model
    def train_model(self):
        if len(self.memory) < self.train_start:
            return
        batch_size = min(self.batch_size, len(self.memory))
        mini_batch = random.sample(self.memory, batch_size)

        update_input = np.zeros((batch_size, self.state_size))
        update_target = np.zeros((batch_size, self.state_size))
        action, reward, done = [], [], []

        for i in range(batch_size):
            update_input[i] = mini_batch[i][0]
            action.append(mini_batch[i][1])
            reward.append(mini_batch[i][2])
            update_target[i] = mini_batch[i][3]
            done.append(mini_batch[i][4])

        target = self.model.predict(update_input)
        target_val = self.target_model.predict(update_target)

        for i in range(batch_size):
            # Q-learning: use the maximum Q value at s' from the target model
            if done[i]:
                target[i][action[i]] = reward[i]
            else:
                target[i][action[i]] = reward[i] + self.discount_factor * (
                    np.amax(target_val[i]))

        # and do the model fit!
        self.model.fit(update_input, target, batch_size=batch_size,
                       epochs=1, verbose=0)

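For orientation, below is a minimal sketch of how this agent might be driven from a training loop. It is not part of the commit: the DummyEnv class, the state/action sizes, and the episode count are placeholder assumptions, and since DQNAgent loads weights at construction, the sketch assumes a checkpoint exists under ./save_model/ (or that the load_weights call is temporarily disabled).

import numpy as np
from DQNAgent import DQNAgent

# Hypothetical stand-in environment with a Gym-style reset()/step() interface;
# the actual project drives the agent from a robot simulation scene instead.
class DummyEnv:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

    def reset(self):
        return np.random.rand(self.state_size)

    def step(self, action):
        next_state = np.random.rand(self.state_size)
        reward = np.random.rand()
        done = np.random.rand() < 0.05
        return next_state, reward, done

state_size, action_size = 16, 3                    # placeholder sizes
env = DummyEnv(state_size, action_size)
agent = DQNAgent(state_size, action_size,
                 hiddenLayers=[64, 64], act="relu")

for episode in range(10):                          # placeholder episode count
    state = env.reset().reshape(1, state_size)     # Keras expects a (1, state_size) batch
    done = False
    while not done:
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)
        next_state = next_state.reshape(1, state_size)
        agent.append_sample(state, action, reward, next_state, done)
        agent.train_model()
        state = next_state
    agent.update_target_model()                    # sync the target network once per episode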
MyScenes/CollisionDetection.ttt (742 KB): binary file not shown.

MyScenes/ModelScene.ttt (665 KB): binary file not shown.

0 commit comments
