diff --git a/player_game/env/__pycache__/grid.cpython-38.pyc b/player_game/env/__pycache__/grid.cpython-38.pyc
new file mode 100644
index 0000000..c53078b
Binary files /dev/null and b/player_game/env/__pycache__/grid.cpython-38.pyc differ
diff --git a/player_game/env/__pycache__/player.cpython-38.pyc b/player_game/env/__pycache__/player.cpython-38.pyc
new file mode 100644
index 0000000..f2c19e2
Binary files /dev/null and b/player_game/env/__pycache__/player.cpython-38.pyc differ
diff --git a/player_game/env/__pycache__/plot.cpython-38.pyc b/player_game/env/__pycache__/plot.cpython-38.pyc
new file mode 100644
index 0000000..445dfdd
Binary files /dev/null and b/player_game/env/__pycache__/plot.cpython-38.pyc differ
diff --git a/player_game/env/__pycache__/utils.cpython-38.pyc b/player_game/env/__pycache__/utils.cpython-38.pyc
new file mode 100644
index 0000000..1e2e656
Binary files /dev/null and b/player_game/env/__pycache__/utils.cpython-38.pyc differ
diff --git a/player_game/env/grid.py b/player_game/env/grid.py
index e6096db..b581d9c 100644
--- a/player_game/env/grid.py
+++ b/player_game/env/grid.py
@@ -42,14 +42,14 @@ def __init__(self, player, n_rows=10, n_cols=10, screen_width=1000, screen_heigh
         self.grid = array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                           [0, 3, 3, 3, 0, 3, 3, 3, 0, 0],
-                           [0, 0, 3, 0, 0, 3, 0, 3, 0, 0],
-                           [0, 0, 3, 0, 0, 3, 3, 3, 0, 0],
-                           [0, 0, 3, 0, 0, 3, 0, 3, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-                           [0, 0, 3, 0, 0, 0, 3, 0, 0, 0],
-                           [0, 0, 3, 0, 0, 0, 3, 0, 0, 0],
-                           [0, 0, 3, 0, 0, 0, 3, 3, 0, 0]]).T
+                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]).T
 
         # Posicao do objetivo
         self.goal_x = randint(0, n_cols-1)
diff --git a/player_game/env/rat_game_env.py b/player_game/env/rat_game_env.py
index d2a88d6..603a939 100644
--- a/player_game/env/rat_game_env.py
+++ b/player_game/env/rat_game_env.py
@@ -113,7 +113,7 @@ def _get_state(self):
         current_x = self.agent.x
         current_y = self.agent.y
 
-        while self.maze.grid.is_valid_position(self.maze.grid[current_x, current_y]):
+        for i in range(2):
             if self.agent.direction == "Up":
                 current_x -= 1
                 state.append(self.maze.grid[current_x, current_y])
@@ -130,7 +130,7 @@ def _get_state(self):
                 current_y -= 1
                 state.append(self.maze.grid[current_x, current_y])
 
-        return (self.agent.x, self.agent.y)
+        return (self.agent.x, self.agent.y, state)
 
     def _take_action(self, action):
         self.agent.got_cheese = False
@@ -157,7 +157,7 @@ def __init__(self):
         self.maze_size = tuple([10,10])
         self.state_bounds = list(zip([0,0], [10,10]))
         self.number_actions = 4
-        self.Q = np.zeros(self.maze_size + (self.number_actions, ), dtype=float)
+        self.Q = np.zeros(self.maze_size + (4, 4) + (self.number_actions, ), dtype=float)
         self.epsilon = 1
         self.learning_rate = 1
         self.decay = DECAY
@@ -167,16 +167,7 @@ def __init__(self):
 
     def discretize_state(self, state) -> tuple:
-        discretazed_state = []
-        for i in range(len(state)):
-            if state[i] <= self.state_bounds[i][0]:
-                new_state = 0
-            elif state[i] >= self.state_bounds[i][1]:
-                new_state = (10,10)[i] - 1
-            else:
-                new_state = int(round(state[i]))
-            discretazed_state.append(new_state)
-        return tuple(discretazed_state)
+        return tuple(state)
 
     def decide_action(self, state) -> int:
@@ -188,6 +179,10 @@ def decide_action(self, state) -> int:
         return action
 
     def update_q(self, current_state, action, reward, next_state):
+        print("current:",current_state)
+        print("next:",next_state)
+        print("action:", action)
+        print("sum",tuple(current_state) + (action,))
         self.Q[tuple(current_state) + (action,)] = self.Q[tuple(current_state) + (action,)] + \
             self.learning_rate * (reward + self.discount * np.max(self.Q[tuple(next_state)]) -
             self.Q[tuple(current_state) + (action,)])
@@ -206,7 +201,7 @@ def update_epsilon(self, episode) -> float:
     def train(self):
         for episode in range(EPISODES):
             current_state = self.env._reset()
-            #current_state = self.discretize_state(current_state)
+            current_state = self.discretize_state(current_state)
             done = False