main.py
from datetime import datetime
import random

import numpy as np
from matplotlib import pyplot as plt

from classes import *
from dataFunctions import *
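# NOTE: `classes` and `dataFunctions` are project-local modules (not shown here);
# they are assumed to provide `Agent`, `Env`, and the network hyperparameters
# referenced below (ACTOR_HIDDEN, CRITIC_HIDDEN, LEARNING_RATE, LOOKBACK).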
# Run configuration and hyperparameters
EPISODES = 1000
GAMMA = 0.99
TRAINING = True
SAVE = True

TRAIN_START = "2020-01-01"
TRAIN_END = "2021-01-01"
TEST_START = "2021-01-01"
TEST_END = "2022-01-01"
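# Training loop: each episode builds a fresh Env for a randomly chosen ticker
# and runs the agent until the episode terminates, updating the policy after
# every step (an actor-critic setup, judging by the actor/critic losses that
# agent.learn returns).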
def train(agent: Agent):
    start = datetime.now()
    episode_rewards = []    # total reward collected in each episode
    total_avg_rewards = []  # rolling mean of the last 100 episode rewards
    for episode in range(EPISODES):
        done = False
        env = Env(random.choice(["TSLA", "GOOG", "AAPL", "TWTR", "AMZN", "MSFT"]))
        state = env.get_init_state()
        total_reward = 0
        actor_loss_history = []
        critic_loss_history = []
        while not done:
            action = agent.act(state.state())
            next_state, reward, done = env.do_something(action)
            actor_loss, critic_loss = agent.learn(state.state(), action, reward, next_state.state(), done)
            actor_loss_history.append(actor_loss)
            critic_loss_history.append(critic_loss)
            state = next_state
            total_reward += reward
        print(f"Reward after episode {episode} is {total_reward}")
        episode_rewards.append(total_reward)
        total_avg_rewards.append(np.mean(episode_rewards[-100:]))
    if SAVE:
        agent.actor.save("actor.tf")
        agent.critic.save("critic.tf")
    end = datetime.now()
    print(f"""
    Training Summary:
    Time Elapsed: {end - start}
    Episodes: {EPISODES}
    ACTOR_HIDDEN: {ACTOR_HIDDEN}
    CRITIC_HIDDEN: {CRITIC_HIDDEN}
    LEARNING_RATE: {LEARNING_RATE}
    LOOKBACK: {LOOKBACK}
    GAMMA: {GAMMA}
    """)
    episodes = list(range(EPISODES))
    plt.plot(episodes, total_avg_rewards, "b")
    plt.title("Average reward vs. episodes")
    plt.xlabel("episode")
    plt.ylabel("average reward (last 100 episodes)")
    plt.grid(True)
    plt.show()
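# Evaluation: replay the AAPL environment and compare the agent's chosen action
# against the environment's expected (ground-truth) action at each step,
# reporting directional (increase/decrease) accuracy.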
def test(agent: Agent):
    env = Env("AAPL")
    state = env.get_init_state()
    correct_guesses = 0
    total_guesses = 0
    done = False
    while not done:
        action = agent.act(state.state())
        expected_action, state, done = env.expected_action()
        if not done:
            if expected_action == action:
                correct_guesses += 1
            total_guesses += 1
    accuracy = correct_guesses / total_guesses if total_guesses else 0.0
    print(f"The agent correctly predicted the INCREASE / DECREASE {accuracy:.2%} of the time")
if __name__ == "__main__":
    agent = Agent(GAMMA)
    if TRAINING:
        train(agent)
    else:
        test(agent)