-
Notifications
You must be signed in to change notification settings - Fork 0
/
utilities.py
128 lines (114 loc) · 5.24 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import torch
import torch.nn.functional as F
import torch.distributed as dist
from torch.autograd import Variable
import numpy as np
def transpose_list(mylist):
    """Transpose a list of sequences.

    Element ``j`` of the result is a list holding the ``j``-th item of every
    sequence in ``mylist``. As with ``zip``, the result is truncated to the
    shortest inner sequence.
    """
    return [list(column) for column in zip(*mylist)]
def transpose_to_tensor(input_list):
    """Transpose ``input_list`` and convert every resulting column to a tensor.

    The ``j``-th returned tensor is built (with dtype ``torch.float``) from the
    ``j``-th item of every entry in ``input_list``.
    """
    return [
        torch.tensor(column, dtype=torch.float)
        for column in zip(*input_list)
    ]
def transpose_to_tensorAsitis(input_list):
    """Convert every entry of ``input_list`` to a float tensor, without transposing.

    Despite the name, no transposition happens here: the ``i``-th returned
    tensor is built (with dtype ``torch.float``) from the ``i``-th entry.
    """
    return [torch.tensor(entry, dtype=torch.float) for entry in input_list]
# https://github.com/ikostrikov/pytorch-ddpg-naf/blob/master/ddpg.py#L11
def soft_update(target, source, tau):
    """
    Blend the parameters of ``source`` into ``target`` (DDPG soft update).

    Every target parameter is overwritten in place with
    ``(1 - tau) * target + tau * source``.

    Inputs:
        target (torch.nn.Module): Net whose parameters are updated in place
        source (torch.nn.Module): Net whose parameters are blended in
        tau (float, 0 < x < 1): Weight given to the source parameters
    """
    for dst, src in zip(target.parameters(), source.parameters()):
        blended = dst.data * (1.0 - tau) + src.data * tau
        # Write through .data so the update is not recorded by autograd.
        dst.data.copy_(blended)
# https://github.com/ikostrikov/pytorch-ddpg-naf/blob/master/ddpg.py#L15
def hard_update(target, source):
    """
    Overwrite the parameters of ``target`` with those of ``source``.

    Parameters are paired in the order returned by ``parameters()`` and copied
    in place.

    Inputs:
        target (torch.nn.Module): Net whose parameters are overwritten
        source (torch.nn.Module): Net whose parameters are copied
    """
    param_pairs = zip(target.parameters(), source.parameters())
    for dst, src in param_pairs:
        # Write through .data so the copy is not recorded by autograd.
        dst.data.copy_(src.data)
# https://github.com/seba-1511/dist_tuto.pth/blob/gh-pages/train_dist.py
def average_gradients(model):
    """
    Average the gradients of ``model`` across all distributed processes.

    Each parameter gradient is summed over the default process group with an
    all-reduce and then divided by the world size, in place.

    Parameters without a gradient (``param.grad is None``) are skipped.
    NOTE: all-reduce is a collective call, so every process must skip the same
    parameters; otherwise the calls will not line up across ranks.

    Inputs:
        model (torch.nn.Module): Net whose gradients are averaged in place
    """
    world_size = float(dist.get_world_size())
    for param in model.parameters():
        if param.grad is None:
            # Nothing to reduce for parameters that received no gradient.
            continue
        grad = param.grad.data
        # dist.ReduceOp replaces the deprecated dist.reduce_op; omitting
        # `group` selects the default (world) process group.
        dist.all_reduce(grad, op=dist.ReduceOp.SUM)
        grad /= world_size
# https://github.com/seba-1511/dist_tuto.pth/blob/gh-pages/train_dist.py
def init_processes(rank, size, fn, backend='gloo'):
    """
    Set up the distributed environment and then run ``fn``.

    The rendezvous address is fixed to 127.0.0.1:29500 via the ``MASTER_ADDR``
    and ``MASTER_PORT`` environment variables (any existing values are
    overwritten).

    Inputs:
        rank (int): Rank of this process
        size (int): Total number of processes (world size)
        fn (callable): Called as ``fn(rank, size)`` once the group is ready
        backend (str): Backend passed to ``dist.init_process_group``
    """
    os.environ.update({
        'MASTER_ADDR': '127.0.0.1',
        'MASTER_PORT': '29500',
    })
    dist.init_process_group(backend, rank=rank, world_size=size)
    fn(rank, size)
def onehot_from_logits(logits, eps=0.0):
    """
    Given a batch of logits, return one-hot actions chosen epsilon-greedily.

    Inputs:
        logits (torch.Tensor): [batch_size, n_class] scores
        eps (float): Probability of replacing a row's greedy action with a
            uniformly random one; 0.0 means purely greedy
    Returns:
        Float tensor of shape [batch_size, n_class] on the same device as
        ``logits``. Every row contains exactly one 1.0; if several classes tie
        for the maximum, a single one of them is selected.
    """
    batch_size, n_class = logits.shape[0], logits.shape[1]
    # Rows of the identity matrix are the one-hot vectors; index them by class.
    identity = torch.eye(n_class, dtype=torch.float, device=logits.device)
    # get best (according to current policy) actions in one-hot form
    greedy_acs = identity[logits.argmax(dim=1)]
    if eps == 0.0:
        return greedy_acs
    # get random actions in one-hot form
    random_idx = torch.as_tensor(
        np.random.choice(n_class, size=batch_size),
        dtype=torch.long,
        device=logits.device,
    )
    random_acs = identity[random_idx]
    # choose between best and random actions using epsilon greedy:
    # a row explores when its uniform draw does not exceed eps
    explore = (torch.rand(batch_size) <= eps).to(logits.device)
    return torch.where(explore.unsqueeze(1), random_acs, greedy_acs)
# modified for PyTorch from https://github.com/ericjang/gumbel-softmax/blob/master/Categorical%20VAE.ipynb
def sample_gumbel(shape, eps=1e-20, tens_type=torch.FloatTensor):
    """Sample from Gumbel(0, 1).

    Inputs:
        shape (sequence of int): Shape of the sample, unpacked into ``tens_type``
        eps (float): Small constant added inside both logarithms so that
            neither ever evaluates log(0)
        tens_type (callable): Tensor constructor invoked as ``tens_type(*shape)``
    Returns:
        Tensor of Gumbel noise with the requested shape.
    """
    uniform = tens_type(*shape).uniform_()
    uniform = Variable(uniform, requires_grad=False)
    inner = torch.log(uniform + eps)
    # Inverse-CDF transform: -log(-log(U)) for U ~ Uniform(0, 1).
    return -torch.log(eps - inner)
# modified for PyTorch from https://github.com/ericjang/gumbel-softmax/blob/master/Categorical%20VAE.ipynb
def gumbel_softmax_sample(logits, temperature):
    """Draw a sample from the Gumbel-Softmax distribution.

    Inputs:
        logits (torch.Tensor): [batch_size, n_class] unnormalized log-probs
        temperature (float): Positive scalar the perturbed logits are divided
            by before the softmax
    Returns:
        [batch_size, n_class] tensor whose rows sum to 1.
    """
    noise = sample_gumbel(logits.shape, tens_type=type(logits.data))
    # On current PyTorch the Python type of a tensor does not encode its
    # device, so the noise may have been allocated on the CPU. Move it next to
    # the logits before adding (a no-op when both already share a device).
    noise = noise.to(logits.device)
    perturbed = logits + noise
    return F.softmax(perturbed / temperature, dim=1)
# modified for PyTorch from https://github.com/ericjang/gumbel-softmax/blob/master/Categorical%20VAE.ipynb
def gumbel_softmax(logits, temperature=0.5, hard=False):
    """Sample from the Gumbel-Softmax distribution and optionally discretize.

    Args:
        logits: [batch_size, n_class] unnormalized log-probs
        temperature: positive scalar (the perturbed logits are divided by it)
        hard: if True, return a one-hot sample whose gradient is taken
            w.r.t. the soft sample
    Returns:
        [batch_size, n_class] sample from the Gumbel-Softmax distribution.
        If hard=True, the returned sample is one-hot; otherwise it is a
        probability distribution that sums to 1 across classes.
    """
    soft_sample = gumbel_softmax_sample(logits, temperature)
    if not hard:
        return soft_sample
    hard_sample = onehot_from_logits(soft_sample)
    # Straight-through estimator: the forward value equals hard_sample, while
    # the detached difference lets gradients flow only through soft_sample.
    return (hard_sample - soft_sample).detach() + soft_sample
# Separates the current agent's action from the other agent's action
def giveCurrentAgentsAction(Actions , agent_number , batch = True , num =2 , Tuples = True):
    """Split ``Actions`` into the selected agent's slice and the mirrored agent's slice.

    The "rest" slice is taken at index ``num - agent_number - 1``; with the
    default ``num=2`` that is simply the other agent.

    Args:
        Actions: indexable array/tensor of actions
        agent_number: index of the current agent
        batch: if True, ``Actions`` is indexed with three axes, else with two
        num: number of agents used to compute the mirrored index
        Tuples: (batched only) if True the agent axis is axis 1, otherwise
            it is axis 0
    Returns:
        Tuple ``(current_agent, rest_agents)``.
    """
    other_agent = num - agent_number - 1
    if not batch:
        # Un-batched layout: agent axis first.
        return Actions[agent_number, :], Actions[other_agent, :]
    if Tuples:
        # Batched layout with the agent on axis 1.
        return Actions[:, agent_number, :], Actions[:, other_agent, :]
    # Batched layout with the agent on axis 0.
    return Actions[agent_number, :, :], Actions[other_agent, :, :]
"""def main():
torch.Tensor()
print(onehot_from_logits())
if __name__=='__main__':
main()"""