@@ -59,7 +59,7 @@ def __init__(self):
         self.conv1 = nn.Conv2d(1, 4, 3, 1)
         self.conv2 = nn.Conv2d(4, 4, 3, 1)
         self.fc1 = nn.Linear(12 * 12 * 4, 32)
-        self.fc2 = nn.Linear(32, 9)
+        self.fc2 = nn.Linear(32, 10)
 
     def forward(self, x):
         x = self.conv1(x)
@@ -81,7 +81,7 @@ def forward(self, x):
 SUB_TARGET = 9
 
 #hyperparameters
-learning_rate = 5e-6
+learning_rate = 5e-4
 
 #DO NOT CHANGE
 batch_size = 1
@@ -117,7 +117,6 @@ def forward(self, x):
 test_mask = test_only_to_learn.targets == SUB_TARGET
 test_only_to_learn.data = test_only_to_learn.data[test_mask]
 test_only_to_learn.targets = test_only_to_learn.targets[test_mask]
-test_only_to_learn.targets[test_only_to_learn.targets == SUB_TARGET] = FORGET_TARGET
 test_only_to_learn_dataloader = DataLoader(test_only_to_learn, batch_size=batch_size)
 
 #this will contain only the data about the forgotten class
@@ -144,8 +143,6 @@ def forward(self, x):
 train_mask = training_to_learn.targets != FORGET_TARGET
 training_to_learn.data = training_to_learn.data[train_mask]
 training_to_learn.targets = training_to_learn.targets[train_mask]
-training_to_learn.targets[training_to_learn.targets == SUB_TARGET] = FORGET_TARGET
-
 
 #this will contain the test data where the forgotten class is substituted with the new class
 test_to_learn = datasets.MNIST(
@@ -157,14 +154,30 @@ def forward(self, x):
 test_mask = test_to_learn.targets != FORGET_TARGET
 test_to_learn.data = test_to_learn.data[test_mask]
 test_to_learn.targets = test_to_learn.targets[test_mask]
-test_to_learn.targets[test_to_learn.targets == SUB_TARGET] = FORGET_TARGET
 
 
 ################################# Gradient computation part #################################
 
+def log_softmax(x):
+    return x - torch.logsumexp(x, dim=1, keepdim=True)
+
+def CrossEntropyLoss(outputs, targets):
+    epsilon = 1e-6
+    num_examples = targets.shape[0]
+    batch_size = outputs.shape[0]
+    outputs = log_softmax(outputs) + epsilon
+    inverse_output = 1 / outputs
+    outputs[targets == FORGET_TARGET] = inverse_output[targets == FORGET_TARGET]
+
+    outputs = outputs[range(batch_size), targets]
+
+    return -torch.sum(outputs) / num_examples
+
+
+
 # Load the model
 model = CNN()
-model.load_state_dict(torch.load("modelNo9.pth"))
+model.load_state_dict(torch.load("modelNo9.pth", map_location=torch.device(device)))
 
 
 #create the gradient holders
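The added CrossEntropyLoss keeps the ordinary negative log-likelihood term for every class except FORGET_TARGET, where the log-probability is replaced by its reciprocal, so the term grows as the model becomes confident in the forgotten class. A minimal numeric sketch of that behaviour, using illustrative logits and a hypothetical forgotten-class index (the real FORGET_TARGET is set elsewhere in the script):

import torch

FORGET_TARGET = 5                                    # assumption for this sketch only
logits = torch.tensor([[0.1, 0.2, 0.1, 0.1, 0.1, 3.0, 0.1, 0.1, 0.1, 0.1]])
log_p = logits - torch.logsumexp(logits, dim=1, keepdim=True)

kept_term = -log_p[0, 1]                             # usual NLL term for a kept class
forget_term = -1.0 / log_p[0, FORGET_TARGET]         # reciprocal term used for the forgotten class
print(kept_term.item(), forget_term.item())
# as p(FORGET_TARGET) -> 1 its log tends to 0 from below and the reciprocal term blows up,
# so minimising this loss pushes probability mass away from the forgotten class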
@@ -221,8 +234,12 @@ def train(dataloader, model, loss_fn, optimizer,scheduler):
         X, y = X.to(device), y.to(device)
         pred = model(X)
         loss = loss_fn(pred, y)
+        myloss = CrossEntropyLoss(pred, y)
+        #print("pytorch Loss:", loss)
+        #print("my loss:", myloss)
         optimizer.zero_grad()
-        loss.backward()
+        #loss
+        myloss.backward()
         #remove the gradients from fc1 and fc2 using the mask
         #model.fc1.weight.grad[fc1_map == 0] = 0
         #model.fc2.weight.grad[fc2_map == 0] = 0
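On batches that contain no FORGET_TARGET samples, the handwritten loss reduces to standard cross entropy up to the added epsilon, which appears to be what the commented-out print calls were comparing. A quick, hypothetical check that could be run inside this script (the targets below are illustrative and assumed to differ from FORGET_TARGET):

logits = torch.randn(4, 10)                       # hypothetical batch of four samples
targets = torch.tensor([0, 1, 2, 3])              # assumed not to include FORGET_TARGET
print(CrossEntropyLoss(logits, targets))          # handwritten version defined above
print(nn.CrossEntropyLoss()(logits, targets))     # PyTorch reference; should agree to within ~1e-6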
@@ -267,7 +284,8 @@ def forward(self, input, target):
         return loss
 
 
-loss_fn = MyCustomLoss()
+#loss_fn = MyCustomLoss()
+loss_fn = nn.CrossEntropyLoss()
 optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
 #scheduler
 scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
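With step_size=1 and gamma=0.1, StepLR multiplies the learning rate by 0.1 each time scheduler.step() is called, so the new 5e-4 starting rate decays quickly. A small sketch of the resulting values, assuming one scheduler.step() per epoch (the epoch loop is outside this diff):

lr = learning_rate            # 5e-4 at epoch 0
for epoch in range(3):
    print(epoch, lr)          # approximately 5e-4, 5e-5, 5e-6
    lr *= 0.1                 # gamma = 0.1 applied every step_size = 1 epochs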