Skip to content

Commit 8da6b01

Browse files
author
laulysta
committed
softmax update
1 parent 2b16f29 commit 8da6b01

File tree

1 file changed

+52
-10
lines changed

1 file changed

+52
-10
lines changed

plearn_learners_experimental/DenoisingRecurrentNet.cc

Lines changed: 52 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1400,15 +1400,17 @@ double DenoisingRecurrentNet::fpropInputReconstructionFromHidden(Vec hidden, Mat
14001400
transposeProduct(reconstruction_activation, reconstruction_weights, hidden);
14011401
reconstruction_activation += reconstruction_bias;
14021402

1403-
for( int j=0 ; j<fullinputlength ; j++ ){
1403+
softmax(reconstruction_activation, reconstruction_prob);
1404+
1405+
/*for( int j=0 ; j<fullinputlength ; j++ ){
14041406
if(clean_input[j]==1 || clean_input[j]==0)
14051407
reconstruction_prob[j] = fastsigmoid( reconstruction_activation[j] );
14061408
else
14071409
reconstruction_prob[j] = reconstruction_activation[j] ;
1408-
}
1410+
}*/
14091411

14101412
double result_cost = 0;
1411-
if(encoding=="raw_masked_supervised" || encoding=="generic") // complicated input format... consider it's squared error
1413+
if(encoding=="raw_masked_supervised") // || encoding=="generic") // complicated input format... consider it's squared error
14121414
{
14131415
double r = 0;
14141416
double neg_log_cost = 0; // neg log softmax
@@ -1490,6 +1492,7 @@ double DenoisingRecurrentNet::fpropHiddenReconstructionFromLastHidden2(Vec theIn
14901492
Vec hidden_act_no_bias;
14911493
Vec hidden_exp;
14921494
Vec dynamic_act_no_bias_contribution;
1495+
Vec hidden_gradient2;
14931496
if(reconstruction_bias.length()==0)
14941497
{
14951498
reconstruction_bias.resize(fullhiddenlength);
@@ -1509,7 +1512,7 @@ double DenoisingRecurrentNet::fpropHiddenReconstructionFromLastHidden2(Vec theIn
15091512
hidden_act_no_bias.resize(fullhiddenlength);
15101513
hidden_exp.resize(fullhiddenlength);
15111514
dynamic_act_no_bias_contribution.resize(fullhiddenlength);
1512-
1515+
hidden_gradient2.resize(fullhiddenlength);
15131516

15141517

15151518
// predict (denoised) input_reconstruction
@@ -1534,20 +1537,41 @@ double DenoisingRecurrentNet::fpropHiddenReconstructionFromLastHidden2(Vec theIn
15341537

15351538
/********************************************************************************/
15361539
hidden_reconstruction_activation_grad.resize(reconstruction_prob.size());
1537-
hidden_reconstruction_activation_grad << reconstruction_prob;
1540+
hidden_reconstruction_activation_grad << reconstruction_prob2;
15381541
hidden_reconstruction_activation_grad -= hidden_target;
15391542
hidden_reconstruction_activation_grad *= hidden_reconstruction_cost_weight;
15401543

15411544

1542-
productAcc(hidden_gradient, reconstruction_weights, hidden_reconstruction_activation_grad); //dynamic matrice tied
1545+
productAcc(hidden_gradient2, reconstruction_weights, hidden_reconstruction_activation_grad); //dynamic matrice tied
15431546
//transposeProductAcc(hidden_gradient, reconstruction_weights, hidden_reconstruction_activation_grad); //dynamic matrice not tied
15441547

15451548
//update bias
1546-
multiplyAcc(reconstruction_bias, hidden_reconstruction_activation_grad, -lr);
1549+
multiplyAcc(reconstruction_bias2, hidden_reconstruction_activation_grad, -lr);
15471550
// update weight
15481551
externalProductScaleAcc(acc_weights_gr, hidden, hidden_reconstruction_activation_grad, -lr); //dynamic matrice tied
15491552
//externalProductScaleAcc(acc_weights_gr, hidden_reconstruction_activation_grad, hidden, -lr); //dynamic matrice not tied
1550-
1553+
1554+
hidden_reconstruction_activation_grad.clear();
1555+
1556+
//update bias
1557+
for( int i=0 ; i<fullhiddenlength ; i++ )
1558+
{
1559+
real in_grad_i;
1560+
in_grad_i = reconstruction_prob[i] * (1-reconstruction_prob[i]) * hidden_gradient2[i];
1561+
hidden_reconstruction_activation_grad[i] += in_grad_i;
1562+
1563+
1564+
// update the bias: bias -= learning_rate * input_gradient
1565+
reconstruction_bias[i] -= lr * in_grad_i;
1566+
1567+
}
1568+
1569+
productAcc(hidden_gradient, reconstruction_weights, hidden_reconstruction_activation_grad); //dynamic matrice tied
1570+
1571+
// update weight
1572+
externalProductScaleAcc(acc_weights_gr, hidden, hidden_reconstruction_activation_grad, -lr); //dynamic matrice tied
1573+
1574+
15511575
//update bias2
15521576
//multiplyAcc(reconstruction_bias2, hidden_gradient, -lr);
15531577
/********************************************************************************/
@@ -1958,8 +1982,8 @@ void DenoisingRecurrentNet::recurrentUpdate(real input_reconstruction_weight,
19581982
train_n_items[train_costs.length()-2]++;
19591983
}
19601984

1961-
1962-
if(i!=0 && dynamic_connections )
1985+
//if(i!=0 && dynamic_connections )
1986+
if(i>1 && dynamic_connections )
19631987
{
19641988

19651989
// Add contribution of hidden reconstruction cost in hidden_gradient
@@ -1973,6 +1997,7 @@ void DenoisingRecurrentNet::recurrentUpdate(real input_reconstruction_weight,
19731997

19741998
//truc stan
19751999
//fpropHiddenSymmetricDynamicMatrix(hidden_list(i-1), reconstruction_weights, hidden_reconstruction_prob, hidden_list(i), hidden_gradient, hidden_reconstruction_weight, current_learning_rate);
2000+
19762001
train_costs[train_costs.length()-1] += fpropHiddenReconstructionFromLastHidden(input_list[i],
19772002
hidden_list(i),
19782003
dynamicWeights, //reconsWeights, //dynamicWeights,
@@ -1985,6 +2010,23 @@ void DenoisingRecurrentNet::recurrentUpdate(real input_reconstruction_weight,
19852010
hidden_gradient,
19862011
hidden_reconstruction_weight,
19872012
current_learning_rate);
2013+
2014+
2015+
/*
2016+
train_costs[train_costs.length()-1] += fpropHiddenReconstructionFromLastHidden2(input_list[i],
2017+
hidden_list(i),
2018+
dynamicWeights, //reconsWeights, //dynamicWeights,
2019+
acc_dynamic_connections_gr, //acc_reconstruction_dynamic_connections_gr, //acc_dynamic_connections_gr,
2020+
hidden_reconstruction_bias,
2021+
hidden_reconstruction_bias2,
2022+
hidden_reconstruction_activation_grad,
2023+
hidden_reconstruction_prob,
2024+
hidden_list(i-2),
2025+
hidden_gradient,
2026+
hidden_reconstruction_weight,
2027+
current_learning_rate);
2028+
*/
2029+
19882030
//fpropHiddenReconstructionFromLastHidden(hidden_list(i), reconsWeights, acc_reconstruction_dynamic_connections_gr, hidden_reconstruction_bias, hidden_reconstruction_activation_grad, hidden_reconstruction_prob, hidden_list(i-1), hidden_gradient, hidden_reconstruction_weight, current_learning_rate);
19892031
train_n_items[train_costs.length()-1]++;
19902032
}

0 commit comments

Comments (0)