@@ -1400,15 +1400,17 @@ double DenoisingRecurrentNet::fpropInputReconstructionFromHidden(Vec hidden, Mat
1400
1400
transposeProduct (reconstruction_activation, reconstruction_weights, hidden);
1401
1401
reconstruction_activation += reconstruction_bias;
1402
1402
1403
- for ( int j=0 ; j<fullinputlength ; j++ ){
1403
+ softmax (reconstruction_activation, reconstruction_prob);
1404
+
1405
+ /* for( int j=0 ; j<fullinputlength ; j++ ){
1404
1406
if(clean_input[j]==1 || clean_input[j]==0)
1405
1407
reconstruction_prob[j] = fastsigmoid( reconstruction_activation[j] );
1406
1408
else
1407
1409
reconstruction_prob[j] = reconstruction_activation[j] ;
1408
- }
1410
+ } */
1409
1411
1410
1412
double result_cost = 0 ;
1411
- if (encoding==" raw_masked_supervised" || encoding==" generic" ) // complicated input format... consider it's squared error
1413
+ if (encoding==" raw_masked_supervised" ) // || encoding=="generic") // complicated input format... consider it's squared error
1412
1414
{
1413
1415
double r = 0 ;
1414
1416
double neg_log_cost = 0 ; // neg log softmax
@@ -1490,6 +1492,7 @@ double DenoisingRecurrentNet::fpropHiddenReconstructionFromLastHidden2(Vec theIn
1490
1492
Vec hidden_act_no_bias;
1491
1493
Vec hidden_exp;
1492
1494
Vec dynamic_act_no_bias_contribution;
1495
+ Vec hidden_gradient2;
1493
1496
if (reconstruction_bias.length ()==0 )
1494
1497
{
1495
1498
reconstruction_bias.resize (fullhiddenlength);
@@ -1509,7 +1512,7 @@ double DenoisingRecurrentNet::fpropHiddenReconstructionFromLastHidden2(Vec theIn
1509
1512
hidden_act_no_bias.resize (fullhiddenlength);
1510
1513
hidden_exp.resize (fullhiddenlength);
1511
1514
dynamic_act_no_bias_contribution.resize (fullhiddenlength);
1512
-
1515
+ hidden_gradient2. resize (fullhiddenlength);
1513
1516
1514
1517
1515
1518
// predict (denoised) input_reconstruction
@@ -1534,20 +1537,41 @@ double DenoisingRecurrentNet::fpropHiddenReconstructionFromLastHidden2(Vec theIn
1534
1537
1535
1538
/* *******************************************************************************/
1536
1539
hidden_reconstruction_activation_grad.resize (reconstruction_prob.size ());
1537
- hidden_reconstruction_activation_grad << reconstruction_prob ;
1540
+ hidden_reconstruction_activation_grad << reconstruction_prob2 ;
1538
1541
hidden_reconstruction_activation_grad -= hidden_target;
1539
1542
hidden_reconstruction_activation_grad *= hidden_reconstruction_cost_weight;
1540
1543
1541
1544
1542
- productAcc (hidden_gradient , reconstruction_weights, hidden_reconstruction_activation_grad); // dynamic matrice tied
1545
+ productAcc (hidden_gradient2 , reconstruction_weights, hidden_reconstruction_activation_grad); // dynamic matrice tied
1543
1546
// transposeProductAcc(hidden_gradient, reconstruction_weights, hidden_reconstruction_activation_grad); //dynamic matrice not tied
1544
1547
1545
1548
// update bias
1546
- multiplyAcc (reconstruction_bias , hidden_reconstruction_activation_grad, -lr);
1549
+ multiplyAcc (reconstruction_bias2 , hidden_reconstruction_activation_grad, -lr);
1547
1550
// update weight
1548
1551
externalProductScaleAcc (acc_weights_gr, hidden, hidden_reconstruction_activation_grad, -lr); // dynamic matrice tied
1549
1552
// externalProductScaleAcc(acc_weights_gr, hidden_reconstruction_activation_grad, hidden, -lr); //dynamic matrice not tied
1550
-
1553
+
1554
+ hidden_reconstruction_activation_grad.clear ();
1555
+
1556
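+ // scale hidden_gradient2 by prob*(1-prob), accumulate it into the activation gradient, and update reconstruction_bias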
1557
+ for ( int i=0 ; i<fullhiddenlength ; i++ )
1558
+ {
1559
+ real in_grad_i;
1560
+ in_grad_i = reconstruction_prob[i] * (1 -reconstruction_prob[i]) * hidden_gradient2[i];
1561
+ hidden_reconstruction_activation_grad[i] += in_grad_i;
1562
+
1563
+
1564
+ // update the bias: bias -= learning_rate * input_gradient
1565
+ reconstruction_bias[i] -= lr * in_grad_i;
1566
+
1567
+ }
1568
+
1569
+ productAcc (hidden_gradient, reconstruction_weights, hidden_reconstruction_activation_grad); // dynamic matrice tied
1570
+
1571
+ // update weight
1572
+ externalProductScaleAcc (acc_weights_gr, hidden, hidden_reconstruction_activation_grad, -lr); // dynamic matrice tied
1573
+
1574
+
1551
1575
// update bias2
1552
1576
// multiplyAcc(reconstruction_bias2, hidden_gradient, -lr);
1553
1577
/* *******************************************************************************/
@@ -1958,8 +1982,8 @@ void DenoisingRecurrentNet::recurrentUpdate(real input_reconstruction_weight,
1958
1982
train_n_items[train_costs.length ()-2 ]++;
1959
1983
}
1960
1984
1961
-
1962
- if (i!= 0 && dynamic_connections )
1985
+ // if(i!=0 && dynamic_connections )
1986
+ if (i> 1 && dynamic_connections )
1963
1987
{
1964
1988
1965
1989
// Add the contribution of the hidden reconstruction cost to hidden_gradient
@@ -1973,6 +1997,7 @@ void DenoisingRecurrentNet::recurrentUpdate(real input_reconstruction_weight,
1973
1997
1974
1998
// Stan's variant (currently disabled, kept for reference)
1975
1999
// fpropHiddenSymmetricDynamicMatrix(hidden_list(i-1), reconstruction_weights, hidden_reconstruction_prob, hidden_list(i), hidden_gradient, hidden_reconstruction_weight, current_learning_rate);
2000
+
1976
2001
train_costs[train_costs.length ()-1 ] += fpropHiddenReconstructionFromLastHidden (input_list[i],
1977
2002
hidden_list (i),
1978
2003
dynamicWeights, // reconsWeights, //dynamicWeights,
@@ -1985,6 +2010,23 @@ void DenoisingRecurrentNet::recurrentUpdate(real input_reconstruction_weight,
1985
2010
hidden_gradient,
1986
2011
hidden_reconstruction_weight,
1987
2012
current_learning_rate);
2013
+
2014
+
2015
+ /*
2016
+ train_costs[train_costs.length()-1] += fpropHiddenReconstructionFromLastHidden2(input_list[i],
2017
+ hidden_list(i),
2018
+ dynamicWeights, //reconsWeights, //dynamicWeights,
2019
+ acc_dynamic_connections_gr, //acc_reconstruction_dynamic_connections_gr, //acc_dynamic_connections_gr,
2020
+ hidden_reconstruction_bias,
2021
+ hidden_reconstruction_bias2,
2022
+ hidden_reconstruction_activation_grad,
2023
+ hidden_reconstruction_prob,
2024
+ hidden_list(i-2),
2025
+ hidden_gradient,
2026
+ hidden_reconstruction_weight,
2027
+ current_learning_rate);
2028
+ */
2029
+
1988
2030
// fpropHiddenReconstructionFromLastHidden(hidden_list(i), reconsWeights, acc_reconstruction_dynamic_connections_gr, hidden_reconstruction_bias, hidden_reconstruction_activation_grad, hidden_reconstruction_prob, hidden_list(i-1), hidden_gradient, hidden_reconstruction_weight, current_learning_rate);
1989
2031
train_n_items[train_costs.length ()-1 ]++;
1990
2032
}
0 commit comments