From f5e2a548e1259aad494ee577a0e3849cd0fb07c8 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 18 Feb 2017 18:34:59 +0100 Subject: [PATCH 01/78] Add implementation of the randomized block krylov svd method. --- src/mlpack/methods/CMakeLists.txt | 1 + .../methods/block_krylov_svd/CMakeLists.txt | 15 ++ .../randomized_block_krylov_svd.cpp | 88 ++++++++++++ .../randomized_block_krylov_svd.hpp | 130 ++++++++++++++++++ 4 files changed, 234 insertions(+) create mode 100644 src/mlpack/methods/block_krylov_svd/CMakeLists.txt create mode 100644 src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp create mode 100644 src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index dde69de0b1b..2218c378c10 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -21,6 +21,7 @@ set(DIRS approx_kfn amf ann + block_krylov_svd cf decision_stump det diff --git a/src/mlpack/methods/block_krylov_svd/CMakeLists.txt b/src/mlpack/methods/block_krylov_svd/CMakeLists.txt new file mode 100644 index 00000000000..6380befb28a --- /dev/null +++ b/src/mlpack/methods/block_krylov_svd/CMakeLists.txt @@ -0,0 +1,15 @@ +# Define the files we need to compile. +# Anything not in this list will not be compiled into mlpack. +set(SOURCES + randomized_block_krylov_svd.hpp + randomized_block_krylov_svd.cpp +) + +# Add directory name to sources. +set(DIR_SRCS) +foreach(file ${SOURCES}) + set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) +endforeach() +# Append sources (with directory name) to list of all mlpack sources (used at +# the parent scope). +set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp new file mode 100644 index 00000000000..9009c5f6422 --- /dev/null +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -0,0 +1,88 @@ +/** + * @file randomized_block_krylov_svd.cpp + * @author Marcus Edel + * + * Implementation of the randomized SVD method. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#include "randomized_block_krylov_svd.hpp" + +namespace mlpack { +namespace svd { + +RandomizedBlockKrylovSVD::RandomizedBlockKrylovSVD(const arma::mat& data, + arma::mat& u, + arma::vec& s, + arma::mat& v, + const size_t maxIterations, + const size_t rank, + const size_t blockSize) : + maxIterations(maxIterations), + blockSize(blockSize) +{ + if (rank == 0) + { + Apply(data, u, s, v, data.n_rows); + } + else + { + Apply(data, u, s, v, rank); + } +} + +RandomizedBlockKrylovSVD::RandomizedBlockKrylovSVD(const size_t maxIterations, + const size_t blockSize) : + maxIterations(maxIterations), + blockSize(blockSize) +{ + /* Nothing to do here */ +} + +void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, + arma::mat& u, + arma::vec& s, + arma::mat& v, + const size_t rank) +{ + arma::mat Q, R, block; + + if (blockSize == 0) + { + blockSize = rank + 10; + } + + // Random block initialization. + arma::mat G = arma::randn(data.n_rows, blockSize); + + // Construct and orthonormalize Krlov subspace. 
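+  // The subspace is built one block at a time: the first block is an
+  // orthonormal basis for data * G, and each subsequent block applies
+  // (data * data.t()) to the previous block before re-orthonormalizing,
+  // following Musco and Musco (2015).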
+ arma::mat K(data.n_rows, blockSize * (maxIterations + 1)); + arma::qr_econ(block, R, data * G); + + // Copy the temporary memory to the right place. + K.submat(0, 0, block.n_rows - 1, block.n_cols - 1) = block; + + for (size_t i = 0, b = block.n_cols; i < maxIterations; ++i, + b += block.n_cols) + { + arma::qr_econ(block, R, data * (data.t() * block)); + K.submat(0, b, block.n_rows - 1, b + block.n_cols - 1) = block; + } + + arma::qr_econ(Q, R, K); + + // Approximate eigenvalues and eigenvectors using Rayleigh–Ritz method. + arma::svd_econ(u, s, v, Q.t() * data); + + // Do economical singular value decomposition and compute only the + // approximations of the left singular vectors by using the centered data + // applied to Q. + u = Q * u; +} + +} // namespace svd +} // namespace mlpack diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp new file mode 100644 index 00000000000..964a654ea9d --- /dev/null +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp @@ -0,0 +1,130 @@ +/** + * @file randomized_block_krylov_svd.hpp + * @author Marcus Edel + * + * An implementation of the randomized SVD method. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#ifndef MLPACK_METHODS_BLOCK_KRYLOV_SVD_RANDOMIZED_BLOCK_KRYLOV_SVD_HPP +#define MLPACK_METHODS_BLOCK_KRYLOV_SVD_RANDOMIZED_BLOCK_KRYLOV_SVD_HPP + +#include + +namespace mlpack { +namespace svd { + +/** + * Randomized block krylov SVD is a matrix factorization that is based on + * randomized matrix approximation techniques, developed in in + * "Randomized Block Krylov Methods for Stronger and Faster Approximate + * Singular Value Decomposition". + * + * For more information, see the following. + * + * @code + * @inproceedings{Musco2015, + * author = {Cameron Musco and Christopher Musco}, + * title = {Randomized Block Krylov Methods for Stronger and Faster + * Approximate Singular Value Decomposition}, + * booktitle = {Advances in Neural Information Processing Systems 28: Annual + * Conference on Neural Information Processing Systems 2015, + * December 7-12, 2015, Montreal, Quebec, Canada}, + * pages = {1396--1404}, + * year = {2015}, + * } + * @endcode + * + * @code + * + * An example of how to use the interface is shown below: + * + * @code + * arma::mat data; // Rating data in the form of coordinate list. + * + * const size_t rank = 20; // Rank used for the decomposition. + * + * // Make a RandomizedBlockKrylovSVD object. + * RandomizedBlockKrylovSVD bSVD(); + * + * arma::mat u, s, v; + * + * // Use the Apply() method to get a factorization. + * bSVD.Apply(data, u, s, v, rank); + * @endcode + */ +class RandomizedBlockKrylovSVD +{ + public: + /** + * Create object for the randomized block krylov SVD method. + * + * @param data Data matrix. + * @param u First unitary matrix. + * @param v Second unitary matrix. + * @param s Diagonal matrix of singular values. + * @param maxIterations Number of iterations for the power method + * (Default: 2). + * @param rank Rank of the approximation (Default: number of rows.) + * @param blockSize The block size, must be >= rank (Default: rank + 10). 
+ */ + RandomizedBlockKrylovSVD(const arma::mat& data, + arma::mat& u, + arma::vec& s, + arma::mat& v, + const size_t maxIterations = 2, + const size_t rank = 0, + const size_t blockSize = 0); + + /** + * Create object for the randomized block krylov SVD method. + * + * @param maxIterations Number of iterations for the power method + * (Default: 2). + * @param blockSize The block size, must be >= rank (Default: rank + 10). + */ + RandomizedBlockKrylovSVD(const size_t maxIterations = 2, + const size_t blockSize = 0); + + /** + * Apply Principal Component Analysis to the provided data set using the + * randomized block krylov SVD. + * + * @param data Data matrix. + * @param u First unitary matrix. + * @param v Second unitary matrix. + * @param s Diagonal matrix of singular values. + * @param rank Rank of the approximation. + */ + void Apply(const arma::mat& data, + arma::mat& u, + arma::vec& s, + arma::mat& v, + const size_t rank); + + //! Get the number of iterations for the power method. + size_t MaxIterations() const { return maxIterations; } + //! Modify the number of iterations for the power method. + size_t& MaxIterations() { return maxIterations; } + + //! Get the block size. + size_t BlockSize() const { return blockSize; } + //! Modify the block size. + size_t& BlockSize() { return blockSize; } + + private: + //! Locally stored number of iterations for the power method. + size_t maxIterations; + + //! The block size value. + size_t blockSize; +}; + +} // namespace svd +} // namespace mlpack + +#endif From 4db6379cd8297932145aff7df9f649d779e7186f Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 18 Feb 2017 18:35:49 +0100 Subject: [PATCH 02/78] Add test cases for the randomized block krylov svd method. --- src/mlpack/tests/CMakeLists.txt | 1 + src/mlpack/tests/block_krylov_svd_test.cpp | 116 +++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 src/mlpack/tests/block_krylov_svd_test.cpp diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index 514c4534502..8e95e6069bb 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -11,6 +11,7 @@ add_executable(mlpack_test armadillo_svd_test.cpp aug_lagrangian_test.cpp binarize_test.cpp + block_krylov_svd_test.cpp cf_test.cpp cli_test.cpp convolution_test.cpp diff --git a/src/mlpack/tests/block_krylov_svd_test.cpp b/src/mlpack/tests/block_krylov_svd_test.cpp new file mode 100644 index 00000000000..4ca9c6258d2 --- /dev/null +++ b/src/mlpack/tests/block_krylov_svd_test.cpp @@ -0,0 +1,116 @@ +/** + * @file block_krylov_svd_test.cpp + * @author Marcus Edel + * + * Test file for the Randomized Block Krylov SVD class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#include +#include + +#include +#include "test_tools.hpp" + +BOOST_AUTO_TEST_SUITE(BlockKrylovSVDTest); + +using namespace mlpack; + +// Generate a low rank matrix with bell-shaped singular values. 
+void CreateNoisyLowRankMatrix(arma::mat& data, + const size_t rows, + const size_t cols, + const size_t rank, + const double strength) +{ + arma::mat R, U, V; + const size_t n = std::min(rows, cols); + + arma::qr_econ(U, R, arma::randn(rows, n)); + arma::qr_econ(V, R, arma::randn(cols, n)); + + arma::vec ids = arma::linspace(0, n - 1, n); + + arma::vec lowRank = ((1 - strength) * + arma::exp(-1.0 * arma::pow((ids / rank), 2))); + arma::vec tail = strength * arma::exp(-0.1 * ids / rank); + + arma::mat s = arma::eye(n, n) * (lowRank + tail); + data = (U * s) * V.t(); +} + +/** + * The reconstruction and sigular value error of the obtained SVD should be + * small. + */ +BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDReconstructionError) +{ + arma::mat U = arma::randn(3, 20); + arma::mat V = arma::randn(10, 3); + + arma::mat R; + arma::qr_econ(U, R, U); + arma::qr_econ(V, R, V); + + arma::mat s = arma::diagmat(arma::vec("1 0.1 0.01")); + + arma::mat data = arma::trans(U * arma::diagmat(s) * V.t()); + + // Center the data into a temporary matrix. + arma::mat centeredData; + math::Center(data, centeredData); + + arma::mat U1, U2, V1, V2; + arma::vec s1, s2, s3; + + arma::svd_econ(U1, s1, V1, centeredData); + + svd::RandomizedBlockKrylovSVD rSVD(20, 10); + rSVD.Apply(centeredData, U2, s2, V2, 3); + + // Use the same amount of data for the compariosn (matrix rank). + s3 = s1.subvec(0, s2.n_elem - 1); + + // The sigular value error should be small. + double error = arma::norm(s2 - s3, "frob") / arma::norm(s2, "frob"); + BOOST_REQUIRE_SMALL(error, 1e-5); + + arma::mat reconstruct = U2 * arma::diagmat(s2) * V2.t(); + + // The relative reconstruction error should be small. + error = arma::norm(centeredData - reconstruct, "frob") / + arma::norm(centeredData, "frob"); + BOOST_REQUIRE_SMALL(error, 1e-5); +} + +/* + * Check if the method can handle noisy matrices. + */ +BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNosiyLowRankTest) +{ + arma::mat data; + CreateNoisyLowRankMatrix(data, 100, 1000, 5, 1.0); + + const size_t rank = 5; + + arma::mat U1, U2, V1, V2; + arma::vec s1, s2, s3; + + arma::svd_econ(U1, s1, V1, data); + + svd::RandomizedBlockKrylovSVD rSVDA(data, U2, s2, V2, 1, rank, 5); + + double error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); + BOOST_REQUIRE_SMALL(error, 0.1); + + svd::RandomizedBlockKrylovSVD rSVDB(data, U2, s2, V2, 10, rank, 20); + + error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); + BOOST_REQUIRE_SMALL(error, 1e-3); +} + +BOOST_AUTO_TEST_SUITE_END(); From 397ef54120ceed8ad463e9db6d93f158e6a02eff Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 19 Feb 2017 17:53:02 +0100 Subject: [PATCH 03/78] Remove unused open @code block. 
--- .../methods/block_krylov_svd/randomized_block_krylov_svd.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp index 964a654ea9d..f7d77ab4c50 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp @@ -39,8 +39,6 @@ namespace svd { * } * @endcode * - * @code - * * An example of how to use the interface is shown below: * * @code From 6cb13c1c92ea82287171ae7d2baa9c1dab77bc6a Mon Sep 17 00:00:00 2001 From: Abhinav Moudgil Date: Wed, 29 Mar 2017 17:56:29 +0530 Subject: [PATCH 04/78] Fix VanillaNetworkTest --- .../tests/convolutional_network_test.cpp | 88 ++++++++----------- 1 file changed, 38 insertions(+), 50 deletions(-) diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index efafcc9c24c..6d79f7605b2 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -1,6 +1,7 @@ /** * @file convolutional_network_test.cpp * @author Marcus Edel + * @author Abhinav Moudgil * * Tests the convolutional neural network. * @@ -23,7 +24,6 @@ using namespace mlpack; using namespace mlpack::ann; using namespace mlpack::optimization; - BOOST_AUTO_TEST_SUITE(ConvolutionalNetworkTest); /** @@ -47,11 +47,13 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) { if (i < nPoints / 2) { - Y(i) = 4; + // class 1 - digit = 4 + Y(i) = 1; } else { - Y(i) = 9; + // class 2 - digit = 9 + Y(i) = 2; } } @@ -73,60 +75,46 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) * +---+ +---+ +---+ +---+ +---+ +---+ */ - // It isn't guaranteed that the network will converge in the specified number - // of iterations using random weights. If this works 1 of 5 times, I'm fine - // with that. All I want to know is that the network is able to escape from - // local minima and to solve the task. 
- size_t successes = 0; - for (size_t trial = 0; trial < 5; ++trial) + FFN, RandomInitialization> model; + + model.Add >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28); + model.Add >(); + model.Add >(8, 8, 2, 2); + model.Add >(8, 12, 2, 2); + model.Add >(); + model.Add >(2, 2, 2, 2); + model.Add >(192, 20); + model.Add >(); + model.Add >(20, 10); + model.Add >(); + model.Add >(10, 2); + model.Add >(); + + RMSprop opt(model, 0.001, 0.88, 1e-8, 5000, -1); + + model.Train(std::move(X), std::move(Y), opt); + + arma::mat predictionTemp; + model.Predict(X, predictionTemp); + arma::mat prediction = arma::zeros(1, predictionTemp.n_cols); + + for (size_t i = 0; i < predictionTemp.n_cols; ++i) { - FFN, GaussianInitialization> model; - - model.Add >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28); - model.Add >(); - model.Add >(8, 8, 2, 2); - model.Add >(8, 12, 2, 2); - model.Add >(); - model.Add >(2, 2, 2, 2); - model.Add >(192, 20); - model.Add >(); - model.Add >(20, 30); - model.Add >(); - model.Add >(30, 10); - model.Add >(); - - RMSprop opt(model, 0.01, 0.88, 1e-8, 5000, -1); - - model.Train(std::move(X), std::move(Y), opt); - - arma::mat predictionTemp; - model.Predict(X, predictionTemp); - arma::mat prediction = arma::zeros(1, predictionTemp.n_cols); - - for (size_t i = 0; i < predictionTemp.n_cols; ++i) - { - prediction(i) = arma::as_scalar(arma::find( + prediction(i) = arma::as_scalar(arma::find( arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; - } - - size_t error = 0; - for (size_t i = 0; i < X.n_cols; i++) - { - if (prediction(i) == Y(i)) - { - error++; - } - } + } - double classificationError = 1 - double(error) / X.n_cols; - if (classificationError <= 0.2) + size_t correct = 0; + for (size_t i = 0; i < X.n_cols; i++) + { + if (prediction(i) == Y(i)) { - ++successes; - break; + correct++; } } - BOOST_REQUIRE_GE(successes, 1); + double classificationError = 1 - double(correct) / X.n_cols; + BOOST_REQUIRE_LE(classificationError, 0.2); } BOOST_AUTO_TEST_SUITE_END(); From c7872c20c610e45ac24ed5e35b9a35c11a84fb78 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 1 Mar 2017 21:07:00 +0530 Subject: [PATCH 05/78] Implement the SMORMS3 optimizer --- .../core/optimizers/smorms3/CMakeLists.txt | 11 ++ .../core/optimizers/smorms3/smorms3.hpp | 153 ++++++++++++++++++ .../core/optimizers/smorms3/smorms3_impl.hpp | 146 +++++++++++++++++ 3 files changed, 310 insertions(+) create mode 100644 src/mlpack/core/optimizers/smorms3/CMakeLists.txt create mode 100644 src/mlpack/core/optimizers/smorms3/smorms3.hpp create mode 100644 src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp diff --git a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt new file mode 100644 index 00000000000..68b6de814cd --- /dev/null +++ b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt @@ -0,0 +1,11 @@ +set(SOURCES + smorms3.hpp + smorms3_impl.hpp +) + +set(DIR_SRCS) +foreach(file ${SOURCES}) + set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) +endforeach() + +set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp new file mode 100644 index 00000000000..d4ac9810b58 --- /dev/null +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -0,0 +1,153 @@ +/** + * @file smorms3.hpp + * @author Vivek Pal + * + * SMORMS3 i.e. squared mean over root mean squared cubed optimizer. 
It is a + * hybrid of RMSprop, which estimates a safe and optimal distance based on + * curvature and Yann LeCun’s method in "No more pesky learning rates". + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP +#define __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * SMORMS3 is an optimizer that estimates a safe and optimal distance based on + * curvature and normalizing the stepsize in the parameter space. It is a hybrid + * of RMSprop and Yann LeCun’s method in "No more pesky learning rates". + * + * For more information, see the following. + * + * @code + * @misc{Funk2015, + * author = {Simon Funk}, + * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, + * year = {2015} + * } + * @endcode + * + * + * For SMORMS3 to work, a DecomposableFunctionType template parameter is required. + * This class must implement the following function: + * + * size_t NumFunctions(); + * double Evaluate(const arma::mat& coordinates, const size_t i); + * void Gradient(const arma::mat& coordinates, + * const size_t i, + * arma::mat& gradient); + * + * NumFunctions() should return the number of functions (\f$n\f$), and in the + * other two functions, the parameter i refers to which individual function (or + * gradient) is being evaluated. So, for the case of a data-dependent function, + * such as NCA (see mlpack::nca::NCA), NumFunctions() should return the number + * of points in the dataset, and Evaluate(coordinates, 0) will evaluate the + * objective function on the first point in the dataset (presumably, the dataset + * is held internally in the DecomposableFunctionType). + * + * @tparam DecomposableFunctionType Decomposable objective function type to be + * minimized. + */ +template +class SMORMS3 +{ + public: + /** + * Construct the SMORMS3 optimizer with the given function and parameters. The + * defaults here are not necessarily good for the given problem, so it is + * suggested that the values used be tailored to the task at hand. The + * maximum number of iterations refers to the maximum number of points that + * are processed (i.e., one iteration equals one point; one iteration does not + * equal one pass over the dataset). + * + * @param function Function to be optimized (minimized). + * @param lRate Learning rate or step size for each iteration. + * @param eps Value used to initialise the mean squared gradient parameter. + * @param maxIterations Maximum number of iterations allowed (0 means no + * limit). + * @param tolerance Maximum absolute tolerance to terminate algorithm. + * @param shuffle If true, the function order is shuffled; otherwise, each + * function is visited in linear order. + */ + SMORMS3(DecomposableFunctionType& function, + const double lRate = 0.001, + const double eps = 1e-16, + const size_t maxIterations = 100000, + const double tolerance = 1e-5, + const bool shuffle = true); + + /** + * Optimize the given function using SMORMS3. The given starting point will + * be modified to store the finishing point of the algorithm, and the final + * objective value is returned. + * + * @param iterate Starting point (will be modified). + * @return Objective value of the final point. 
+ */ + double Optimize(arma::mat& iterate); + + //! Get the instantiated function to be optimized. + const DecomposableFunctionType& Function() const { return function; } + //! Modify the instantiated function. + DecomposableFunctionType& Function() { return function; } + + //! Get the learning rate. + double LRate() const { return lRate; } + //! Modify the learning rate. + double& LRate() { return lRate; } + + //! Get the value used to initialise the mean squared gradient parameter. + double Epsilon() const { return eps; } + //! Modify the value used to initialise the mean squared gradient parameter. + double& Epsilon() { return eps; } + + //! Get the maximum number of iterations (0 indicates no limit). + size_t MaxIterations() const { return maxIterations; } + //! Modify the maximum number of iterations (0 indicates no limit). + size_t& MaxIterations() { return maxIterations; } + + //! Get the tolerance for termination. + double Tolerance() const { return tolerance; } + //! Modify the tolerance for termination. + double& Tolerance() { return tolerance; } + + //! Get whether or not the individual functions are shuffled. + bool Shuffle() const { return shuffle; } + //! Modify whether or not the individual functions are shuffled. + bool& Shuffle() { return shuffle; } + + private: + //! The instantiated function. + DecomposableFunctionType& function; + + //! The learning rate for each example. + double lRate; + + //! The value used to initialise the mean squared gradient parameter. + double eps; + + //! The maximum number of allowed iterations. + size_t maxIterations; + + //! The tolerance for termination. + double tolerance; + + //! Controls whether or not the individual functions are shuffled when + //! iterating. + bool shuffle; +}; + +} // namespace optimization +} // namespace mlpack + +// Include implementation. +#include "smorms3_impl.hpp" + +#endif diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp new file mode 100644 index 00000000000..f7c5f37cd74 --- /dev/null +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -0,0 +1,146 @@ +/** + * @file smorms3_impl.hpp + * @author Vivek Pal + * + * Implementation of the SMORMS3 optimizer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP +#define __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP + +// In case it hasn't been included yet. +#include "smorms3.hpp" + +namespace mlpack { +namespace optimization { + +template +SMORMS3::SMORMS3(DecomposableFunctionType& function, + const double lRate, + const double eps, + const size_t maxIterations, + const double tolerance, + const bool shuffle) : + function(function), + lRate(lRate), + eps(eps), + maxIterations(maxIterations), + tolerance(tolerance), + shuffle(shuffle) +{ /* Nothing to do. */ } + +//! Optimize the function (minimize). +template +double SMORMS3::Optimize(arma::mat& iterate) +{ + // Find the number of functions to use. + const size_t numFunctions = function.NumFunctions(); + + // This is used only if shuffle is true. 
+ arma::Col visitationOrder; + if (shuffle) + visitationOrder = arma::shuffle(arma::linspace>(0, + (numFunctions - 1), numFunctions)); + + // To keep track of where we are and how things are going. + size_t currentFunction = 0; + double overallObjective = 0; + double lastObjective = DBL_MAX; + + // Calculate the first objective function. + for (size_t i = 0; i < numFunctions; ++i) + overallObjective += function.Evaluate(iterate, i); + + // Initialise the parameters mem, g and g2. + arma::mat mem = arma::ones(iterate.n_rows, iterate.n_cols); + + arma::mat g = arma::zeros(iterate.n_rows, iterate.n_cols); + + arma::mat g2 = arma::zeros(iterate.n_rows, iterate.n_cols); + + // Now iterate! + arma::mat gradient(iterate.n_rows, iterate.n_cols); + + for (size_t i = 1; i != maxIterations; ++i, ++currentFunction) + { + // Is this iteration the start of a sequence? + if ((currentFunction % numFunctions) == 0) + { + // Output current objective function. + Log::Info << "SMORMS3: iteration " << i << ", objective " + << overallObjective << "." << std::endl; + + if (std::isnan(overallObjective) || std::isinf(overallObjective)) + { + Log::Warn << "SMORMS3: converged to " << overallObjective + << "; terminating with failure. Try a smaller step size?" + << std::endl; + return overallObjective; + } + + if (std::abs(lastObjective - overallObjective) < tolerance) + { + Log::Info << "SMORMS3: minimized within tolerance " << tolerance << "; " + << "terminating optimization." << std::endl; + return overallObjective; + } + + // Reset the counter variables. + lastObjective = overallObjective; + overallObjective = 0; + currentFunction = 0; + + if (shuffle) // Determine order of visitation. + visitationOrder = arma::shuffle(visitationOrder); + } + + // Evaluate the gradient for this iteration. + if (shuffle) + function.Gradient(iterate, visitationOrder[currentFunction], gradient); + else + function.Gradient(iterate, currentFunction, gradient); + + // And update the iterate. + arma::mat r = 1 / (mem + 1); + + g = (1 - r) % g; + g += r % gradient; + + g2 = (1 - r) % g2; + g2 += r % (gradient % gradient); + + arma::mat x = (g % g) / (g2 + eps); + + arma::mat lRateMat(x.n_rows, x.n_cols); + x.fill(lRate); + + iterate -= gradient * arma::min(x, lRate) / (arma::sqrt(g2) + eps); + + mem *= (1 - x); + mem += 1; + + // Now add that to the overall objective function. + if (shuffle) + overallObjective += function.Evaluate(iterate, + visitationOrder[currentFunction]); + else + overallObjective += function.Evaluate(iterate, currentFunction); + } + + Log::Info << "SMORMS3: maximum iterations (" << maxIterations << ") reached; " + << "terminating optimization." << std::endl; + // Calculate final objective. 
+ overallObjective = 0; + for (size_t i = 0; i < numFunctions; ++i) + overallObjective += function.Evaluate(iterate, i); + return overallObjective; +} + +} // namespace optimization +} // namespace mlpack + +#endif From 946ba643d094c6407ba1e65940c8dea129981639 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 1 Mar 2017 21:09:02 +0530 Subject: [PATCH 06/78] Add tests for SMORMS3 optimizer --- src/mlpack/tests/smorms3_test.cpp | 109 ++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 src/mlpack/tests/smorms3_test.cpp diff --git a/src/mlpack/tests/smorms3_test.cpp b/src/mlpack/tests/smorms3_test.cpp new file mode 100644 index 00000000000..370e6d3f8de --- /dev/null +++ b/src/mlpack/tests/smorms3_test.cpp @@ -0,0 +1,109 @@ +/** + * @file smorms3_test.cpp + * @author Vivek Pal + * + * Tests the SMORMS3 optimizer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#include + +#include +#include +#include + +#include +#include "test_tools.hpp" + +using namespace arma; +using namespace mlpack::optimization; +using namespace mlpack::optimization::test; + +using namespace mlpack::distribution; +using namespace mlpack::regression; + +using namespace mlpack; + +BOOST_AUTO_TEST_SUITE(SMORMS3Test); + +/** + * Tests the SMORMS3 optimizer using a simple test function. + */ +BOOST_AUTO_TEST_CASE(SimpleSMORMS3TestFunction) +{ + SGDTestFunction f; + SMORMS3 optimizer(f, 1e-3, 1e-16, 5000000, 1e-9, true); + + arma::mat coordinates = f.GetInitialPoint(); + optimizer.Optimize(coordinates); + + BOOST_REQUIRE_SMALL(coordinates[0], 0.1); + BOOST_REQUIRE_SMALL(coordinates[1], 0.1); + BOOST_REQUIRE_SMALL(coordinates[2], 0.1); +} + +/** + * Run SMORMS3 on logistic regression and make sure the results are acceptable. + */ +BOOST_AUTO_TEST_CASE(SMORMS3LogisticRegressionTest) +{ + // Generate a two-Gaussian dataset. + GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye(3, 3)); + GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye(3, 3)); + + arma::mat data(3, 1000); + arma::Row responses(1000); + for (size_t i = 0; i < 500; ++i) + { + data.col(i) = g1.Random(); + responses[i] = 0; + } + for (size_t i = 500; i < 1000; ++i) + { + data.col(i) = g2.Random(); + responses[i] = 1; + } + + // Shuffle the dataset. + arma::uvec indices = arma::shuffle(arma::linspace(0, + data.n_cols - 1, data.n_cols)); + arma::mat shuffledData(3, 1000); + arma::Row shuffledResponses(1000); + for (size_t i = 0; i < data.n_cols; ++i) + { + shuffledData.col(i) = data.col(indices[i]); + shuffledResponses[i] = responses[indices[i]]; + } + + // Create a test set. + arma::mat testData(3, 1000); + arma::Row testResponses(1000); + for (size_t i = 0; i < 500; ++i) + { + testData.col(i) = g1.Random(); + testResponses[i] = 0; + } + for (size_t i = 500; i < 1000; ++i) + { + testData.col(i) = g2.Random(); + testResponses[i] = 1; + } + + LogisticRegression<> lr(shuffledData.n_rows, 0.5); + + LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); + SMORMS3 > smorms3(lrf); + lr.Train(smorms3); + + // Ensure that the error is close to zero. + const double acc = lr.ComputeAccuracy(data, responses); + BOOST_REQUIRE_CLOSE(acc, 100.0, 0.3); // 0.3% error tolerance. 
+ + const double testAcc = lr.ComputeAccuracy(testData, testResponses); + BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance. +} + +BOOST_AUTO_TEST_SUITE_END(); From 4b4c2d76c78ef39142a1cf10680f76d5003cacb3 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 1 Mar 2017 21:11:07 +0530 Subject: [PATCH 07/78] Update optimizers and tests CMakeLists --- src/mlpack/core/optimizers/CMakeLists.txt | 1 + src/mlpack/tests/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/src/mlpack/core/optimizers/CMakeLists.txt b/src/mlpack/core/optimizers/CMakeLists.txt index c0b5f563f58..072fd27b32e 100644 --- a/src/mlpack/core/optimizers/CMakeLists.txt +++ b/src/mlpack/core/optimizers/CMakeLists.txt @@ -10,6 +10,7 @@ set(DIRS sa sdp sgd + smorms3 ) foreach(dir ${DIRS}) diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index 8f18594a9dc..b93077b25ed 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -82,6 +82,7 @@ add_executable(mlpack_test serialization.hpp serialization.cpp serialization_test.cpp + smorms3_test.cpp softmax_regression_test.cpp sort_policy_test.cpp sparse_autoencoder_test.cpp From ddb3128601eb0d8ea0ac5764765cf11a4f02e32f Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 2 Mar 2017 13:56:00 +0530 Subject: [PATCH 08/78] Fill matrix lRateMat with lRate not matrix x --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index f7c5f37cd74..1e7fcd4daa2 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -116,7 +116,7 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat x = (g % g) / (g2 + eps); arma::mat lRateMat(x.n_rows, x.n_cols); - x.fill(lRate); + lRateMat.fill(lRate); iterate -= gradient * arma::min(x, lRate) / (arma::sqrt(g2) + eps); From c812c18d836c6de74f3ee2deb1172fa66570ec66 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 2 Mar 2017 13:56:52 +0530 Subject: [PATCH 09/78] Fix a typo --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 1e7fcd4daa2..67f1190a10f 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -118,7 +118,7 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat lRateMat(x.n_rows, x.n_cols); lRateMat.fill(lRate); - iterate -= gradient * arma::min(x, lRate) / (arma::sqrt(g2) + eps); + iterate -= gradient * arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); mem *= (1 - x); mem += 1; From 9945b04f255fb7135290b9df57d60efaa6aef187 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 2 Mar 2017 13:59:29 +0530 Subject: [PATCH 10/78] Use (%) Schur product instead of (*) Matrix multiplication --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 67f1190a10f..5f305bcadf8 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -118,7 +118,7 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat lRateMat(x.n_rows, x.n_cols); 
lRateMat.fill(lRate); - iterate -= gradient * arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); + iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); mem *= (1 - x); mem += 1; From dac421821eb0694b898932376689d09092377720 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 2 Mar 2017 14:02:31 +0530 Subject: [PATCH 11/78] Remove unnecessary newlines --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 5f305bcadf8..3cc8fe48b0f 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -57,9 +57,7 @@ double SMORMS3::Optimize(arma::mat& iterate) // Initialise the parameters mem, g and g2. arma::mat mem = arma::ones(iterate.n_rows, iterate.n_cols); - arma::mat g = arma::zeros(iterate.n_rows, iterate.n_cols); - arma::mat g2 = arma::zeros(iterate.n_rows, iterate.n_cols); // Now iterate! From 558de23e1d7bbcbd53a92c757ebc98c081bd7c1e Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 9 Mar 2017 20:40:01 +0530 Subject: [PATCH 12/78] Move initialisation of lRateMat outside the for loop --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 3cc8fe48b0f..44c2da02723 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -60,6 +60,10 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat g = arma::zeros(iterate.n_rows, iterate.n_cols); arma::mat g2 = arma::zeros(iterate.n_rows, iterate.n_cols); + // Initialise a matrix filled with lRate. + arma::mat lRateMat(iterate.n_rows, iterate.n_cols); + lRateMat.fill(lRate); + // Now iterate! arma::mat gradient(iterate.n_rows, iterate.n_cols); @@ -113,9 +117,6 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat x = (g % g) / (g2 + eps); - arma::mat lRateMat(x.n_rows, x.n_cols); - lRateMat.fill(lRate); - iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); mem *= (1 - x); From 03408c5742b93c2d28b6046f18d2ca2cf74b621c Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 9 Mar 2017 20:44:23 +0530 Subject: [PATCH 13/78] Remove leading underscores in the macros --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index d4ac9810b58..dcf971128db 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -11,8 +11,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
*/ -#ifndef __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP -#define __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP #include diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 44c2da02723..3259d379666 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -9,8 +9,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP -#define __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP // In case it hasn't been included yet. #include "smorms3.hpp" From 8ec825c738d5a4ff3575eb746537a6795e100210 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 9 Mar 2017 20:50:58 +0530 Subject: [PATCH 14/78] Rename eps parameter in the constructor as epsilon --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index dcf971128db..0b1359d929a 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -69,7 +69,7 @@ class SMORMS3 * * @param function Function to be optimized (minimized). * @param lRate Learning rate or step size for each iteration. - * @param eps Value used to initialise the mean squared gradient parameter. + * @param epsilon Value used to initialise the mean squared gradient parameter. * @param maxIterations Maximum number of iterations allowed (0 means no * limit). * @param tolerance Maximum absolute tolerance to terminate algorithm. @@ -78,7 +78,7 @@ class SMORMS3 */ SMORMS3(DecomposableFunctionType& function, const double lRate = 0.001, - const double eps = 1e-16, + const double epsilon = 1e-16, const size_t maxIterations = 100000, const double tolerance = 1e-5, const bool shuffle = true); From d798b3dfb1dd3b359385d961e3d533b5c72f1afd Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 9 Mar 2017 20:54:32 +0530 Subject: [PATCH 15/78] Rename function LRate as LearningRate --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 0b1359d929a..c173a8ed34b 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -99,9 +99,9 @@ class SMORMS3 DecomposableFunctionType& Function() { return function; } //! Get the learning rate. - double LRate() const { return lRate; } + double LearningRate() const { return lRate; } //! Modify the learning rate. - double& LRate() { return lRate; } + double& LearningRate() { return lRate; } //! Get the value used to initialise the mean squared gradient parameter. double Epsilon() const { return eps; } From a281494636dd6efa1419d0442fd0a80a9aa7f1ad Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 15 Mar 2017 21:35:48 +0530 Subject: [PATCH 16/78] Implement SMORMS3 optimizer as an update policy of SGD * SMORMS3Update policy class implements the update steps of SMORMS3 algorithm. * SMORMS3 class as a wrapper leveraging the SGD class. 
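Roughly, the new policy is meant to be used through SGD directly; a minimal
sketch of that usage, mirroring the updated smorms3_test.cpp (the SGD template
arguments are spelled out here for clarity):

    SGDTestFunction f;
    SMORMS3Update smorms3Update(1e-16);
    SGD<SGDTestFunction, SMORMS3Update> s(f, 1e-3, 5000000, 1e-9, true,
        smorms3Update);

    arma::mat coordinates = f.GetInitialPoint();
    s.Optimize(coordinates);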
--- src/mlpack/core/optimizers/sgd/sgd.hpp | 1 + .../sgd/update_policies/CMakeLists.txt | 1 + .../sgd/update_policies/smorms3_update.hpp | 108 +++++++++++++ .../core/optimizers/smorms3/CMakeLists.txt | 1 - .../core/optimizers/smorms3/smorms3.hpp | 12 +- .../core/optimizers/smorms3/smorms3_impl.hpp | 145 ------------------ src/mlpack/tests/smorms3_test.cpp | 11 +- 7 files changed, 124 insertions(+), 155 deletions(-) create mode 100644 src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp delete mode 100644 src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp diff --git a/src/mlpack/core/optimizers/sgd/sgd.hpp b/src/mlpack/core/optimizers/sgd/sgd.hpp index 4dcb4eb54fe..8f7c348feeb 100644 --- a/src/mlpack/core/optimizers/sgd/sgd.hpp +++ b/src/mlpack/core/optimizers/sgd/sgd.hpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace mlpack { namespace optimization { diff --git a/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt b/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt index 36d87a97ebe..b6e4a0dd8b0 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt +++ b/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES vanilla_update.hpp momentum_update.hpp + smorms3_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp new file mode 100644 index 00000000000..8d13cf48217 --- /dev/null +++ b/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp @@ -0,0 +1,108 @@ +/** + * @file smorms3_update.hpp + * @author Vivek Pal + * + * SMORMS3 update for Stochastic Gradient Descent. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_UPDATE_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * SMORMS3 is an optimizer that estimates a safe and optimal distance based on + * curvature and normalizing the stepsize in the parameter space. It is a hybrid + * of RMSprop and Yann LeCun’s method in "No more pesky learning rates". + * + * For more information, see the following. + * + * @code + * @misc{Funk2015, + * author = {Simon Funk}, + * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, + * year = {2015} + * } + * @endcode + */ + +class SMORMS3Update +{ + public: + /** + * Construct the SMORMS3 update policy with given epsilon parameter. + * + * @param psilon Value used to initialise the mean squared gradient parameter. + */ + SMORMS3Update(const double eps = 1e-16) : eps(eps) + { /* Do nothing. */ }; + + /** + * The Initialize method is called by SGD::Optimize method with UpdatePolicy + * SMORMS3Update before the start of the iteration update process. + * + * @param n_rows number of rows in the gradient matrix. + * @param n_cols number of columns in the gradient matrix. + * @param lRate + */ + void Initialize(const size_t n_rows, + const size_t n_cols) + { + // Initialise the parameters mem, g and g2. + mem = arma::ones(n_rows, n_cols); + g = arma::zeros(n_rows, n_cols); + g2 = arma::zeros(n_rows, n_cols); + + // Initialise a matrix to be filled with lRate. 
+ lRateMat = arma::zeros(n_rows, n_cols); + } + + /** + * Update step for SMORMS3. + * + * @param iterate Parameter that minimizes the function. + * @param stepSize Step size to be used for the given iteration. + * @param gradient The gradient matrix. + */ + void Update(arma::mat& iterate, + double stepSize, + arma::mat& gradient) + { + // Update the iterate. + arma::mat r = 1 / (mem + 1); + + g = (1 - r) % g; + g += r % gradient; + + g2 = (1 - r) % g2; + g2 += r % (gradient % gradient); + + arma::mat x = (g % g) / (g2 + eps); + + lRateMat.fill(stepSize); + + iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); + + mem %= (1 - x); + mem += 1; + } + private: + //! The value used to initialise the mean squared gradient parameter. + double eps; + // The parameters mem, g and g2. + arma::mat mem, g, g2; + // The matrix to be filled with lRate. + arma::mat lRateMat; +}; + +} // namespace optimization +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt index 68b6de814cd..cefe7c12883 100644 --- a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt +++ b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt @@ -1,6 +1,5 @@ set(SOURCES smorms3.hpp - smorms3_impl.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index c173a8ed34b..66775be1f8e 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -15,6 +15,7 @@ #define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP #include +#include namespace mlpack { namespace optimization { @@ -91,7 +92,10 @@ class SMORMS3 * @param iterate Starting point (will be modified). * @return Objective value of the final point. */ - double Optimize(arma::mat& iterate); + double Optimize(const arma::mat& iterate) + { + return optimizer.Optimize(iterate); + } //! Get the instantiated function to be optimized. const DecomposableFunctionType& Function() const { return function; } @@ -142,12 +146,12 @@ class SMORMS3 //! Controls whether or not the individual functions are shuffled when //! iterating. bool shuffle; + + //! The Stochastic Gradient Descent object with SMORMS3 update policy. + SGD optimizer; }; } // namespace optimization } // namespace mlpack -// Include implementation. -#include "smorms3_impl.hpp" - #endif diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp deleted file mode 100644 index 3259d379666..00000000000 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ /dev/null @@ -1,145 +0,0 @@ -/** - * @file smorms3_impl.hpp - * @author Vivek Pal - * - * Implementation of the SMORMS3 optimizer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP -#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP - -// In case it hasn't been included yet. 
-#include "smorms3.hpp" - -namespace mlpack { -namespace optimization { - -template -SMORMS3::SMORMS3(DecomposableFunctionType& function, - const double lRate, - const double eps, - const size_t maxIterations, - const double tolerance, - const bool shuffle) : - function(function), - lRate(lRate), - eps(eps), - maxIterations(maxIterations), - tolerance(tolerance), - shuffle(shuffle) -{ /* Nothing to do. */ } - -//! Optimize the function (minimize). -template -double SMORMS3::Optimize(arma::mat& iterate) -{ - // Find the number of functions to use. - const size_t numFunctions = function.NumFunctions(); - - // This is used only if shuffle is true. - arma::Col visitationOrder; - if (shuffle) - visitationOrder = arma::shuffle(arma::linspace>(0, - (numFunctions - 1), numFunctions)); - - // To keep track of where we are and how things are going. - size_t currentFunction = 0; - double overallObjective = 0; - double lastObjective = DBL_MAX; - - // Calculate the first objective function. - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - - // Initialise the parameters mem, g and g2. - arma::mat mem = arma::ones(iterate.n_rows, iterate.n_cols); - arma::mat g = arma::zeros(iterate.n_rows, iterate.n_cols); - arma::mat g2 = arma::zeros(iterate.n_rows, iterate.n_cols); - - // Initialise a matrix filled with lRate. - arma::mat lRateMat(iterate.n_rows, iterate.n_cols); - lRateMat.fill(lRate); - - // Now iterate! - arma::mat gradient(iterate.n_rows, iterate.n_cols); - - for (size_t i = 1; i != maxIterations; ++i, ++currentFunction) - { - // Is this iteration the start of a sequence? - if ((currentFunction % numFunctions) == 0) - { - // Output current objective function. - Log::Info << "SMORMS3: iteration " << i << ", objective " - << overallObjective << "." << std::endl; - - if (std::isnan(overallObjective) || std::isinf(overallObjective)) - { - Log::Warn << "SMORMS3: converged to " << overallObjective - << "; terminating with failure. Try a smaller step size?" - << std::endl; - return overallObjective; - } - - if (std::abs(lastObjective - overallObjective) < tolerance) - { - Log::Info << "SMORMS3: minimized within tolerance " << tolerance << "; " - << "terminating optimization." << std::endl; - return overallObjective; - } - - // Reset the counter variables. - lastObjective = overallObjective; - overallObjective = 0; - currentFunction = 0; - - if (shuffle) // Determine order of visitation. - visitationOrder = arma::shuffle(visitationOrder); - } - - // Evaluate the gradient for this iteration. - if (shuffle) - function.Gradient(iterate, visitationOrder[currentFunction], gradient); - else - function.Gradient(iterate, currentFunction, gradient); - - // And update the iterate. - arma::mat r = 1 / (mem + 1); - - g = (1 - r) % g; - g += r % gradient; - - g2 = (1 - r) % g2; - g2 += r % (gradient % gradient); - - arma::mat x = (g % g) / (g2 + eps); - - iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); - - mem *= (1 - x); - mem += 1; - - // Now add that to the overall objective function. - if (shuffle) - overallObjective += function.Evaluate(iterate, - visitationOrder[currentFunction]); - else - overallObjective += function.Evaluate(iterate, currentFunction); - } - - Log::Info << "SMORMS3: maximum iterations (" << maxIterations << ") reached; " - << "terminating optimization." << std::endl; - // Calculate final objective. 
- overallObjective = 0; - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - return overallObjective; -} - -} // namespace optimization -} // namespace mlpack - -#endif diff --git a/src/mlpack/tests/smorms3_test.cpp b/src/mlpack/tests/smorms3_test.cpp index 370e6d3f8de..2ca3fb81fad 100644 --- a/src/mlpack/tests/smorms3_test.cpp +++ b/src/mlpack/tests/smorms3_test.cpp @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include @@ -35,10 +35,11 @@ BOOST_AUTO_TEST_SUITE(SMORMS3Test); BOOST_AUTO_TEST_CASE(SimpleSMORMS3TestFunction) { SGDTestFunction f; - SMORMS3 optimizer(f, 1e-3, 1e-16, 5000000, 1e-9, true); + SMORMS3Update smorms3Update(1e-16); + SGD s(f, 1e-3, 5000000, 1e-9, true, smorms3Update); arma::mat coordinates = f.GetInitialPoint(); - optimizer.Optimize(coordinates); + s.Optimize(coordinates); BOOST_REQUIRE_SMALL(coordinates[0], 0.1); BOOST_REQUIRE_SMALL(coordinates[1], 0.1); @@ -95,7 +96,7 @@ BOOST_AUTO_TEST_CASE(SMORMS3LogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - SMORMS3 > smorms3(lrf); + SGD, SMORMS3Update > smorms3(lrf); lr.Train(smorms3); // Ensure that the error is close to zero. @@ -106,4 +107,4 @@ BOOST_AUTO_TEST_CASE(SMORMS3LogisticRegressionTest) BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance. } -BOOST_AUTO_TEST_SUITE_END(); +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file From 3c720d3b316bc9807ecb35bc94b145fccec7b0e7 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 15 Mar 2017 21:48:56 +0530 Subject: [PATCH 17/78] Let's stick with stepSize instead of lRate We currently use stepSize in other optimizers as well e.g. RMSprop etc and also, consistent code looks good. --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 66775be1f8e..0aff1420b88 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -69,7 +69,7 @@ class SMORMS3 * equal one pass over the dataset). * * @param function Function to be optimized (minimized). - * @param lRate Learning rate or step size for each iteration. + * @param stepSize Step size for each iteration. * @param epsilon Value used to initialise the mean squared gradient parameter. * @param maxIterations Maximum number of iterations allowed (0 means no * limit). @@ -78,7 +78,7 @@ class SMORMS3 * function is visited in linear order. */ SMORMS3(DecomposableFunctionType& function, - const double lRate = 0.001, + const double stepSize = 0.001, const double epsilon = 1e-16, const size_t maxIterations = 100000, const double tolerance = 1e-5, @@ -102,10 +102,10 @@ class SMORMS3 //! Modify the instantiated function. DecomposableFunctionType& Function() { return function; } - //! Get the learning rate. - double LearningRate() const { return lRate; } - //! Modify the learning rate. - double& LearningRate() { return lRate; } + //! Get the step size. + double StepSize() const { return stepSize; } + //! Modify the step size. + double& StepSize() { return stepSize; } //! Get the value used to initialise the mean squared gradient parameter. double Epsilon() const { return eps; } @@ -131,8 +131,8 @@ class SMORMS3 //! The instantiated function. DecomposableFunctionType& function; - //! The learning rate for each example. - double lRate; + //! 
The step size for each example. + double stepSize; //! The value used to initialise the mean squared gradient parameter. double eps; From 4950f54b200f84074ece96d49003bf43171b3b95 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 16 Mar 2017 01:06:22 +0530 Subject: [PATCH 18/78] fixup! Let's stick with stepSize instead of lRate --- .../sgd/update_policies/smorms3_update.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp index 8d13cf48217..9a7376e3de8 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp @@ -60,8 +60,8 @@ class SMORMS3Update g = arma::zeros(n_rows, n_cols); g2 = arma::zeros(n_rows, n_cols); - // Initialise a matrix to be filled with lRate. - lRateMat = arma::zeros(n_rows, n_cols); + // Initialise a matrix to be filled with stepSize. + stepSizeMat = arma::zeros(n_rows, n_cols); } /** @@ -86,9 +86,9 @@ class SMORMS3Update arma::mat x = (g % g) / (g2 + eps); - lRateMat.fill(stepSize); + stepSizeMat.fill(stepSize); - iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); + iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + eps); mem %= (1 - x); mem += 1; @@ -98,8 +98,8 @@ class SMORMS3Update double eps; // The parameters mem, g and g2. arma::mat mem, g, g2; - // The matrix to be filled with lRate. - arma::mat lRateMat; + // The matrix to be filled with stepSize. + arma::mat stepSizeMat; }; } // namespace optimization From 782c4a93d1d97c1226602ddc16a5a82b47f21a71 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 16 Mar 2017 09:15:48 +0530 Subject: [PATCH 19/78] Address review comments with the following changes: * Rename parameter eps as epsilon everywhere. * Make paramter stepSize & gradient const in SMORMS3Update::Update. * Move smorms3_update.hpp to /optimizers/smorms3 directory. * Reset stepSizeMat only when stepSize != previousStepSize. * Update tests. 
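The stepSizeMat reset described above comes down to a small guard in
SMORMS3Update::Update(), along these lines:

    if (stepSize != previousStepSize)
    {
      stepSizeMat.fill(stepSize);
      previousStepSize = stepSize;
    }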
--- src/mlpack/core/optimizers/sgd/sgd.hpp | 1 - .../core/optimizers/smorms3/CMakeLists.txt | 2 + .../core/optimizers/smorms3/smorms3.hpp | 40 +++++++++++-------- .../core/optimizers/smorms3/smorms3_impl.hpp | 39 ++++++++++++++++++ .../smorms3_update.hpp | 21 ++++++---- src/mlpack/tests/smorms3_test.cpp | 7 ++-- 6 files changed, 81 insertions(+), 29 deletions(-) create mode 100644 src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp rename src/mlpack/core/optimizers/{sgd/update_policies => smorms3}/smorms3_update.hpp (85%) diff --git a/src/mlpack/core/optimizers/sgd/sgd.hpp b/src/mlpack/core/optimizers/sgd/sgd.hpp index 8f7c348feeb..4dcb4eb54fe 100644 --- a/src/mlpack/core/optimizers/sgd/sgd.hpp +++ b/src/mlpack/core/optimizers/sgd/sgd.hpp @@ -17,7 +17,6 @@ #include #include #include -#include namespace mlpack { namespace optimization { diff --git a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt index cefe7c12883..e77beb68b1d 100644 --- a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt +++ b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt @@ -1,5 +1,7 @@ set(SOURCES smorms3.hpp + smorms3_impl.hpp + smorms3_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 0aff1420b88..2bc230c83a0 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -17,6 +17,8 @@ #include #include +#include "smorms3_update.hpp" + namespace mlpack { namespace optimization { @@ -92,10 +94,7 @@ class SMORMS3 * @param iterate Starting point (will be modified). * @return Objective value of the final point. */ - double Optimize(const arma::mat& iterate) - { - return optimizer.Optimize(iterate); - } + double Optimize(arma::mat& iterate) { return optimizer.Optimize(iterate); } //! Get the instantiated function to be optimized. const DecomposableFunctionType& Function() const { return function; } @@ -103,29 +102,29 @@ class SMORMS3 DecomposableFunctionType& Function() { return function; } //! Get the step size. - double StepSize() const { return stepSize; } + double StepSize() const { return optimizer.StepSize(); } //! Modify the step size. - double& StepSize() { return stepSize; } + double& StepSize() { return optimizer.StepSize(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return eps; } + double Epsilon() const { return optimizer.Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return eps; } + double& Epsilon() { return optimizer.Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). - size_t MaxIterations() const { return maxIterations; } + size_t MaxIterations() const { return optimizer.MaxIterations(); } //! Modify the maximum number of iterations (0 indicates no limit). - size_t& MaxIterations() { return maxIterations; } + size_t& MaxIterations() { return optimizer.MaxIterations(); } //! Get the tolerance for termination. - double Tolerance() const { return tolerance; } + double Tolerance() const { return optimizer.Tolerance(); } //! Modify the tolerance for termination. - double& Tolerance() { return tolerance; } + double& Tolerance() { return optimizer.Tolerance(); } //! Get whether or not the individual functions are shuffled. - bool Shuffle() const { return shuffle; } + bool Shuffle() const { return optimizer.Shuffle(); } //! 
Modify whether or not the individual functions are shuffled. - bool& Shuffle() { return shuffle; } + bool& Shuffle() { return optimizer.Shuffle(); } private: //! The instantiated function. @@ -135,7 +134,7 @@ class SMORMS3 double stepSize; //! The value used to initialise the mean squared gradient parameter. - double eps; + double epsilon; //! The maximum number of allowed iterations. size_t maxIterations; @@ -148,10 +147,19 @@ class SMORMS3 bool shuffle; //! The Stochastic Gradient Descent object with SMORMS3 update policy. - SGD optimizer; + SGD + optimizer(DecomposableFunctionType&, + const double, + const size_t, + const double, + const bool, + SMORMS3Update); }; } // namespace optimization } // namespace mlpack +// Include implementation. +#include "smorms3_impl.hpp" + #endif diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp new file mode 100644 index 00000000000..c8ab5f0e254 --- /dev/null +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -0,0 +1,39 @@ +/** + * @file smorms3_impl.hpp + * @author Vivek Pal + * + * Implementation of the SMORMS3 constructor. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP + +// In case it hasn't been included yet. +#include "smorms3.hpp" + +namespace mlpack { +namespace optimization { + +template +SMORMS3::SMORMS3(DecomposableFunctionType& function, + const double stepSize, + const double epsilon, + const size_t maxIterations, + const double tolerance, + const bool shuffle) : + function(function), + stepSize(stepSize), + epsilon(epsilon), + maxIterations(maxIterations), + tolerance(tolerance), + shuffle(shuffle) +{ /* Nothing to do. */ } + +} // namespace optimization +} // namespace mlpack + +#endif diff --git a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp similarity index 85% rename from src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp rename to src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 9a7376e3de8..338cefd7a0c 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -39,9 +39,9 @@ class SMORMS3Update /** * Construct the SMORMS3 update policy with given epsilon parameter. * - * @param psilon Value used to initialise the mean squared gradient parameter. + * @param epsilon Value used to initialise the mean squared gradient parameter. */ - SMORMS3Update(const double eps = 1e-16) : eps(eps) + SMORMS3Update(const double epsilon = 1e-16) : epsilon(epsilon) { /* Do nothing. */ }; /** @@ -72,8 +72,8 @@ class SMORMS3Update * @param gradient The gradient matrix. */ void Update(arma::mat& iterate, - double stepSize, - arma::mat& gradient) + const double stepSize, + const arma::mat& gradient) { // Update the iterate. 
arma::mat r = 1 / (mem + 1); @@ -84,18 +84,23 @@ class SMORMS3Update g2 = (1 - r) % g2; g2 += r % (gradient % gradient); - arma::mat x = (g % g) / (g2 + eps); + arma::mat x = (g % g) / (g2 + epsilon); - stepSizeMat.fill(stepSize); + if (stepSize != previousStepSize) + { + stepSizeMat.fill(stepSize); + } - iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + eps); + iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + epsilon); mem %= (1 - x); mem += 1; + + previousStepSize = stepSize; } private: //! The value used to initialise the mean squared gradient parameter. - double eps; + double epsilon, previousStepSize; // The parameters mem, g and g2. arma::mat mem, g, g2; // The matrix to be filled with stepSize. diff --git a/src/mlpack/tests/smorms3_test.cpp b/src/mlpack/tests/smorms3_test.cpp index 2ca3fb81fad..1d62556312d 100644 --- a/src/mlpack/tests/smorms3_test.cpp +++ b/src/mlpack/tests/smorms3_test.cpp @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include @@ -35,8 +35,7 @@ BOOST_AUTO_TEST_SUITE(SMORMS3Test); BOOST_AUTO_TEST_CASE(SimpleSMORMS3TestFunction) { SGDTestFunction f; - SMORMS3Update smorms3Update(1e-16); - SGD s(f, 1e-3, 5000000, 1e-9, true, smorms3Update); + SMORMS3 s(f, 0.001, 1e-16, 5000000, 1e-9, true); arma::mat coordinates = f.GetInitialPoint(); s.Optimize(coordinates); @@ -96,7 +95,7 @@ BOOST_AUTO_TEST_CASE(SMORMS3LogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - SGD, SMORMS3Update > smorms3(lrf); + SMORMS3 > smorms3(lrf); lr.Train(smorms3); // Ensure that the error is close to zero. From c4121ff2c62e04a236450e34fd80e4f62202ffa0 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 16 Mar 2017 09:49:50 +0530 Subject: [PATCH 20/78] Rename n_rows and n_cols as rows and cols respectively --- .../core/optimizers/smorms3/smorms3_update.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 338cefd7a0c..20731ed4626 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -52,16 +52,16 @@ class SMORMS3Update * @param n_cols number of columns in the gradient matrix. * @param lRate */ - void Initialize(const size_t n_rows, - const size_t n_cols) + void Initialize(const size_t rows, + const size_t cols) { // Initialise the parameters mem, g and g2. - mem = arma::ones(n_rows, n_cols); - g = arma::zeros(n_rows, n_cols); - g2 = arma::zeros(n_rows, n_cols); + mem = arma::ones(rows, cols); + g = arma::zeros(rows, cols); + g2 = arma::zeros(rows, cols); // Initialise a matrix to be filled with stepSize. - stepSizeMat = arma::zeros(n_rows, n_cols); + stepSizeMat = arma::zeros(rows, cols); } /** From 7bbe4359de73c7f084460dc6d71f2b9a8f6f3026 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sat, 18 Mar 2017 19:56:54 +0530 Subject: [PATCH 21/78] No need to include smorms3_update.hpp here It was moved to optimizer/smorms3 directory. 
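With the update policy moved into its own directory, SMORMS3 is effectively SGD parameterized by SMORMS3Update. Roughly, the contract an update policy fulfils looks like the following condensed sketch (pieced together from the SGD code touched in this series, not a complete listing):

    // Inside SGD<DecomposableFunctionType, UpdatePolicyType>::Optimize():
    updatePolicy.Initialize(iterate.n_rows, iterate.n_cols); // Once, up front.
    for (size_t i = 1; i != maxIterations; ++i)
    {
      // Gradient of the currently visited decomposable function.
      function.Gradient(iterate, currentFunction, gradient);

      // Policy-specific step (vanilla, momentum, SMORMS3, ...).
      updatePolicy.Update(iterate, stepSize, gradient);
    }

The SMORMS3 class itself then only needs to forward its constructor arguments to such an SGD instance, which is what the following commits converge on.
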
--- src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt b/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt index b6e4a0dd8b0..36d87a97ebe 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt +++ b/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt @@ -1,7 +1,6 @@ set(SOURCES vanilla_update.hpp momentum_update.hpp - smorms3_update.hpp ) set(DIR_SRCS) From 0dd928840e98326929b8d644b95d881271372db1 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sat, 18 Mar 2017 19:59:04 +0530 Subject: [PATCH 22/78] The arguments not necessary to define the type --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 2bc230c83a0..1803141325f 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -147,13 +147,7 @@ class SMORMS3 bool shuffle; //! The Stochastic Gradient Descent object with SMORMS3 update policy. - SGD - optimizer(DecomposableFunctionType&, - const double, - const size_t, - const double, - const bool, - SMORMS3Update); + SGD optimizer; }; } // namespace optimization From 24f6cec66e0008bc11b07ca786b72a729073a442 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 19 Mar 2017 04:21:13 +0530 Subject: [PATCH 23/78] Fix incorrect instantiation of SMORMS3 constructor --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 3 +++ src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 1803141325f..6f8756876d9 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -146,6 +146,9 @@ class SMORMS3 //! iterating. bool shuffle; + //! The SMORMS3 update policy object. + SMORMS3Update smorms3Update; + //! The Stochastic Gradient Descent object with SMORMS3 update policy. SGD optimizer; }; diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index c8ab5f0e254..9a431a2bf47 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -30,7 +30,14 @@ SMORMS3::SMORMS3(DecomposableFunctionType& function, epsilon(epsilon), maxIterations(maxIterations), tolerance(tolerance), - shuffle(shuffle) + shuffle(shuffle), + smorms3Update(epsilon), + optimizer(function, + stepSize, + maxIterations, + tolerance, + shuffle, + smorms3Update) { /* Nothing to do. 
*/ } } // namespace optimization From ac8492b0e01c4e2b615fda392b1817e81c164f6a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 19 Mar 2017 04:22:21 +0530 Subject: [PATCH 24/78] Initialise previousStepSize before its first use Fixes valgrind error: Conditional jump or move depends on uninitialised value(s) --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 20731ed4626..3d55fcd14a5 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -100,7 +100,7 @@ class SMORMS3Update } private: //! The value used to initialise the mean squared gradient parameter. - double epsilon, previousStepSize; + double epsilon, previousStepSize = 0; // The parameters mem, g and g2. arma::mat mem, g, g2; // The matrix to be filled with stepSize. From 5fee74edec6b09abf4287e1293de79d710633622 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 19 Mar 2017 16:12:04 +0530 Subject: [PATCH 25/78] Fix a couple of comments --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 6f8756876d9..91acc7b99df 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -146,10 +146,10 @@ class SMORMS3 //! iterating. bool shuffle; - //! The SMORMS3 update policy object. + //! The SMORMS3Update update policy object. SMORMS3Update smorms3Update; - //! The Stochastic Gradient Descent object with SMORMS3 update policy. + //! The Stochastic Gradient Descent object with SMORMS3Update update policy. SGD optimizer; }; From 98b2f5a6397b4913519f01ea2be9319b92f369e6 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 19 Mar 2017 22:24:04 +0530 Subject: [PATCH 26/78] Add Epsilon function to the SMORMS3Update update policy --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 91acc7b99df..933e2ac4225 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -107,9 +107,9 @@ class SMORMS3 double& StepSize() { return optimizer.StepSize(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return optimizer.Epsilon(); } + double Epsilon() const { return smorms3Update.Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return optimizer.Epsilon(); } + double& Epsilon() { return smorms3Update.Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). size_t MaxIterations() const { return optimizer.MaxIterations(); } diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 3d55fcd14a5..cab6b03be93 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -44,6 +44,11 @@ class SMORMS3Update SMORMS3Update(const double epsilon = 1e-16) : epsilon(epsilon) { /* Do nothing. */ }; + //! 
Get the value used to initialise the mean squared gradient parameter. + double Epsilon() const { return epsilon; } + //! Modify the value used to initialise the mean squared gradient parameter. + double& Epsilon() { return epsilon; } + /** * The Initialize method is called by SGD::Optimize method with UpdatePolicy * SMORMS3Update before the start of the iteration update process. From dacb95ead88d9bc9ede98516e13dd2df54e73be5 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 22 Mar 2017 21:06:40 +0530 Subject: [PATCH 27/78] Use constructor to initialise previousStepSize variable --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index cab6b03be93..0d89c74d26c 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -41,8 +41,10 @@ class SMORMS3Update * * @param epsilon Value used to initialise the mean squared gradient parameter. */ - SMORMS3Update(const double epsilon = 1e-16) : epsilon(epsilon) - { /* Do nothing. */ }; + SMORMS3Update(const double epsilon = 1e-16) : + epsilon(epsilon), + previousStepSize(0) + { /* Do nothing. */ } //! Get the value used to initialise the mean squared gradient parameter. double Epsilon() const { return epsilon; } @@ -105,7 +107,7 @@ class SMORMS3Update } private: //! The value used to initialise the mean squared gradient parameter. - double epsilon, previousStepSize = 0; + double epsilon, previousStepSize; // The parameters mem, g and g2. arma::mat mem, g, g2; // The matrix to be filled with stepSize. From b057f881ab11bd65501c696f68f465f44f8fc71c Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 22 Mar 2017 21:11:39 +0530 Subject: [PATCH 28/78] Remove those parameters from SMORMS3 class that: are modified by the optimizer object which are stepSize, maxIterations, tolerance and shuffle --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 13 ------------- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 4 ---- 2 files changed, 17 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 933e2ac4225..82665211713 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -130,22 +130,9 @@ class SMORMS3 //! The instantiated function. DecomposableFunctionType& function; - //! The step size for each example. - double stepSize; - //! The value used to initialise the mean squared gradient parameter. double epsilon; - //! The maximum number of allowed iterations. - size_t maxIterations; - - //! The tolerance for termination. - double tolerance; - - //! Controls whether or not the individual functions are shuffled when - //! iterating. - bool shuffle; - //! The SMORMS3Update update policy object. 
SMORMS3Update smorms3Update; diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 9a431a2bf47..18dce55fae1 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -26,11 +26,7 @@ SMORMS3::SMORMS3(DecomposableFunctionType& function, const double tolerance, const bool shuffle) : function(function), - stepSize(stepSize), epsilon(epsilon), - maxIterations(maxIterations), - tolerance(tolerance), - shuffle(shuffle), smorms3Update(epsilon), optimizer(function, stepSize, From 81b8d03faf19706416edeffc6cb0318e84eaabb6 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 28 Mar 2017 23:01:34 +0530 Subject: [PATCH 29/78] Minor changes addressing review comments --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 18 +++++------------- .../core/optimizers/smorms3/smorms3_impl.hpp | 5 +---- .../core/optimizers/smorms3/smorms3_update.hpp | 15 ++++++++++----- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 82665211713..41643c80236 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -34,6 +34,7 @@ namespace optimization { * author = {Simon Funk}, * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, * year = {2015} + * url = {http://sifter.org/~simon/journal/20150420.html} * } * @endcode * @@ -97,9 +98,9 @@ class SMORMS3 double Optimize(arma::mat& iterate) { return optimizer.Optimize(iterate); } //! Get the instantiated function to be optimized. - const DecomposableFunctionType& Function() const { return function; } + const DecomposableFunctionType& Function() const { return optimizer.Function(); } //! Modify the instantiated function. - DecomposableFunctionType& Function() { return function; } + DecomposableFunctionType& Function() { return optimizer.Function(); } //! Get the step size. double StepSize() const { return optimizer.StepSize(); } @@ -107,9 +108,9 @@ class SMORMS3 double& StepSize() { return optimizer.StepSize(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return smorms3Update.Epsilon(); } + double Epsilon() const { return optimizer.UpdatePolicy().Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return smorms3Update.Epsilon(); } + double& Epsilon() { return optimizer.UpdatePolicy().Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). size_t MaxIterations() const { return optimizer.MaxIterations(); } @@ -127,15 +128,6 @@ class SMORMS3 bool& Shuffle() { return optimizer.Shuffle(); } private: - //! The instantiated function. - DecomposableFunctionType& function; - - //! The value used to initialise the mean squared gradient parameter. - double epsilon; - - //! The SMORMS3Update update policy object. - SMORMS3Update smorms3Update; - //! The Stochastic Gradient Descent object with SMORMS3Update update policy. 
SGD optimizer; }; diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 18dce55fae1..68ced5913aa 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -25,15 +25,12 @@ SMORMS3::SMORMS3(DecomposableFunctionType& function, const size_t maxIterations, const double tolerance, const bool shuffle) : - function(function), - epsilon(epsilon), - smorms3Update(epsilon), optimizer(function, stepSize, maxIterations, tolerance, shuffle, - smorms3Update) + SMORMS3Update(epsilon)) { /* Nothing to do. */ } } // namespace optimization diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 0d89c74d26c..af8de48835f 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -29,6 +29,7 @@ namespace optimization { * author = {Simon Funk}, * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, * year = {2015} + * url = {http://sifter.org/~simon/journal/20150420.html} * } * @endcode */ @@ -42,8 +43,8 @@ class SMORMS3Update * @param epsilon Value used to initialise the mean squared gradient parameter. */ SMORMS3Update(const double epsilon = 1e-16) : - epsilon(epsilon), - previousStepSize(0) + epsilon(epsilon), + previousStepSize(0) { /* Do nothing. */ } //! Get the value used to initialise the mean squared gradient parameter. @@ -96,20 +97,24 @@ class SMORMS3Update if (stepSize != previousStepSize) { stepSizeMat.fill(stepSize); + previousStepSize = stepSize; } iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + epsilon); mem %= (1 - x); mem += 1; - - previousStepSize = stepSize; } private: //! The value used to initialise the mean squared gradient parameter. - double epsilon, previousStepSize; + double epsilon; + + //! The previous value of step size in each iteration of update step. + double previousStepSize; + // The parameters mem, g and g2. arma::mat mem, g, g2; + // The matrix to be filled with stepSize. arma::mat stepSizeMat; }; From ab7a5adc528beeda16419cdaad881544733bf0a9 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 17:07:37 +0530 Subject: [PATCH 30/78] Add implementation of the Adam update policy --- .../core/optimizers/adam/CMakeLists.txt | 1 + src/mlpack/core/optimizers/adam/adam.hpp | 73 +++---- src/mlpack/core/optimizers/adam/adam_impl.hpp | 154 +-------------- .../core/optimizers/adam/adam_update.hpp | 180 ++++++++++++++++++ 4 files changed, 218 insertions(+), 190 deletions(-) create mode 100644 src/mlpack/core/optimizers/adam/adam_update.hpp diff --git a/src/mlpack/core/optimizers/adam/CMakeLists.txt b/src/mlpack/core/optimizers/adam/CMakeLists.txt index 3cbcfd84e79..eabdbbaafa5 100644 --- a/src/mlpack/core/optimizers/adam/CMakeLists.txt +++ b/src/mlpack/core/optimizers/adam/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES adam.hpp adam_impl.hpp + adam_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index 62af4c6ac82..9c729b4d887 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -20,6 +20,9 @@ #include +#include +#include "adam_update.hpp" + namespace mlpack { namespace optimization { @@ -105,81 +108,59 @@ class Adam * @param iterate Starting point (will be modified). * @return Objective value of the final point. 
*/ - double Optimize(arma::mat& iterate); + double Optimize(arma::mat& iterate){ return optimizer.Optimize(iterate); } //! Get the instantiated function to be optimized. - const DecomposableFunctionType& Function() const { return function; } + const DecomposableFunctionType& Function() const + { + return optimizer.Function(); + } //! Modify the instantiated function. - DecomposableFunctionType& Function() { return function; } + DecomposableFunctionType& Function() { return optimizer.Function(); } //! Get the step size. - double StepSize() const { return stepSize; } + double StepSize() const { return optimizer.StepSize(); } //! Modify the step size. - double& StepSize() { return stepSize; } + double& StepSize() { return optimizer.StepSize(); } //! Get the smoothing parameter. - double Beta1() const { return beta1; } + double Beta1() const { return optimizer.UpdatePolicy().Beta1(); } //! Modify the smoothing parameter. - double& Beta1() { return beta1; } + double& Beta1() { return optimizer.UpdatePolicy().Beta1(); } //! Get the second moment coefficient. - double Beta2() const { return beta2; } + double Beta2() const { return optimizer.UpdatePolicy().Beta2(); } //! Modify the second moment coefficient. - double& Beta2() { return beta2; } + double& Beta2() { return optimizer.UpdatePolicy().Beta2(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return eps; } + double Epsilon() const { return optimizer.UpdatePolicy().Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return eps; } + double& Epsilon() { return optimizer.UpdatePolicy().Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). - size_t MaxIterations() const { return maxIterations; } + size_t MaxIterations() const { return optimizer.MaxIterations(); } //! Modify the maximum number of iterations (0 indicates no limit). - size_t& MaxIterations() { return maxIterations; } + size_t& MaxIterations() { return optimizer.MaxIterations(); } //! Get the tolerance for termination. - double Tolerance() const { return tolerance; } + double Tolerance() const { return optimizer.Tolerance(); } //! Modify the tolerance for termination. - double& Tolerance() { return tolerance; } + double& Tolerance() { return optimizer.Tolerance(); } //! Get whether or not the individual functions are shuffled. - bool Shuffle() const { return shuffle; } + bool Shuffle() const { return optimizer.Shuffle(); } //! Modify whether or not the individual functions are shuffled. - bool& Shuffle() { return shuffle; } + bool& Shuffle() { return optimizer.Shuffle(); } //! Get whether or not the AdaMax optimizer is specified. - bool AdaMax() const { return adaMax; } + bool AdaMax() const { return optimizer.UpdatePolicy().AdaMax(); } //! Modify wehther or not the AdaMax optimizer is to be used. - bool& AdaMax() { return adaMax; } + bool& AdaMax() { return optimizer.UpdatePolicy().AdaMax(); } private: - //! The instantiated function. - DecomposableFunctionType& function; - - //! The step size for each example. - double stepSize; - - //! Exponential decay rate for the first moment estimates. - double beta1; - - //! Exponential decay rate for the weighted infinity norm estimates. - double beta2; - - //! The value used to initialise the mean squared gradient parameter. - double eps; - - //! The maximum number of allowed iterations. - size_t maxIterations; - - //! The tolerance for termination. - double tolerance; - - //! 
Controls whether or not the individual functions are shuffled when - //! iterating. - bool shuffle; - - //! Specifies whether or not the AdaMax optimizer is to be used. - bool adaMax; + //! The Stochastic Gradient Descent object with Adam policy. + SGD optimizer; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/adam/adam_impl.hpp b/src/mlpack/core/optimizers/adam/adam_impl.hpp index 8a6be66c079..b9faf1226d9 100644 --- a/src/mlpack/core/optimizers/adam/adam_impl.hpp +++ b/src/mlpack/core/optimizers/adam/adam_impl.hpp @@ -26,156 +26,22 @@ Adam::Adam(DecomposableFunctionType& function, const double stepSize, const double beta1, const double beta2, - const double eps, + const double epsilon, const size_t maxIterations, const double tolerance, const bool shuffle, const bool adaMax) : - function(function), - stepSize(stepSize), - beta1(beta1), - beta2(beta2), - eps(eps), - maxIterations(maxIterations), - tolerance(tolerance), - shuffle(shuffle), - adaMax(adaMax) + optimizer(function, + stepSize, + maxIterations, + tolerance, + shuffle, + AdamUpdate(epsilon, + beta1, + beta2, + adaMax)) { /* Nothing to do. */ } -//! Optimize the function (minimize). -template -double Adam::Optimize(arma::mat& iterate) -{ - // Find the number of functions to use. - const size_t numFunctions = function.NumFunctions(); - - // This is used only if shuffle is true. - arma::Col visitationOrder; - if (shuffle) - visitationOrder = arma::shuffle(arma::linspace>(0, - (numFunctions - 1), numFunctions)); - - // To keep track of where we are and how things are going. - size_t currentFunction = 0; - double overallObjective = 0; - double lastObjective = DBL_MAX; - - // Calculate the first objective function. - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - - // Now iterate! - arma::mat gradient(iterate.n_rows, iterate.n_cols); - - // Exponential moving average of gradient values. - arma::mat m = arma::zeros(iterate.n_rows, iterate.n_cols); - - /** - * Initialize either the exponentially weighted infinity norm for AdaMax - * optimizer (u) or exponential moving average of squared gradient values - * for Adam optimizer (v). - */ - arma::mat u, v; - if (adaMax) - { - u = arma::zeros(iterate.n_rows, iterate.n_cols); - } - else - { - v = arma::zeros(iterate.n_rows, iterate.n_cols); - } - - for (size_t i = 1; i != maxIterations; ++i, ++currentFunction) - { - // Is this iteration the start of a sequence? - if ((currentFunction % numFunctions) == 0) - { - // Output current objective function. - Log::Info << "Adam: iteration " << i << ", objective " << overallObjective - << "." << std::endl; - - if (std::isnan(overallObjective) || std::isinf(overallObjective)) - { - Log::Warn << "Adam: converged to " << overallObjective - << "; terminating with failure. Try a smaller step size?" - << std::endl; - return overallObjective; - } - - if (std::abs(lastObjective - overallObjective) < tolerance) - { - Log::Info << "Adam: minimized within tolerance " << tolerance << "; " - << "terminating optimization." << std::endl; - return overallObjective; - } - - // Reset the counter variables. - lastObjective = overallObjective; - overallObjective = 0; - currentFunction = 0; - - if (shuffle) // Determine order of visitation. - visitationOrder = arma::shuffle(visitationOrder); - } - - // Evaluate the gradient for this iteration. 
- if (shuffle) - function.Gradient(iterate, visitationOrder[currentFunction], gradient); - else - function.Gradient(iterate, currentFunction, gradient); - - // And update the iterate. - m *= beta1; - m += (1 - beta1) * gradient; - - if (adaMax) - { - // Update the exponentially weighted infinity norm. - u *= beta2; - u = arma::max(u, arma::abs(gradient)); - } - else - { - v *= beta2; - v += (1 - beta2) * (gradient % gradient); - } - - const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i); - const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i); - - if (adaMax) - { - if (biasCorrection1 != 0.0) - iterate -= (stepSize / biasCorrection1 * m / (u + eps)); - } - else - { - /** - * It should be noted that the term, m / (arma::sqrt(v) + eps), in the - * following expression is an approximation of the following actual term; - * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). - */ - iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * - m / (arma::sqrt(v) + eps); - } - - // Now add that to the overall objective function. - if (shuffle) - overallObjective += function.Evaluate(iterate, - visitationOrder[currentFunction]); - else - overallObjective += function.Evaluate(iterate, currentFunction); - } - - Log::Info << "Adam: maximum iterations (" << maxIterations << ") reached; " - << "terminating optimization." << std::endl; - // Calculate final objective. - overallObjective = 0; - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - return overallObjective; -} - } // namespace optimization } // namespace mlpack diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp new file mode 100644 index 00000000000..fba575f4ce5 --- /dev/null +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -0,0 +1,180 @@ +/** + * @file adam.hpp + * @author Ryan Curtin + * @author Vasanth Kalingeri + * @author Marcus Edel + * @author Vivek Pal + * + * Adam and AdaMax optimizer. Adam is an an algorithm for first-order gradient- + * -based optimization of stochastic objective functions, based on adaptive + * estimates of lower-order moments. AdaMax is simply a variant of Adam based + * on the infinity norm. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_OPTIMIZERS_ADAM_ADAM_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_ADAM_ADAM_UPDATE_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * Adam is an optimizer that computes individual adaptive learning rates for + * different parameters from estimates of first and second moments of the + * gradients. AdaMax is a variant of Adam based on the infinity norm as given + * in the section 7 of the following paper. + * + * For more information, see the following. + * + * @code + * @article{Kingma2014, + * author = {Diederik P. Kingma and Jimmy Ba}, + * title = {Adam: {A} Method for Stochastic Optimization}, + * journal = {CoRR}, + * year = {2014} + * } + * @endcode + * + */ +class AdamUpdate +{ + public: + /** + * Construct the Adam update policy with the given epsilon parameter. + * + * @param epsilon The epsilon value used to initialise the squared gradient + * parameter. 
+ */ + AdamUpdate(const double epsilon = 1e-8, + const double beta1 = 0.9, + const double beta2 = 0.999, + const bool adaMax = false) : + epsilon(epsilon), + beta1(beta1), + beta2(beta2), + adaMax(adaMax) + { + // Nothing to do. + } + + /** + * The Initialize method is called by SGD Optimizer method before the start of + * the iteration update process. + * + * @param rows number of rows in the gradient matrix. + * @param cols number of columns in the gradient matrix. + */ + void Initialize(const size_t rows, + const size_t cols) + { + m = arma::zeros(rows, cols); + if (adaMax) + { + u = arma::zeros(rows, cols); + } + else + { + v = arma::zeros(rows, cols); + } + } + + /** + * Update step for Adam. + * + * @param iterate Parameters that minimize the function. + * @param stepSize Step size to be used for the given iteration. + * @param gradient The gradient matrix. + */ + void Update(arma::mat& iterate, + const double stepSize, + const arma::mat& gradient, + const size_t i) + { + // And update the iterate. + m *= beta1; + m += (1 - beta1) * gradient; + + if (adaMax) + { + // Update the exponentially weighted infinity norm. + u *= beta2; + u = arma::max(u, arma::abs(gradient)); + } + else + { + v *= beta2; + v += (1 - beta2) * (gradient % gradient); + } + + const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i); + const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i); + + if (adaMax) + { + if (biasCorrection1 != 0.0) + iterate -= (stepSize / biasCorrection1 * m / (u + epsilon)); + } + else + { + /** + * It should be noted that the term, m / (arma::sqrt(v) + eps), in the + * following expression is an approximation of the following actual term; + * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). + */ + iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * + m / (arma::sqrt(v) + epsilon); + } + } + + //! Get the value used to initialise the squared gradient parameter. + double Epsilon() const { return epsilon; } + //! Modify the value used to initialise the squared gradient parameter. + double& Epsilon() { return epsilon; } + + //! Get the smoothing parameter. + double Beta1() const { return beta1; } + //! Modify the smoothing parameter. + double& Beta1() { return beta1; } + + //! Get the second moment coefficient. + double Beta2() const { return beta2; } + //! Modify the second moment coefficient. + double& Beta2() { return beta2; } + + //! Get whether or not the AdaMax optimizer is specified. + bool AdaMax() const { return adaMax; } + //! Modify wehther or not the AdaMax optimizer is to be used. + bool& AdaMax() { return adaMax; } + + private: + // The epsilon value used to initialise the squared gradient parameter. + double epsilon; + + // The smoothing parameter. + double beta1; + + // The second moment coefficient. + double beta2; + + //! Specifies whether or not the AdaMax optimizer is to be used. + bool adaMax; + + // The exponential moving average of gradient values. + arma::mat m; + + // The exponentially weighted infinity norm. + arma::mat u; + + // The exponential moving average of squared gradient values. + arma::mat v; +}; + +} // namespace optimization +} // namespace mlpack + +#endif From bf7e38d7657ae51ce9dfbe9a20619932501a943b Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 17:13:21 +0530 Subject: [PATCH 31/78] Add iteration parameter "i" to the Update function Used in the Update step of the Adam optimizer. 
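Concretely, the iteration count is what drives Adam's bias-correction terms, so the update policy needs to receive it from SGD in some form. The relevant fragment of AdamUpdate::Update() above, for reference:

    // The bias corrections depend on how many steps have been taken so far.
    const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i);
    const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i);

    iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) *
        m / (arma::sqrt(v) + epsilon);

A later commit in this series drops the extra parameter again in favour of a counter kept inside the policy.
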
--- src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp | 3 ++- src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp | 3 ++- src/mlpack/core/optimizers/sgd/sgd_impl.hpp | 2 +- .../core/optimizers/sgd/update_policies/momentum_update.hpp | 3 ++- .../core/optimizers/sgd/update_policies/vanilla_update.hpp | 3 ++- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp index 391e2a88bd3..537d2d067c9 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp @@ -83,7 +83,8 @@ class AdaDeltaUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient) + const arma::mat& gradient, + const size_t i) { // Accumulate gradient. meanSquaredGradient *= rho; diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp index 2fb52573b1b..12634a4eff7 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp @@ -79,7 +79,8 @@ class AdaGradUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient) + const arma::mat& gradient, + const size_t i) { squaredGradient += (gradient % gradient); iterate -= (stepSize * gradient) / (arma::sqrt(squaredGradient) + epsilon); diff --git a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp index 5ac19ba9607..ca756cb20e2 100644 --- a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp +++ b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp @@ -108,7 +108,7 @@ double SGD::Optimize( function.Gradient(iterate, currentFunction, gradient); // Use the update policy to take a step. - updatePolicy.Update(iterate, stepSize, gradient); + updatePolicy.Update(iterate, stepSize, gradient, i); // Now add that to the overall objective function. if (shuffle) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp index 2947c355f90..889d7255049 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp @@ -99,7 +99,8 @@ class MomentumUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient) + const arma::mat& gradient, + const size_t i) { velocity = momentum * velocity - stepSize * gradient; iterate += velocity; diff --git a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp index 1bd85bf1c37..2c75a4b4fdc 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp @@ -52,7 +52,8 @@ class VanillaUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient) + const arma::mat& gradient, + const size_t i) { // Perform the vanilla SGD update. 
iterate -= stepSize * gradient; From c0eb047e8cebe43795fc30ce7e8bfb10638ec8b3 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 17:15:44 +0530 Subject: [PATCH 32/78] Suppress compiler warnings on unused parameter i --- src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp | 2 +- src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp | 2 +- .../core/optimizers/sgd/update_policies/momentum_update.hpp | 2 +- .../core/optimizers/sgd/update_policies/vanilla_update.hpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp index 537d2d067c9..cb582239a8e 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp @@ -84,7 +84,7 @@ class AdaDeltaUpdate void Update(arma::mat& iterate, const double stepSize, const arma::mat& gradient, - const size_t i) + const size_t /*i*/) { // Accumulate gradient. meanSquaredGradient *= rho; diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp index 12634a4eff7..bc0f1296aed 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp @@ -80,7 +80,7 @@ class AdaGradUpdate void Update(arma::mat& iterate, const double stepSize, const arma::mat& gradient, - const size_t i) + const size_t /*i*/) { squaredGradient += (gradient % gradient); iterate -= (stepSize * gradient) / (arma::sqrt(squaredGradient) + epsilon); diff --git a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp index 889d7255049..7e4a1deeed3 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp @@ -100,7 +100,7 @@ class MomentumUpdate void Update(arma::mat& iterate, const double stepSize, const arma::mat& gradient, - const size_t i) + const size_t /*i*/) { velocity = momentum * velocity - stepSize * gradient; iterate += velocity; diff --git a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp index 2c75a4b4fdc..159d7f3398f 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp @@ -53,7 +53,7 @@ class VanillaUpdate void Update(arma::mat& iterate, const double stepSize, const arma::mat& gradient, - const size_t i) + const size_t /*i*/) { // Perform the vanilla SGD update. iterate -= stepSize * gradient; From 14a0091884c41870c21474ecdb0f5cea2a509b1c Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 19:01:37 +0530 Subject: [PATCH 33/78] Use the combination of for_each and a lambda expression Avoids allocating the stepSizeMat completely. --- .../optimizers/smorms3/smorms3_update.hpp | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index af8de48835f..b9f43c1f51d 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -43,8 +43,7 @@ class SMORMS3Update * @param epsilon Value used to initialise the mean squared gradient parameter. 
*/ SMORMS3Update(const double epsilon = 1e-16) : - epsilon(epsilon), - previousStepSize(0) + epsilon(epsilon) { /* Do nothing. */ } //! Get the value used to initialise the mean squared gradient parameter. @@ -67,9 +66,6 @@ class SMORMS3Update mem = arma::ones(rows, cols); g = arma::zeros(rows, cols); g2 = arma::zeros(rows, cols); - - // Initialise a matrix to be filled with stepSize. - stepSizeMat = arma::zeros(rows, cols); } /** @@ -94,13 +90,9 @@ class SMORMS3Update arma::mat x = (g % g) / (g2 + epsilon); - if (stepSize != previousStepSize) - { - stepSizeMat.fill(stepSize); - previousStepSize = stepSize; - } + x.for_each( [stepSize](double &v) { v = std::min(v, stepSize); } ); - iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + epsilon); + iterate -= gradient % x / (arma::sqrt(g2) + epsilon); mem %= (1 - x); mem += 1; @@ -109,14 +101,8 @@ class SMORMS3Update //! The value used to initialise the mean squared gradient parameter. double epsilon; - //! The previous value of step size in each iteration of update step. - double previousStepSize; - // The parameters mem, g and g2. arma::mat mem, g, g2; - - // The matrix to be filled with stepSize. - arma::mat stepSizeMat; }; } // namespace optimization From 93e817b998406508ec005c73bf9464a1fc2825cd Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 19:06:04 +0530 Subject: [PATCH 34/78] fixup! Fix a couple of comments --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index b9f43c1f51d..6617810de2b 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -55,8 +55,8 @@ class SMORMS3Update * The Initialize method is called by SGD::Optimize method with UpdatePolicy * SMORMS3Update before the start of the iteration update process. * - * @param n_rows number of rows in the gradient matrix. - * @param n_cols number of columns in the gradient matrix. + * @param rows number of rows in the gradient matrix. + * @param cols number of columns in the gradient matrix. 
* @param lRate */ void Initialize(const size_t rows, From 1f0310b52fd2f8ef7cfc089b470cc05b3944405d Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 21:04:11 +0530 Subject: [PATCH 35/78] Try transform() instead of for_each() to fix Travis build failure --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 6617810de2b..2d22ae4863a 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -90,7 +90,7 @@ class SMORMS3Update arma::mat x = (g % g) / (g2 + epsilon); - x.for_each( [stepSize](double &v) { v = std::min(v, stepSize); } ); + x.transform( [stepSize](double &v) { return std::min(v, stepSize); } ); iterate -= gradient % x / (arma::sqrt(g2) + epsilon); From 82933523e04e6f6da4f84f7ffca153d3462bbd95 Mon Sep 17 00:00:00 2001 From: Abhinav Moudgil Date: Sat, 1 Apr 2017 03:16:50 +0530 Subject: [PATCH 36/78] Improve comments --- src/mlpack/tests/convolutional_network_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index 6d79f7605b2..26b4c28bd18 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -47,12 +47,12 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) { if (i < nPoints / 2) { - // class 1 - digit = 4 + // Assign label "1" to all samples with digit = 4 Y(i) = 1; } else { - // class 2 - digit = 9 + // Assign label "2" to all samples with digit = 9 Y(i) = 2; } } From cda0e912402461730a5091c897319296f91154ca Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sat, 1 Apr 2017 06:51:45 +0530 Subject: [PATCH 37/78] Address review comments * Use a local variable to keep count of iterations. * Change adaMax parameter to a template parameter. --- .../optimizers/ada_delta/ada_delta_update.hpp | 3 +- .../optimizers/ada_grad/ada_grad_update.hpp | 3 +- src/mlpack/core/optimizers/adam/adam.hpp | 14 ++------- src/mlpack/core/optimizers/adam/adam_impl.hpp | 26 +++++++--------- .../core/optimizers/adam/adam_update.hpp | 31 +++++++++---------- src/mlpack/core/optimizers/sgd/sgd_impl.hpp | 2 +- .../sgd/update_policies/momentum_update.hpp | 3 +- .../sgd/update_policies/vanilla_update.hpp | 3 +- .../logistic_regression.hpp | 2 +- .../logistic_regression_impl.hpp | 2 +- src/mlpack/tests/adam_test.cpp | 12 ++++--- 11 files changed, 44 insertions(+), 57 deletions(-) diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp index cb582239a8e..391e2a88bd3 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp @@ -83,8 +83,7 @@ class AdaDeltaUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t /*i*/) + const arma::mat& gradient) { // Accumulate gradient. 
meanSquaredGradient *= rho; diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp index bc0f1296aed..2fb52573b1b 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp @@ -79,8 +79,7 @@ class AdaGradUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t /*i*/) + const arma::mat& gradient) { squaredGradient += (gradient % gradient); iterate -= (stepSize * gradient) / (arma::sqrt(squaredGradient) + epsilon); diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index 9c729b4d887..e590c0b2813 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -64,7 +64,7 @@ namespace optimization { * @tparam DecomposableFunctionType Decomposable objective function type to be * minimized. */ -template +template class Adam { public: @@ -87,8 +87,6 @@ class Adam * @param tolerance Maximum absolute tolerance to terminate algorithm. * @param shuffle If true, the function order is shuffled; otherwise, each * function is visited in linear order. - * @param adaMax If true, then the AdaMax optimizer is used; otherwise, by - * default the Adam optimizer is used. */ Adam(DecomposableFunctionType& function, const double stepSize = 0.001, @@ -97,8 +95,7 @@ class Adam const double eps = 1e-8, const size_t maxIterations = 100000, const double tolerance = 1e-5, - const bool shuffle = true, - const bool adaMax = false); + const bool shuffle = true); /** * Optimize the given function using Adam. The given starting point will be @@ -153,14 +150,9 @@ class Adam //! Modify whether or not the individual functions are shuffled. bool& Shuffle() { return optimizer.Shuffle(); } - //! Get whether or not the AdaMax optimizer is specified. - bool AdaMax() const { return optimizer.UpdatePolicy().AdaMax(); } - //! Modify wehther or not the AdaMax optimizer is to be used. - bool& AdaMax() { return optimizer.UpdatePolicy().AdaMax(); } - private: //! The Stochastic Gradient Descent object with Adam policy. - SGD optimizer; + SGD > optimizer; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/adam/adam_impl.hpp b/src/mlpack/core/optimizers/adam/adam_impl.hpp index b9faf1226d9..4905a0cec24 100644 --- a/src/mlpack/core/optimizers/adam/adam_impl.hpp +++ b/src/mlpack/core/optimizers/adam/adam_impl.hpp @@ -21,25 +21,23 @@ namespace mlpack { namespace optimization { -template -Adam::Adam(DecomposableFunctionType& function, - const double stepSize, - const double beta1, - const double beta2, - const double epsilon, - const size_t maxIterations, - const double tolerance, - const bool shuffle, - const bool adaMax) : +template +Adam::Adam(DecomposableFunctionType& function, + const double stepSize, + const double beta1, + const double beta2, + const double epsilon, + const size_t maxIterations, + const double tolerance, + const bool shuffle) : optimizer(function, stepSize, maxIterations, tolerance, shuffle, - AdamUpdate(epsilon, - beta1, - beta2, - adaMax)) + AdamUpdate(epsilon, + beta1, + beta2)) { /* Nothing to do. 
*/ } } // namespace optimization diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index fba575f4ce5..d5a74f17b4e 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -39,8 +39,11 @@ namespace optimization { * year = {2014} * } * @endcode - * + * + * @param adaMax If true, then the AdaMax optimizer is used; otherwise, by + * default the Adam optimizer is used. */ +template class AdamUpdate { public: @@ -52,12 +55,11 @@ class AdamUpdate */ AdamUpdate(const double epsilon = 1e-8, const double beta1 = 0.9, - const double beta2 = 0.999, - const bool adaMax = false) : + const double beta2 = 0.999) : epsilon(epsilon), beta1(beta1), beta2(beta2), - adaMax(adaMax) + iteration(0) { // Nothing to do. } @@ -92,9 +94,11 @@ class AdamUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t i) + const arma::mat& gradient) { + // Increment the iteration counter variable. + ++iteration; + // And update the iterate. m *= beta1; m += (1 - beta1) * gradient; @@ -111,8 +115,8 @@ class AdamUpdate v += (1 - beta2) * (gradient % gradient); } - const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i); - const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i); + const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); + const double biasCorrection2 = 1.0 - std::pow(beta2, (double) iteration); if (adaMax) { @@ -146,11 +150,6 @@ class AdamUpdate //! Modify the second moment coefficient. double& Beta2() { return beta2; } - //! Get whether or not the AdaMax optimizer is specified. - bool AdaMax() const { return adaMax; } - //! Modify wehther or not the AdaMax optimizer is to be used. - bool& AdaMax() { return adaMax; } - private: // The epsilon value used to initialise the squared gradient parameter. double epsilon; @@ -161,9 +160,6 @@ class AdamUpdate // The second moment coefficient. double beta2; - //! Specifies whether or not the AdaMax optimizer is to be used. - bool adaMax; - // The exponential moving average of gradient values. arma::mat m; @@ -172,6 +168,9 @@ class AdamUpdate // The exponential moving average of squared gradient values. arma::mat v; + + // The number of iterations. + double iteration; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp index ca756cb20e2..5ac19ba9607 100644 --- a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp +++ b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp @@ -108,7 +108,7 @@ double SGD::Optimize( function.Gradient(iterate, currentFunction, gradient); // Use the update policy to take a step. - updatePolicy.Update(iterate, stepSize, gradient, i); + updatePolicy.Update(iterate, stepSize, gradient); // Now add that to the overall objective function. 
if (shuffle) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp index 7e4a1deeed3..2947c355f90 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp @@ -99,8 +99,7 @@ class MomentumUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t /*i*/) + const arma::mat& gradient) { velocity = momentum * velocity - stepSize * gradient; iterate += velocity; diff --git a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp index 159d7f3398f..1bd85bf1c37 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp @@ -52,8 +52,7 @@ class VanillaUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t /*i*/) + const arma::mat& gradient) { // Perform the vanilla SGD update. iterate -= stepSize * gradient; diff --git a/src/mlpack/methods/logistic_regression/logistic_regression.hpp b/src/mlpack/methods/logistic_regression/logistic_regression.hpp index 7a03a0a04e0..a19f31b876a 100644 --- a/src/mlpack/methods/logistic_regression/logistic_regression.hpp +++ b/src/mlpack/methods/logistic_regression/logistic_regression.hpp @@ -119,7 +119,7 @@ class LogisticRegression * @param responses Outputs results from input training variables. */ template< - template class OptimizerType = mlpack::optimization::L_BFGS + template class OptimizerType = mlpack::optimization::L_BFGS > void Train(const MatType& predictors, const arma::Row& responses); diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp b/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp index 4974c975a59..94945258474 100644 --- a/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp +++ b/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp @@ -66,7 +66,7 @@ LogisticRegression::LogisticRegression( } template -template class OptimizerType> +template class OptimizerType> void LogisticRegression::Train(const MatType& predictors, const arma::Row& responses) { diff --git a/src/mlpack/tests/adam_test.cpp b/src/mlpack/tests/adam_test.cpp index 88d839fd5cf..6dae1e4320f 100644 --- a/src/mlpack/tests/adam_test.cpp +++ b/src/mlpack/tests/adam_test.cpp @@ -36,7 +36,8 @@ BOOST_AUTO_TEST_SUITE(AdamTest); BOOST_AUTO_TEST_CASE(SimpleAdamTestFunction) { SGDTestFunction f; - Adam optimizer(f, 1e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, true); + Adam optimizer(f, 1e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, + true); arma::mat coordinates = f.GetInitialPoint(); optimizer.Optimize(coordinates); @@ -52,8 +53,8 @@ BOOST_AUTO_TEST_CASE(SimpleAdamTestFunction) BOOST_AUTO_TEST_CASE(SimpleAdaMaxTestFunction) { SGDTestFunction f; - Adam optimizer(f, 2e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, true - ,true); + Adam optimizer(f, 2e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, + true); arma::mat coordinates = f.GetInitialPoint(); optimizer.Optimize(coordinates); @@ -174,8 +175,9 @@ BOOST_AUTO_TEST_CASE(AdaMaxLogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - Adam > adamax(lrf, 1e-3, 0.9, 0.999, 1e-8, 5000000, - 1e-9, true, true); + Adam, true> adamax(lrf, 1e-3, 0.9, 0.999, 1e-8, + 
5000000, 1e-9, true); + lr.Train(adamax); // Ensure that the error is close to zero. From e01bb43b2474169f03668bc90467e65efe3443a1 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 01:20:06 +0530 Subject: [PATCH 38/78] Create separate UpdateRule classes for Adam and AdaMax --- .../core/optimizers/adam/CMakeLists.txt | 1 + src/mlpack/core/optimizers/adam/adam.hpp | 19 ++- src/mlpack/core/optimizers/adam/adam_impl.hpp | 25 +-- .../core/optimizers/adam/adam_update.hpp | 51 ++----- .../core/optimizers/adam/adamax_update.hpp | 143 ++++++++++++++++++ src/mlpack/tests/adam_test.cpp | 8 +- 6 files changed, 186 insertions(+), 61 deletions(-) create mode 100644 src/mlpack/core/optimizers/adam/adamax_update.hpp diff --git a/src/mlpack/core/optimizers/adam/CMakeLists.txt b/src/mlpack/core/optimizers/adam/CMakeLists.txt index eabdbbaafa5..1377bbb0e15 100644 --- a/src/mlpack/core/optimizers/adam/CMakeLists.txt +++ b/src/mlpack/core/optimizers/adam/CMakeLists.txt @@ -2,6 +2,7 @@ set(SOURCES adam.hpp adam_impl.hpp adam_update.hpp + adamax_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index e590c0b2813..f81b3efb0e5 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -22,6 +22,7 @@ #include #include "adam_update.hpp" +#include "adamax_update.hpp" namespace mlpack { namespace optimization { @@ -63,9 +64,13 @@ namespace optimization { * * @tparam DecomposableFunctionType Decomposable objective function type to be * minimized. + * @tparam UpdateRule Adam optimizer update rule to be used. */ -template -class Adam +template< + typename DecomposableFunctionType, + typename UpdateRule = AdamUpdate +> +class AdamType { public: /** @@ -88,7 +93,7 @@ class Adam * @param shuffle If true, the function order is shuffled; otherwise, each * function is visited in linear order. */ - Adam(DecomposableFunctionType& function, + AdamType(DecomposableFunctionType& function, const double stepSize = 0.001, const double beta1 = 0.9, const double beta2 = 0.999, @@ -152,9 +157,15 @@ class Adam private: //! The Stochastic Gradient Descent object with Adam policy. - SGD > optimizer; + SGD optimizer; }; +template +using Adam = AdamType; + +template +using AdaMax = AdamType; + } // namespace optimization } // namespace mlpack diff --git a/src/mlpack/core/optimizers/adam/adam_impl.hpp b/src/mlpack/core/optimizers/adam/adam_impl.hpp index 4905a0cec24..82408de71e0 100644 --- a/src/mlpack/core/optimizers/adam/adam_impl.hpp +++ b/src/mlpack/core/optimizers/adam/adam_impl.hpp @@ -21,23 +21,24 @@ namespace mlpack { namespace optimization { -template -Adam::Adam(DecomposableFunctionType& function, - const double stepSize, - const double beta1, - const double beta2, - const double epsilon, - const size_t maxIterations, - const double tolerance, - const bool shuffle) : +template +AdamType::AdamType( + DecomposableFunctionType& function, + const double stepSize, + const double beta1, + const double beta2, + const double epsilon, + const size_t maxIterations, + const double tolerance, + const bool shuffle) : optimizer(function, stepSize, maxIterations, tolerance, shuffle, - AdamUpdate(epsilon, - beta1, - beta2)) + UpdateRule(epsilon, + beta1, + beta2)) { /* Nothing to do. 
*/ } } // namespace optimization diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index d5a74f17b4e..aa20e2f752d 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -39,11 +39,7 @@ namespace optimization { * year = {2014} * } * @endcode - * - * @param adaMax If true, then the AdaMax optimizer is used; otherwise, by - * default the Adam optimizer is used. */ -template class AdamUpdate { public: @@ -75,14 +71,7 @@ class AdamUpdate const size_t cols) { m = arma::zeros(rows, cols); - if (adaMax) - { - u = arma::zeros(rows, cols); - } - else - { - v = arma::zeros(rows, cols); - } + v = arma::zeros(rows, cols); } /** @@ -103,36 +92,19 @@ class AdamUpdate m *= beta1; m += (1 - beta1) * gradient; - if (adaMax) - { - // Update the exponentially weighted infinity norm. - u *= beta2; - u = arma::max(u, arma::abs(gradient)); - } - else - { - v *= beta2; - v += (1 - beta2) * (gradient % gradient); - } + v *= beta2; + v += (1 - beta2) * (gradient % gradient); const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); const double biasCorrection2 = 1.0 - std::pow(beta2, (double) iteration); - if (adaMax) - { - if (biasCorrection1 != 0.0) - iterate -= (stepSize / biasCorrection1 * m / (u + epsilon)); - } - else - { - /** - * It should be noted that the term, m / (arma::sqrt(v) + eps), in the - * following expression is an approximation of the following actual term; - * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). - */ - iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * - m / (arma::sqrt(v) + epsilon); - } + /** + * It should be noted that the term, m / (arma::sqrt(v) + eps), in the + * following expression is an approximation of the following actual term; + * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). + */ + iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * + m / (arma::sqrt(v) + epsilon); } //! Get the value used to initialise the squared gradient parameter. @@ -163,9 +135,6 @@ class AdamUpdate // The exponential moving average of gradient values. arma::mat m; - // The exponentially weighted infinity norm. - arma::mat u; - // The exponential moving average of squared gradient values. arma::mat v; diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp new file mode 100644 index 00000000000..7aa5b6b9908 --- /dev/null +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -0,0 +1,143 @@ +/** + * @file adam.hpp + * @author Ryan Curtin + * @author Vasanth Kalingeri + * @author Marcus Edel + * @author Vivek Pal + * + * AdaMax update rule. Adam is an an algorithm for first-order gradient- + * -based optimization of stochastic objective functions, based on adaptive + * estimates of lower-order moments. AdaMax is simply a variant of Adam based + * on the infinity norm. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_CORE_OPTIMIZERS_ADAM_ADAMAX_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_ADAM_ADAMAX_UPDATE_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * AdaMax is a variant of Adam, an optimizer that computes individual adaptive + * learning rates for different parameters from estimates of first and second + * moments of the gradients.based on the infinity norm as given in the section + * 7 of the following paper. + * + * For more information, see the following. + * + * @code + * @article{Kingma2014, + * author = {Diederik P. Kingma and Jimmy Ba}, + * title = {Adam: {A} Method for Stochastic Optimization}, + * journal = {CoRR}, + * year = {2014} + * } + * @endcode + */ +class AdaMaxUpdate +{ + public: + /** + * Construct the AdaMax update policy with the given epsilon parameter. + * + * @param epsilon The epsilon value used to initialise the squared gradient + * parameter. + */ + AdaMaxUpdate(const double epsilon = 1e-8, + const double beta1 = 0.9, + const double beta2 = 0.999) : + epsilon(epsilon), + beta1(beta1), + beta2(beta2), + iteration(0) + { + // Nothing to do. + } + + /** + * The Initialize method is called by SGD Optimizer method before the start of + * the iteration update process. + * + * @param rows number of rows in the gradient matrix. + * @param cols number of columns in the gradient matrix. + */ + void Initialize(const size_t rows, + const size_t cols) + { + m = arma::zeros(rows, cols); + u = arma::zeros(rows, cols); + } + + /** + * Update step for Adam. + * + * @param iterate Parameters that minimize the function. + * @param stepSize Step size to be used for the given iteration. + * @param gradient The gradient matrix. + */ + void Update(arma::mat& iterate, + const double stepSize, + const arma::mat& gradient) + { + // Increment the iteration counter variable. + ++iteration; + + // And update the iterate. + m *= beta1; + m += (1 - beta1) * gradient; + + // Update the exponentially weighted infinity norm. + u *= beta2; + u = arma::max(u, arma::abs(gradient)); + + const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); + + if (biasCorrection1 != 0) + iterate -= (stepSize / biasCorrection1 * m / (u + epsilon)); + } + + //! Get the value used to initialise the squared gradient parameter. + double Epsilon() const { return epsilon; } + //! Modify the value used to initialise the squared gradient parameter. + double& Epsilon() { return epsilon; } + + //! Get the smoothing parameter. + double Beta1() const { return beta1; } + //! Modify the smoothing parameter. + double& Beta1() { return beta1; } + + //! Get the second moment coefficient. + double Beta2() const { return beta2; } + //! Modify the second moment coefficient. + double& Beta2() { return beta2; } + + private: + // The epsilon value used to initialise the squared gradient parameter. + double epsilon; + + // The smoothing parameter. + double beta1; + + // The second moment coefficient. + double beta2; + + // The exponential moving average of gradient values. + arma::mat m; + + // The exponentially weighted infinity norm. + arma::mat u; + + // The number of iterations. 
+ double iteration; +}; + +} // namespace optimization +} // namespace mlpack + +#endif diff --git a/src/mlpack/tests/adam_test.cpp b/src/mlpack/tests/adam_test.cpp index 6dae1e4320f..a88e3618f8a 100644 --- a/src/mlpack/tests/adam_test.cpp +++ b/src/mlpack/tests/adam_test.cpp @@ -53,8 +53,8 @@ BOOST_AUTO_TEST_CASE(SimpleAdamTestFunction) BOOST_AUTO_TEST_CASE(SimpleAdaMaxTestFunction) { SGDTestFunction f; - Adam optimizer(f, 2e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, - true); + AdaMax optimizer(f, 2e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, + true); arma::mat coordinates = f.GetInitialPoint(); optimizer.Optimize(coordinates); @@ -175,8 +175,8 @@ BOOST_AUTO_TEST_CASE(AdaMaxLogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - Adam, true> adamax(lrf, 1e-3, 0.9, 0.999, 1e-8, - 5000000, 1e-9, true); + AdaMax > adamax(lrf, 1e-3, 0.9, 0.999, 1e-8, + 5000000, 1e-9, true); lr.Train(adamax); From 177690bada1700da3a94d762b0f85dffb945870c Mon Sep 17 00:00:00 2001 From: Sagar B Hathwar Date: Sun, 2 Apr 2017 09:30:23 +0530 Subject: [PATCH 39/78] Include add_merge.hpp instead of itself in add_merge_impl.hpp --- src/mlpack/methods/ann/layer/add_merge_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/ann/layer/add_merge_impl.hpp b/src/mlpack/methods/ann/layer/add_merge_impl.hpp index 0ef62c2d0ac..583a4c1bee6 100644 --- a/src/mlpack/methods/ann/layer/add_merge_impl.hpp +++ b/src/mlpack/methods/ann/layer/add_merge_impl.hpp @@ -14,7 +14,7 @@ #define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP // In case it hasn't yet been included. -#include "add_merge_impl.hpp" +#include "add_merge.hpp" namespace mlpack { namespace ann /** Artificial Neural Network. */ { From d77444f5b1a9aa760cefb873dbb60ba873731c4a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 19:44:47 +0530 Subject: [PATCH 40/78] Add parameters description --- src/mlpack/core/optimizers/adam/adam_update.hpp | 2 ++ src/mlpack/core/optimizers/adam/adamax_update.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index aa20e2f752d..71a283632a5 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -48,6 +48,8 @@ class AdamUpdate * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. + * @param beta1 The smoothing parameter. + * @param beta2 The second moment coefficient. */ AdamUpdate(const double epsilon = 1e-8, const double beta1 = 0.9, diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index 7aa5b6b9908..a23996f30a0 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -48,6 +48,8 @@ class AdaMaxUpdate * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. + * @param beta1 The smoothing parameter. + * @param beta2 The second moment coefficient. 
*/ AdaMaxUpdate(const double epsilon = 1e-8, const double beta1 = 0.9, From 565242ef2f0b85616e419e1246d4ce617330771a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 19:45:37 +0530 Subject: [PATCH 41/78] Fix alignment issue --- src/mlpack/core/optimizers/adam/adam.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index f81b3efb0e5..37411d9bb0b 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -94,13 +94,13 @@ class AdamType * function is visited in linear order. */ AdamType(DecomposableFunctionType& function, - const double stepSize = 0.001, - const double beta1 = 0.9, - const double beta2 = 0.999, - const double eps = 1e-8, - const size_t maxIterations = 100000, - const double tolerance = 1e-5, - const bool shuffle = true); + const double stepSize = 0.001, + const double beta1 = 0.9, + const double beta2 = 0.999, + const double eps = 1e-8, + const size_t maxIterations = 100000, + const double tolerance = 1e-5, + const bool shuffle = true); /** * Optimize the given function using Adam. The given starting point will be From 16a27fc242de1d493b18805333e2bc8c72fe8d73 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 19:55:11 +0530 Subject: [PATCH 42/78] Fix macros --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 2d22ae4863a..c44448f05a3 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -9,8 +9,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_UPDATE_HPP -#define MLPACK_CORE_OPTIMIZERS_SMORMS3_UPDATE_HPP +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_UPDATE_HPP #include From d23d7eb55c822a63656b1627fad2dd66904ab287 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 19:59:12 +0530 Subject: [PATCH 43/78] Remove the comment for an unused parameter --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index c44448f05a3..fbc534926be 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -57,7 +57,6 @@ class SMORMS3Update * * @param rows number of rows in the gradient matrix. * @param cols number of columns in the gradient matrix. - * @param lRate */ void Initialize(const size_t rows, const size_t cols) From c64c610374b98ad3b9f126a19d41b40e7083963c Mon Sep 17 00:00:00 2001 From: sidak Date: Sun, 2 Apr 2017 21:18:34 +0530 Subject: [PATCH 44/78] Correct minor typo in ffn.hpp --- src/mlpack/methods/ann/ffn.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index 2bd20cfa2fb..051a5b03a69 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -204,7 +204,7 @@ class FFN void Gradient(); /** - * Reset the module infomration (weights/parameters). + * Reset the module information (weights/parameters). 
*/ void ResetParameters(); From ce245d1dbf96d2b81334e9ff0f807015029a3433 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 22:37:08 +0530 Subject: [PATCH 45/78] Correct the file name --- src/mlpack/core/optimizers/adam/adam_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index 71a283632a5..c6e61b1ff42 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -1,5 +1,5 @@ /** - * @file adam.hpp + * @file adam_update.hpp * @author Ryan Curtin * @author Vasanth Kalingeri * @author Marcus Edel From de7ff08cc59d7f790e9d68d8247f3aec1dcb1a17 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:35:15 +0530 Subject: [PATCH 46/78] Remove comments about AdaMax in adam_update.hpp --- src/mlpack/core/optimizers/adam/adam_update.hpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index c6e61b1ff42..7e42132dfe2 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -5,10 +5,9 @@ * @author Marcus Edel * @author Vivek Pal * - * Adam and AdaMax optimizer. Adam is an an algorithm for first-order gradient- - * -based optimization of stochastic objective functions, based on adaptive - * estimates of lower-order moments. AdaMax is simply a variant of Adam based - * on the infinity norm. + * Adam optimizer. Adam is an an algorithm for first-order gradient-based + * optimization of stochastic objective functions, based on adaptive estimates + * of lower-order moments. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -26,8 +25,7 @@ namespace optimization { /** * Adam is an optimizer that computes individual adaptive learning rates for * different parameters from estimates of first and second moments of the - * gradients. AdaMax is a variant of Adam based on the infinity norm as given - * in the section 7 of the following paper. + * gradients as given in the section 7 of the following paper. * * For more information, see the following. 
* From bec9a20185e7c45973db222fbef1662c3d1b5524 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:36:35 +0530 Subject: [PATCH 47/78] Remove a redundant type cast --- src/mlpack/core/optimizers/adam/adamax_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index a23996f30a0..17a33d2d26b 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -98,7 +98,7 @@ class AdaMaxUpdate u *= beta2; u = arma::max(u, arma::abs(gradient)); - const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); + const double biasCorrection1 = 1.0 - std::pow(beta1, iteration); if (biasCorrection1 != 0) iterate -= (stepSize / biasCorrection1 * m / (u + epsilon)); From c049330b118e7006545fb2ddbeb3bad1ec65ea79 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:40:12 +0530 Subject: [PATCH 48/78] Update a few comments --- src/mlpack/core/optimizers/adam/adam_update.hpp | 2 +- src/mlpack/core/optimizers/adam/adamax_update.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index 7e42132dfe2..95920c14ee7 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -42,7 +42,7 @@ class AdamUpdate { public: /** - * Construct the Adam update policy with the given epsilon parameter. + * Construct the Adam update policy with the given parameters. * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index 17a33d2d26b..97e26fe683d 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -1,5 +1,5 @@ /** - * @file adam.hpp + * @file adamax_update.hpp * @author Ryan Curtin * @author Vasanth Kalingeri * @author Marcus Edel @@ -44,7 +44,7 @@ class AdaMaxUpdate { public: /** - * Construct the AdaMax update policy with the given epsilon parameter. + * Construct the AdaMax update policy with the given parameters. * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. From e879e68a77fb0cbc2ed0a1817af77ab6da9a2e7b Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 5 Apr 2017 01:21:45 +0200 Subject: [PATCH 49/78] Use auxiliary memory to avoid copy in the iteration step. --- .../randomized_block_krylov_svd.cpp | 25 ++++++++++++------- .../randomized_block_krylov_svd.hpp | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index 9009c5f6422..41356e8e50a 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -2,7 +2,7 @@ * @file randomized_block_krylov_svd.cpp * @author Marcus Edel * - * Implementation of the randomized SVD method. + * Implementation of the randomized block krylov SVD method. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. 
You should have received a copy of the @@ -49,7 +49,7 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, arma::mat& v, const size_t rank) { - arma::mat Q, R, block; + arma::mat Q, R, block, blockIteration; if (blockSize == 0) { @@ -61,16 +61,23 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, // Construct and orthonormalize Krlov subspace. arma::mat K(data.n_rows, blockSize * (maxIterations + 1)); - arma::qr_econ(block, R, data * G); - // Copy the temporary memory to the right place. - K.submat(0, 0, block.n_rows - 1, block.n_cols - 1) = block; + // Create a working matrix using data from writable auxiliary memory + // (K matrix). Doing so avoids an uncessary copy in upcoming step. + block = arma::mat(K.memptr(), data.n_rows, blockSize, false); + arma::qr_econ(block, R, data * G); - for (size_t i = 0, b = block.n_cols; i < maxIterations; ++i, - b += block.n_cols) + for (size_t blockOffset = block.n_elem; blockOffset < K.n_elem; + blockOffset += block.n_elem) { - arma::qr_econ(block, R, data * (data.t() * block)); - K.submat(0, b, block.n_rows - 1, b + block.n_cols - 1) = block; + // Temporary working matrix to store the result in the correct place. + blockIteration = arma::mat(K.memptr() + blockOffset, data.n_rows, + blockSize, false); + + arma::qr_econ(blockIteration, R, data * (data.t() * block)); + + // Update working matrix for the next iteration. + block = arma::mat(K.memptr() + blockOffset, data.n_rows, blockSize, false); } arma::qr_econ(Q, R, K); diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp index f7d77ab4c50..06ef8b4c2b6 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp @@ -2,7 +2,7 @@ * @file randomized_block_krylov_svd.hpp * @author Marcus Edel * - * An implementation of the randomized SVD method. + * An implementation of the randomized block krylov SVD method. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the From 0a2e8cc34e3f84db4faf2a05dec50cf63bd0d6df Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 5 Apr 2017 01:24:12 +0200 Subject: [PATCH 50/78] Minor spelling fix. --- .../methods/block_krylov_svd/randomized_block_krylov_svd.cpp | 2 +- src/mlpack/tests/block_krylov_svd_test.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index 41356e8e50a..e0f689b63e1 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -59,7 +59,7 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, // Random block initialization. arma::mat G = arma::randn(data.n_rows, blockSize); - // Construct and orthonormalize Krlov subspace. + // Construct and orthonormalize Krylov subspace. 
arma::mat K(data.n_rows, blockSize * (maxIterations + 1)); // Create a working matrix using data from writable auxiliary memory diff --git a/src/mlpack/tests/block_krylov_svd_test.cpp b/src/mlpack/tests/block_krylov_svd_test.cpp index 4ca9c6258d2..5d7e6e2e283 100644 --- a/src/mlpack/tests/block_krylov_svd_test.cpp +++ b/src/mlpack/tests/block_krylov_svd_test.cpp @@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDReconstructionError) /* * Check if the method can handle noisy matrices. */ -BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNosiyLowRankTest) +BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNoisyLowRankTest) { arma::mat data; CreateNoisyLowRankMatrix(data, 100, 1000, 5, 1.0); From 6b097f28d5317628130aede16f019d2abe37a268 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 5 Apr 2017 20:21:59 +0200 Subject: [PATCH 51/78] It isn't guaranteed that the recurrent network test will converge in the specified number of iterations using random weights (if the test pased in 1 of 5 times that's fine). --- src/mlpack/tests/recurrent_network_test.cpp | 114 +++++++++++--------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index b726fc84ce9..73df32a209d 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -359,76 +359,90 @@ void ReberGrammarTestNetwork(bool embedded = false) * . . * ....... */ - const size_t outputSize = 7; - const size_t inputSize = 7; - const size_t rho = trainInput.at(0, 0).n_elem / inputSize; + // It isn't guaranteed that the recurrent network will converge in the + // specified number of iterations using random weights. If this works 1 of 5 + // times, I'm fine with that. All I want to know is that the network is able + // to escape from local minima and to solve the task. + size_t successes = 0; + for (size_t trial = 0; trial < 5; ++trial) + { + const size_t outputSize = 7; + const size_t inputSize = 7; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - RNN > model(rho); + RNN > model(rho); - model.Add >(); - model.Add >(inputSize, 20); - model.Add >(20, 7, rho); - model.Add >(7, outputSize); - model.Add >(); + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - StandardSGD opt(model, 0.1, 2, -50000); + StandardSGD opt(model, 0.1, 2, -50000); - arma::mat inputTemp, labelsTemp; - for (size_t i = 0; i < 40; i++) - { - for (size_t j = 0; j < trainReberGrammarCount; j++) + arma::mat inputTemp, labelsTemp; + for (size_t i = 0; i < 20; i++) { - inputTemp = trainInput.at(0, j); - labelsTemp = trainLabels.at(0, j); + for (size_t j = 0; j < trainReberGrammarCount; j++) + { + inputTemp = trainInput.at(0, j); + labelsTemp = trainLabels.at(0, j); - model.Train(inputTemp, labelsTemp, opt); + model.Train(inputTemp, labelsTemp, opt); + } } - } - double error = 0; + double error = 0; - // Ask the network to predict the next Reber grammar in the given sequence. - for (size_t i = 0; i < testReberGrammarCount; i++) - { - arma::mat output, prediction; - arma::mat input = testInput.at(0, i); + // Ask the network to predict the next Reber grammar in the given sequence. 
+ for (size_t i = 0; i < testReberGrammarCount; i++) + { + arma::mat output, prediction; + arma::mat input = testInput.at(0, i); - model.Predict(input, prediction); - data::Binarize(prediction, output, 0.5); + model.Predict(input, prediction); + data::Binarize(prediction, output, 0.5); - const size_t reberGrammerSize = 7; - std::string inputReber = ""; + const size_t reberGrammerSize = 7; + std::string inputReber = ""; - size_t reberError = 0; - for (size_t j = 0; j < (output.n_elem / reberGrammerSize); j++) - { - if (arma::sum(arma::sum(output.submat(j * reberGrammerSize, 0, (j + 1) * - reberGrammerSize - 1, 0))) != 1) break; + size_t reberError = 0; + for (size_t j = 0; j < (output.n_elem / reberGrammerSize); j++) + { + if (arma::sum(arma::sum(output.submat(j * reberGrammerSize, 0, (j + 1) * + reberGrammerSize - 1, 0))) != 1) break; - char predictedSymbol, inputSymbol; - std::string reberChoices; + char predictedSymbol, inputSymbol; + std::string reberChoices; - ReberReverseTranslation(output.submat(j * reberGrammerSize, 0, (j + 1) * - reberGrammerSize - 1, 0), predictedSymbol); - ReberReverseTranslation(input.submat(j * reberGrammerSize, 0, (j + 1) * - reberGrammerSize - 1, 0), inputSymbol); - inputReber += inputSymbol; + ReberReverseTranslation(output.submat(j * reberGrammerSize, 0, (j + 1) * + reberGrammerSize - 1, 0), predictedSymbol); + ReberReverseTranslation(input.submat(j * reberGrammerSize, 0, (j + 1) * + reberGrammerSize - 1, 0), inputSymbol); + inputReber += inputSymbol; - if (embedded) - GenerateNextEmbeddedReber(transitions, inputReber, reberChoices); - else - GenerateNextReber(transitions, inputReber, reberChoices); + if (embedded) + GenerateNextEmbeddedReber(transitions, inputReber, reberChoices); + else + GenerateNextReber(transitions, inputReber, reberChoices); - if (reberChoices.find(predictedSymbol) != std::string::npos) - reberError++; + if (reberChoices.find(predictedSymbol) != std::string::npos) + reberError++; + } + + if (reberError != (output.n_elem / reberGrammerSize)) + error += 1; } - if (reberError != (output.n_elem / reberGrammerSize)) - error += 1; + error /= testReberGrammarCount; + if (error <= 0.2) + { + ++successes; + break; + } } - error /= testReberGrammarCount; - BOOST_REQUIRE_LE(error, 0.2); + BOOST_REQUIRE_GE(successes, 1); } /** From 3026b5026ef1c71ae49e666ea55ecddef08634f8 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 5 Apr 2017 23:50:41 +0200 Subject: [PATCH 52/78] The default setting of strict (strict = false use the auxiliary memory until a size change) in versions 5.600 and earlier is true, so to make sure it's always false we set it explicitly. --- .../methods/block_krylov_svd/randomized_block_krylov_svd.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index e0f689b63e1..702d706fd28 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -64,7 +64,7 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, // Create a working matrix using data from writable auxiliary memory // (K matrix). Doing so avoids an uncessary copy in upcoming step. 
- block = arma::mat(K.memptr(), data.n_rows, blockSize, false); + block = arma::mat(K.memptr(), data.n_rows, blockSize, false, false); arma::qr_econ(block, R, data * G); for (size_t blockOffset = block.n_elem; blockOffset < K.n_elem; @@ -77,7 +77,8 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, arma::qr_econ(blockIteration, R, data * (data.t() * block)); // Update working matrix for the next iteration. - block = arma::mat(K.memptr() + blockOffset, data.n_rows, blockSize, false); + block = arma::mat(K.memptr() + blockOffset, data.n_rows, blockSize, false, + false); } arma::qr_econ(Q, R, K); From 8116544edee856141f12a2b9a13732a83671bdd3 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 22:31:32 +0530 Subject: [PATCH 53/78] Add implementation of the RMSprop update policy --- .../core/optimizers/rmsprop/CMakeLists.txt | 1 + .../core/optimizers/rmsprop/rmsprop.hpp | 64 ++++------ .../core/optimizers/rmsprop/rmsprop_impl.hpp | 109 ++-------------- .../optimizers/rmsprop/rmsprop_update.hpp | 116 ++++++++++++++++++ 4 files changed, 153 insertions(+), 137 deletions(-) create mode 100644 src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp diff --git a/src/mlpack/core/optimizers/rmsprop/CMakeLists.txt b/src/mlpack/core/optimizers/rmsprop/CMakeLists.txt index 75c30c67bb9..da3c7681030 100644 --- a/src/mlpack/core/optimizers/rmsprop/CMakeLists.txt +++ b/src/mlpack/core/optimizers/rmsprop/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES rmsprop.hpp rmsprop_impl.hpp + rmsprop_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 58969e5fcb5..31301a99b27 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -2,6 +2,7 @@ * @file rmsprop.hpp * @author Ryan Curtin * @author Marcus Edel + * @author Vivek Pal * * RMSprop optimizer. RmsProp is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. @@ -16,6 +17,9 @@ #include +#include +#include "rmsprop_update.hpp" + namespace mlpack { namespace optimization { @@ -76,7 +80,7 @@ class RMSprop * @param stepSize Step size for each iteration. * @param alpha Smoothing constant, similar to that used in AdaDelta and * momentum methods. - * @param eps Value used to initialise the mean squared gradient parameter. + * @param epsilon Value used to initialise the mean squared gradient parameter. * @param maxIterations Maximum number of iterations allowed (0 means no * limit). * @param tolerance Maximum absolute tolerance to terminate algorithm. @@ -86,7 +90,7 @@ class RMSprop RMSprop(DecomposableFunctionType& function, const double stepSize = 0.01, const double alpha = 0.99, - const double eps = 1e-8, + const double epsilon = 1e-8, const size_t maxIterations = 100000, const double tolerance = 1e-5, const bool shuffle = true); @@ -99,65 +103,49 @@ class RMSprop * @param iterate Starting point (will be modified). * @return Objective value of the final point. */ - double Optimize(arma::mat& iterate); + double Optimize(arma::mat& iterate) { return optimizer.Optimize(iterate); } //! Get the instantiated function to be optimized. - const DecomposableFunctionType& Function() const { return function; } + const DecomposableFunctionType& Function() const + { + return optimizer.Function(); + } //! Modify the instantiated function. - DecomposableFunctionType& Function() { return function; } + DecomposableFunctionType& Function() { return optimizer.Function(); } //! 
Get the step size. - double StepSize() const { return stepSize; } + double StepSize() const { return optimizer.StepSize(); } //! Modify the step size. - double& StepSize() { return stepSize; } + double& StepSize() { return optimizer.StepSize(); } //! Get the smoothing parameter. - double Alpha() const { return alpha; } + double Alpha() const { return optimizer.UpdatePolicy().Alpha(); } //! Modify the smoothing parameter. - double& Alpha() { return alpha; } + double& Alpha() { return optimizer.UpdatePolicy().Alpha(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return eps; } + double Epsilon() const { return optimizer.UpdatePolicy().Alpha(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return eps; } + double& Epsilon() { return optimizer.UpdatePolicy().Alpha(); } //! Get the maximum number of iterations (0 indicates no limit). - size_t MaxIterations() const { return maxIterations; } + size_t MaxIterations() const { return optimizer.MaxIterations(); } //! Modify the maximum number of iterations (0 indicates no limit). - size_t& MaxIterations() { return maxIterations; } + size_t& MaxIterations() { return optimizer.MaxIterations(); } //! Get the tolerance for termination. - double Tolerance() const { return tolerance; } + double Tolerance() const { return optimizer.Tolerance(); } //! Modify the tolerance for termination. - double& Tolerance() { return tolerance; } + double& Tolerance() { return optimizer.Tolerance(); } //! Get whether or not the individual functions are shuffled. - bool Shuffle() const { return shuffle; } + bool Shuffle() const { return optimizer.Shuffle(); } //! Modify whether or not the individual functions are shuffled. - bool& Shuffle() { return shuffle; } + bool& Shuffle() { return optimizer.Shuffle(); } private: - //! The instantiated function. - DecomposableFunctionType& function; - - //! The step size for each example. - double stepSize; - - //! The smoothing parameter. - double alpha; - - //! The value used to initialise the mean squared gradient parameter. - double eps; - - //! The maximum number of allowed iterations. - size_t maxIterations; - - //! The tolerance for termination. - double tolerance; - - //! Controls whether or not the individual functions are shuffled when - //! iterating. - bool shuffle; + //! The Stochastic Gradient Descent object with RMSpropUpdate policy. + SGD optimizer; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp index a06814b08f9..71a457e437f 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp @@ -2,8 +2,9 @@ * @file rmsprop_impl.hpp * @author Ryan Curtin * @author Marcus Edel + * @author Vivek Pal * - * Implementation of the RMSprop optimizer. + * Implementation of the RMSprop constructor. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. 
You should have received a copy of the @@ -23,109 +24,19 @@ template RMSprop::RMSprop(DecomposableFunctionType& function, const double stepSize, const double alpha, - const double eps, + const double epsilon, const size_t maxIterations, const double tolerance, const bool shuffle) : - function(function), - stepSize(stepSize), - alpha(alpha), - eps(eps), - maxIterations(maxIterations), - tolerance(tolerance), - shuffle(shuffle) + optimizer(function, + stepSize, + maxIterations, + tolerance, + shuffle, + RMSpropUpdate(epsilon, + alpha)) { /* Nothing to do. */ } -//! Optimize the function (minimize). -template -double RMSprop::Optimize(arma::mat& iterate) -{ - // Find the number of functions to use. - const size_t numFunctions = function.NumFunctions(); - - // This is used only if shuffle is true. - arma::Col visitationOrder; - if (shuffle) - visitationOrder = arma::shuffle(arma::linspace>(0, - (numFunctions - 1), numFunctions)); - - // To keep track of where we are and how things are going. - size_t currentFunction = 0; - double overallObjective = 0; - double lastObjective = DBL_MAX; - - // Calculate the first objective function. - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - - // Now iterate! - arma::mat gradient(iterate.n_rows, iterate.n_cols); - - // Leaky sum of squares of parameter gradient. - arma::mat meanSquaredGradient = arma::zeros(iterate.n_rows, - iterate.n_cols); - - for (size_t i = 1; i != maxIterations; ++i, ++currentFunction) - { - // Is this iteration the start of a sequence? - if ((currentFunction % numFunctions) == 0) - { - // Output current objective function. - Log::Info << "RMSprop: iteration " << i << ", objective " - << overallObjective << "." << std::endl; - - if (std::isnan(overallObjective) || std::isinf(overallObjective)) - { - Log::Warn << "RMSprop: converged to " << overallObjective - << "; terminating with failure. Try a smaller step size?" - << std::endl; - return overallObjective; - } - - if (std::abs(lastObjective - overallObjective) < tolerance) - { - Log::Info << "RMSprop: minimized within tolerance " << tolerance << "; " - << "terminating optimization." << std::endl; - return overallObjective; - } - - // Reset the counter variables. - lastObjective = overallObjective; - overallObjective = 0; - currentFunction = 0; - - if (shuffle) // Determine order of visitation. - visitationOrder = arma::shuffle(visitationOrder); - } - - // Evaluate the gradient for this iteration. - if (shuffle) - function.Gradient(iterate, visitationOrder[currentFunction], gradient); - else - function.Gradient(iterate, currentFunction, gradient); - - // And update the iterate. - meanSquaredGradient *= alpha; - meanSquaredGradient += (1 - alpha) * (gradient % gradient); - iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + eps); - - // Now add that to the overall objective function. - if (shuffle) - overallObjective += function.Evaluate(iterate, - visitationOrder[currentFunction]); - else - overallObjective += function.Evaluate(iterate, currentFunction); - } - - Log::Info << "RMSprop: maximum iterations (" << maxIterations << ") reached; " - << "terminating optimization." << std::endl; - // Calculate final objective. 
- overallObjective = 0; - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - return overallObjective; -} - } // namespace optimization } // namespace mlpack diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp new file mode 100644 index 00000000000..61546c134c2 --- /dev/null +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -0,0 +1,116 @@ +/** + * @file rmsprop_update.hpp + * @author Ryan Curtin + * @author Marcus Edel + * @author Vivek Pal + * + * RMSprop optimizer. RmsProp is an optimizer that utilizes the magnitude of + * recent gradients to normalize the gradients. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_UPDATE_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * RMSprop is an optimizer that utilizes the magnitude of recent gradients to + * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$ + * and a decay term \f$ \alpha \f$ we perform the following updates: + * + * \f{eqnarray*}{ + * r_t &=& (1 - \gamma) f'(\Delta_t)^2 + \gamma r_{t - 1} \\ + * v_{t + 1} &=& \frac{\alpha}{\sqrt{r_t}}f'(\Delta_t) \\ + * \Delta_{t + 1} &=& \Delta_t - v_{t + 1} + * \f} + * + * For more information, see the following. + * + * @code + * @misc{tieleman2012, + * title={Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine + * Learning}, + * year={2012} + * } + * @endcode + */ +class RMSpropUpdate +{ + public: + /** + * Construct the RMSprop update policy with the given epsilon parameter. + * + * @param epsilon The epsilon value used to initialise the squared gradient + * parameter. + * @param beta1 The smoothing parameter. + */ + RMSpropUpdate(const double epsilon = 1e-8, + const double alpha = 0.99) : + epsilon(epsilon), + alpha(alpha) + { + // Nothing to do. + } + + /** + * The Initialize method is called by SGD Optimizer method before the start of + * the iteration update process. + * + * @param rows number of rows in the gradient matrix. + * @param cols number of columns in the gradient matrix. + */ + void Initialize(const size_t rows, + const size_t cols) + { + // Leaky sum of squares of parameter gradient. + meanSquaredGradient = arma::zeros(rows, cols); + } + + /** + * Update step for RMSprop. + * + * @param iterate Parameters that minimize the function. + * @param stepSize Step size to be used for the given iteration. + * @param gradient The gradient matrix. + */ + void Update(arma::mat& iterate, + const double stepSize, + const arma::mat& gradient) + { + meanSquaredGradient *= alpha; + meanSquaredGradient += (1 - alpha) * (gradient % gradient); + iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + epsilon); + } + + //! Get the value used to initialise the squared gradient parameter. + double Epsilon() const { return epsilon; } + //! Modify the value used to initialise the squared gradient parameter. + double& Epsilon() { return epsilon; } + + //! Get the smoothing parameter. + double Alpha() const { return alpha; } + //! Modify the smoothing parameter. 
+ double& Alpha() { return alpha; } + + private: + // The epsilon value used to initialise the squared gradient parameter. + double epsilon; + + // The smoothing parameter. + double alpha; + + // Leaky sum of squares of parameter gradient. + arma::mat meanSquaredGradient; +}; + +} // namespace optimization +} // namespace mlpack + +#endif \ No newline at end of file From 2c65ade104c5a3533ed6f42de52481c267492d6a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 22:34:42 +0530 Subject: [PATCH 54/78] Fix a mistake --- src/mlpack/core/optimizers/rmsprop/rmsprop.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 31301a99b27..f2d9a084399 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -124,9 +124,9 @@ class RMSprop double& Alpha() { return optimizer.UpdatePolicy().Alpha(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return optimizer.UpdatePolicy().Alpha(); } + double Epsilon() const { return optimizer.UpdatePolicy().Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return optimizer.UpdatePolicy().Alpha(); } + double& Epsilon() { return optimizer.UpdatePolicy().Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). size_t MaxIterations() const { return optimizer.MaxIterations(); } From 612c5d98a85c42196bbb00c54f965d3b246eb803 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 22:43:09 +0530 Subject: [PATCH 55/78] Fix a typo: RmsProp -> RMSprop --- src/mlpack/core/optimizers/rmsprop/rmsprop.hpp | 2 +- src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index f2d9a084399..54136aca424 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * RMSprop optimizer. RmsProp is an optimizer that utilizes the magnitude of + * RMSprop optimizer. RMSprop is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. * * mlpack is free software; you may redistribute it and/or modify it under the diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index 61546c134c2..d18dfc85ea5 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * RMSprop optimizer. RmsProp is an optimizer that utilizes the magnitude of + * RMSprop optimizer. RMSprop is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. 
* * mlpack is free software; you may redistribute it and/or modify it under the From c1dd7308c1dddf72c8216bd3e1898ee53b90146e Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:13:09 +0530 Subject: [PATCH 56/78] Fix alignment issues --- src/mlpack/core/optimizers/rmsprop/rmsprop.hpp | 12 ++++++------ .../core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 54136aca424..782c7e38c6d 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -88,12 +88,12 @@ class RMSprop * function is visited in linear order. */ RMSprop(DecomposableFunctionType& function, - const double stepSize = 0.01, - const double alpha = 0.99, - const double epsilon = 1e-8, - const size_t maxIterations = 100000, - const double tolerance = 1e-5, - const bool shuffle = true); + const double stepSize = 0.01, + const double alpha = 0.99, + const double epsilon = 1e-8, + const size_t maxIterations = 100000, + const double tolerance = 1e-5, + const bool shuffle = true); /** * Optimize the given function using RMSprop. The given starting point will be diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index d18dfc85ea5..e5195f034d4 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -52,7 +52,7 @@ class RMSpropUpdate * @param beta1 The smoothing parameter. */ RMSpropUpdate(const double epsilon = 1e-8, - const double alpha = 0.99) : + const double alpha = 0.99) : epsilon(epsilon), alpha(alpha) { From b8a51f33482d964d08389fe0820e92f8ffb8d772 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:14:15 +0530 Subject: [PATCH 57/78] Correct the smoothing parameter name --- src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index e5195f034d4..d1451246784 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -49,7 +49,7 @@ class RMSpropUpdate * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. - * @param beta1 The smoothing parameter. + * @param alpha The smoothing parameter. */ RMSpropUpdate(const double epsilon = 1e-8, const double alpha = 0.99) : From 1b9cfeafdea1ea0ec0c80bda10ec0eb93722004b Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:26:33 +0530 Subject: [PATCH 58/78] Update the comment about the RMSpropUpdate constructor --- src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index d1451246784..750489b282a 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -45,7 +45,7 @@ class RMSpropUpdate { public: /** - * Construct the RMSprop update policy with the given epsilon parameter. + * Construct the RMSprop update policy with the given parameters. * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. 
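The RMSprop update policy introduced in the patches above (RMSpropUpdate, renamed in the next patch) performs the leaky mean-squared-gradient update documented in rmsprop_update.hpp. Below is a minimal standalone sketch of that rule, not part of the patch series: it assumes only Armadillo is available, and the toy objective f(x) = x^2, the starting point, and the iteration count are illustrative choices rather than mlpack defaults (the stepSize, alpha, and epsilon values mirror the defaults shown in rmsprop.hpp).

@code
#include <armadillo>

int main()
{
  // Values mirroring the defaults in rmsprop.hpp / rmsprop_update.hpp.
  const double stepSize = 0.01, alpha = 0.99, epsilon = 1e-8;

  // A single-parameter "model", started away from the optimum.
  arma::mat iterate(1, 1);
  iterate(0, 0) = 5.0;

  // Leaky sum of squares of the parameter gradient.
  arma::mat meanSquaredGradient(1, 1, arma::fill::zeros);

  for (size_t i = 0; i < 10000; ++i)
  {
    // Gradient of the toy objective f(x) = x^2.
    arma::mat gradient = 2.0 * iterate;

    // The same three steps carried out by the update policy's Update().
    meanSquaredGradient *= alpha;
    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
    iterate -= stepSize * gradient /
        (arma::sqrt(meanSquaredGradient) + epsilon);
  }

  // The iterate should now be close to the minimum at x = 0.
  iterate.print("iterate:");
  return 0;
}
@endcode

In the refactored code this same logic is supplied to SGD as a template parameter (SGD<DecomposableFunctionType, RMSPropUpdate>), so the policy only has to implement Initialize() and Update(); the sketch above simply inlines those two calls for a one-dimensional problem.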
From 73b5c68f0a53a795e36fd234fddc6e8b05ed91e4 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 6 Apr 2017 21:47:47 +0530 Subject: [PATCH 59/78] Rename RMSprop as RMSProp --- src/mlpack/core/optimizers/rmsprop/rmsprop.hpp | 18 +++++++++--------- .../core/optimizers/rmsprop/rmsprop_impl.hpp | 6 +++--- .../core/optimizers/rmsprop/rmsprop_update.hpp | 12 ++++++------ src/mlpack/methods/ann/ffn.hpp | 6 +++--- src/mlpack/tests/feedforward_network_test.cpp | 6 +++--- src/mlpack/tests/ksinit_test.cpp | 2 +- src/mlpack/tests/rmsprop_test.cpp | 12 ++++++------ 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 782c7e38c6d..4d67dd906b2 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * RMSprop optimizer. RMSprop is an optimizer that utilizes the magnitude of + * RMSProp optimizer. RMSProp is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -24,7 +24,7 @@ namespace mlpack { namespace optimization { /** - * RMSprop is an optimizer that utilizes the magnitude of recent gradients to + * RMSProp is an optimizer that utilizes the magnitude of recent gradients to * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$ * and a decay term \f$ \alpha \f$ we perform the following updates: * @@ -44,7 +44,7 @@ namespace optimization { * } * @endcode * - * For RMSprop to work, a DecomposableFunctionType template parameter is + * For RMSProp to work, a DecomposableFunctionType template parameter is * required. This class must implement the following function: * * size_t NumFunctions(); @@ -65,11 +65,11 @@ namespace optimization { * minimized. */ template -class RMSprop +class RMSProp { public: /** - * Construct the RMSprop optimizer with the given function and parameters. The + * Construct the RMSProp optimizer with the given function and parameters. The * defaults here are not necessarily good for the given problem, so it is * suggested that the values used be tailored to the task at hand. The * maximum number of iterations refers to the maximum number of points that @@ -87,7 +87,7 @@ class RMSprop * @param shuffle If true, the function order is shuffled; otherwise, each * function is visited in linear order. */ - RMSprop(DecomposableFunctionType& function, + RMSProp(DecomposableFunctionType& function, const double stepSize = 0.01, const double alpha = 0.99, const double epsilon = 1e-8, @@ -96,7 +96,7 @@ class RMSprop const bool shuffle = true); /** - * Optimize the given function using RMSprop. The given starting point will be + * Optimize the given function using RMSProp. The given starting point will be * modified to store the finishing point of the algorithm, and the final * objective value is returned. * @@ -144,8 +144,8 @@ class RMSprop bool& Shuffle() { return optimizer.Shuffle(); } private: - //! The Stochastic Gradient Descent object with RMSpropUpdate policy. - SGD optimizer; + //! The Stochastic Gradient Descent object with RMSPropUpdate policy. 
+ SGD optimizer; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp index 71a457e437f..3dcd8606954 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * Implementation of the RMSprop constructor. + * Implementation of the RMSProp constructor. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -21,7 +21,7 @@ namespace mlpack { namespace optimization { template -RMSprop::RMSprop(DecomposableFunctionType& function, +RMSProp::RMSProp(DecomposableFunctionType& function, const double stepSize, const double alpha, const double epsilon, @@ -33,7 +33,7 @@ RMSprop::RMSprop(DecomposableFunctionType& function, maxIterations, tolerance, shuffle, - RMSpropUpdate(epsilon, + RMSPropUpdate(epsilon, alpha)) { /* Nothing to do. */ } diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index 750489b282a..5ca0a900348 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * RMSprop optimizer. RMSprop is an optimizer that utilizes the magnitude of + * RMSProp optimizer. RMSProp is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -21,7 +21,7 @@ namespace mlpack { namespace optimization { /** - * RMSprop is an optimizer that utilizes the magnitude of recent gradients to + * RMS{rop is an optimizer that utilizes the magnitude of recent gradients to * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$ * and a decay term \f$ \alpha \f$ we perform the following updates: * @@ -41,17 +41,17 @@ namespace optimization { * } * @endcode */ -class RMSpropUpdate +class RMSPropUpdate { public: /** - * Construct the RMSprop update policy with the given parameters. + * Construct the RMSProp update policy with the given parameters. * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. * @param alpha The smoothing parameter. */ - RMSpropUpdate(const double epsilon = 1e-8, + RMSPropUpdate(const double epsilon = 1e-8, const double alpha = 0.99) : epsilon(epsilon), alpha(alpha) @@ -74,7 +74,7 @@ class RMSpropUpdate } /** - * Update step for RMSprop. + * Update step for RMSProp. * * @param iterate Parameters that minimize the function. * @param stepSize Step size to be used for the given iteration. diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index 051a5b03a69..466900bbca0 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -93,7 +93,7 @@ class FFN */ template< template class OptimizerType = - mlpack::optimization::RMSprop, + mlpack::optimization::RMSProp, typename... OptimizerTypeArgs > void Train(const arma::mat& predictors, @@ -102,7 +102,7 @@ class FFN /** * Train the feedforward network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified + * RMSProp optimization algorithm is used, but others can be specified * (such as mlpack::optimization::SGD). 
* * This will use the existing model parameters as a starting point for the @@ -114,7 +114,7 @@ class FFN * @param responses Outputs results from input training variables. */ template< - template class OptimizerType = mlpack::optimization::RMSprop + template class OptimizerType = mlpack::optimization::RMSProp > void Train(const arma::mat& predictors, const arma::mat& responses); diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp index 883fe9e6b00..e76e83bfe21 100644 --- a/src/mlpack/tests/feedforward_network_test.cpp +++ b/src/mlpack/tests/feedforward_network_test.cpp @@ -66,7 +66,7 @@ void BuildVanillaNetwork(MatType& trainData, model.Add >(hiddenLayerSize, outputSize); model.Add >(); - RMSprop opt(model, 0.01, 0.88, 1e-8, + RMSProp opt(model, 0.01, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1); model.Train(std::move(trainData), std::move(trainLabels), opt); @@ -194,7 +194,7 @@ void BuildDropoutNetwork(MatType& trainData, model.Add >(hiddenLayerSize, outputSize); model.Add >(); - RMSprop opt(model, 0.01, 0.88, 1e-8, + RMSProp opt(model, 0.01, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1); model.Train(std::move(trainData), std::move(trainLabels), opt); @@ -324,7 +324,7 @@ void BuildDropConnectNetwork(MatType& trainData, model.Add >(hiddenLayerSize, outputSize); model.Add >(); - RMSprop opt(model, 0.01, 0.88, 1e-8, + RMSProp opt(model, 0.01, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1); model.Train(std::move(trainData), std::move(trainLabels), opt); diff --git a/src/mlpack/tests/ksinit_test.cpp b/src/mlpack/tests/ksinit_test.cpp index 1b42d8bfa0a..16ec7457e42 100644 --- a/src/mlpack/tests/ksinit_test.cpp +++ b/src/mlpack/tests/ksinit_test.cpp @@ -85,7 +85,7 @@ void BuildVanillaNetwork(MatType& trainData, model.Add >(); model.Add >(hiddenLayerSize, outputSize); - RMSprop opt(model, 0.01, 0.88, 1e-8, + RMSProp opt(model, 0.01, 0.88, 1e-8, maxEpochs * trainData.n_cols, 1e-18); model.Train(std::move(trainData), std::move(trainLabels), opt); diff --git a/src/mlpack/tests/rmsprop_test.cpp b/src/mlpack/tests/rmsprop_test.cpp index 831df74302d..19275588dff 100644 --- a/src/mlpack/tests/rmsprop_test.cpp +++ b/src/mlpack/tests/rmsprop_test.cpp @@ -27,15 +27,15 @@ using namespace mlpack::optimization::test; using namespace mlpack::distribution; using namespace mlpack::regression; -BOOST_AUTO_TEST_SUITE(RMSpropTest); +BOOST_AUTO_TEST_SUITE(RMSPropTest); /** - * Tests the RMSprop optimizer using a simple test function. + * Tests the RMSProp optimizer using a simple test function. */ -BOOST_AUTO_TEST_CASE(SimpleRMSpropTestFunction) +BOOST_AUTO_TEST_CASE(SimpleRMSPropTestFunction) { SGDTestFunction f; - RMSprop optimizer(f, 1e-3, 0.99, 1e-8, 5000000, 1e-9, true); + RMSProp optimizer(f, 1e-3, 0.99, 1e-8, 5000000, 1e-9, true); arma::mat coordinates = f.GetInitialPoint(); optimizer.Optimize(coordinates); @@ -46,7 +46,7 @@ BOOST_AUTO_TEST_CASE(SimpleRMSpropTestFunction) } /** - * Run RMSprop on logistic regression and make sure the results are acceptable. + * Run RMSProp on logistic regression and make sure the results are acceptable. */ BOOST_AUTO_TEST_CASE(LogisticRegressionTest) { @@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(LogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - RMSprop > rmsprop(lrf); + RMSProp > rmsprop(lrf); lr.Train(rmsprop); // Ensure that the error is close to zero. 
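The rename above is purely mechanical: caller code changes only in the class name, while the constructor argument list and the Optimize() call are unchanged. A minimal sketch mirroring the updated SimpleRMSPropTestFunction test; the explicit template argument (the decomposable function type) and the test-function header path are filled in here and should be treated as illustrative:

@code
#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
#include <mlpack/core/optimizers/sgd/test_function.hpp>

using namespace mlpack::optimization;
using namespace mlpack::optimization::test;

SGDTestFunction f;

// Same arguments as before the rename:
// (function, stepSize, alpha, epsilon, maxIterations, tolerance, shuffle).
RMSProp<SGDTestFunction> optimizer(f, 1e-3, 0.99, 1e-8, 5000000, 1e-9, true);

arma::mat coordinates = f.GetInitialPoint();
optimizer.Optimize(coordinates);
@endcode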
From 408f0e8352b0cdeda6c049fa8c79ee252a01db8e Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 6 Apr 2017 21:50:20 +0530 Subject: [PATCH 60/78] fixup! Rename RMSprop as RMSProp --- src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index 5ca0a900348..b9a821a8c48 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -21,7 +21,7 @@ namespace mlpack { namespace optimization { /** - * RMS{rop is an optimizer that utilizes the magnitude of recent gradients to + * RMSProp is an optimizer that utilizes the magnitude of recent gradients to * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$ * and a decay term \f$ \alpha \f$ we perform the following updates: * From d6e82692738e444c6a95fe14e925e5fec203c795 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 6 Apr 2017 22:01:34 +0530 Subject: [PATCH 61/78] Rename RMSprop as RMSProp in convolutional_network_test.cpp --- src/mlpack/tests/convolutional_network_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index 26b4c28bd18..ea54a4ab95b 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) model.Add >(10, 2); model.Add >(); - RMSprop opt(model, 0.001, 0.88, 1e-8, 5000, -1); + RMSProp opt(model, 0.001, 0.88, 1e-8, 5000, -1); model.Train(std::move(X), std::move(Y), opt); From 47ab60c6e3b83cce701583d95b66a64ccd1bf390 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 12 Apr 2017 17:02:32 +0200 Subject: [PATCH 62/78] Use the correct size for the random block initialization. --- .../methods/block_krylov_svd/randomized_block_krylov_svd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index 702d706fd28..bef09fd3992 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -57,7 +57,7 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, } // Random block initialization. - arma::mat G = arma::randn(data.n_rows, blockSize); + arma::mat G = arma::randn(data.n_cols, blockSize); // Construct and orthonormalize Krylov subspace. arma::mat K(data.n_rows, blockSize * (maxIterations + 1)); From 4fd1d707156d20d14d0464cb7a62e5d83a731bd5 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 12 Apr 2017 17:05:56 +0200 Subject: [PATCH 63/78] Fix dimension mismatch, that occurs if rows != cols and icrease the number of iterations (power method) to stabilize the test case. 
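The dimension fix below builds the diagonal matrix of singular values explicitly: arma::eye(n, n) * (lowRank + tail) collapses to a plain column vector rather than the intended n x n diagonal matrix, which breaks (U * s) * V.t() when the generated matrix is not square, while assigning the values to s.diag() keeps s an n x n matrix for any rows/cols combination. A small sketch of the idiom, with hypothetical sizes:

@code
// Hypothetical sizes, illustrating the idiom the test now uses to turn a
// vector of singular values into a diagonal matrix.
arma::vec singularValues = arma::randu<arma::vec>(10);

arma::mat s = arma::zeros(10, 10);
s.diag() = singularValues;  // s is now diag(singularValues), 10 x 10.

// data = (U * s) * V.t() then has consistent dimensions regardless of
// whether the generated matrix is square.
@endcode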
--- src/mlpack/tests/block_krylov_svd_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mlpack/tests/block_krylov_svd_test.cpp b/src/mlpack/tests/block_krylov_svd_test.cpp index 5d7e6e2e283..44b3302b9c8 100644 --- a/src/mlpack/tests/block_krylov_svd_test.cpp +++ b/src/mlpack/tests/block_krylov_svd_test.cpp @@ -39,7 +39,8 @@ void CreateNoisyLowRankMatrix(arma::mat& data, arma::exp(-1.0 * arma::pow((ids / rank), 2))); arma::vec tail = strength * arma::exp(-0.1 * ids / rank); - arma::mat s = arma::eye(n, n) * (lowRank + tail); + arma::mat s = arma::zeros(n, n); + s.diag() = lowRank + tail; data = (U * s) * V.t(); } @@ -102,7 +103,7 @@ BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNoisyLowRankTest) arma::svd_econ(U1, s1, V1, data); - svd::RandomizedBlockKrylovSVD rSVDA(data, U2, s2, V2, 1, rank, 5); + svd::RandomizedBlockKrylovSVD rSVDA(data, U2, s2, V2, 5, rank, 5); double error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); BOOST_REQUIRE_SMALL(error, 0.1); From d5358b7f399206431b99e1b24d0ecf1afb87e99f Mon Sep 17 00:00:00 2001 From: CodeAi Date: Wed, 12 Apr 2017 11:28:41 -0400 Subject: [PATCH 64/78] CodeAi fixed 4 null pointer dereferences by tightening the if condition to check the potentially null pointer variables before use in the constructor. --- .../methods/hoeffding_trees/hoeffding_tree_model.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_model.cpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_model.cpp index 0599a4ce08c..9b35b531529 100644 --- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_model.cpp +++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_model.cpp @@ -68,13 +68,13 @@ HoeffdingTreeModel& HoeffdingTreeModel::operator=( // Create the right tree. type = other.type; - if (type == GINI_HOEFFDING) + if (other.giniHoeffdingTree && (type == GINI_HOEFFDING)) giniHoeffdingTree = new GiniHoeffdingTreeType(*other.giniHoeffdingTree); - else if (type == GINI_BINARY) + else if (other.giniBinaryTree && (type == GINI_BINARY)) giniBinaryTree = new GiniBinaryTreeType(*other.giniBinaryTree); - else if (type == INFO_HOEFFDING) + else if (other.infoHoeffdingTree && (type == INFO_HOEFFDING)) infoHoeffdingTree = new InfoHoeffdingTreeType(*other.infoHoeffdingTree); - else if (type == INFO_BINARY) + else if (other.infoBinaryTree && (type == INFO_BINARY)) infoBinaryTree = new InfoBinaryTreeType(*other.infoBinaryTree); return *this; From 3f7d13803b7119270157aa12f2e6aaabd5d85959 Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Wed, 12 Apr 2017 17:52:01 -0400 Subject: [PATCH 65/78] Use std::forward and universal references to simplify. --- src/mlpack/methods/emst/dtb_impl.hpp | 25 ++++---- .../methods/kmeans/dual_tree_kmeans_impl.hpp | 22 ++++--- .../neighbor_search/neighbor_search_impl.hpp | 57 +++++-------------- .../range_search/range_search_impl.hpp | 51 +++++------------ src/mlpack/methods/rann/ra_search_impl.hpp | 46 ++++----------- src/mlpack/prereqs.hpp | 1 + 6 files changed, 60 insertions(+), 142 deletions(-) diff --git a/src/mlpack/methods/emst/dtb_impl.hpp b/src/mlpack/methods/emst/dtb_impl.hpp index 1d87ea73f19..60320757413 100644 --- a/src/mlpack/methods/emst/dtb_impl.hpp +++ b/src/mlpack/methods/emst/dtb_impl.hpp @@ -18,27 +18,25 @@ namespace mlpack { namespace emst { //! Call the tree constructor that does mapping. 
-template +template TreeType* BuildTree( - MatType& dataset, + MatType&& dataset, std::vector& oldFromNew, - const typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset, oldFromNew); + return new TreeType(std::forward(dataset), oldFromNew); } //! Call the tree constructor that does not do mapping. -template +template TreeType* BuildTree( - const MatType& dataset, + MatType&& dataset, const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset == false, TreeType - >* = 0) + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset); + return new TreeType(std::forward(dataset)); } /** @@ -55,8 +53,7 @@ DualTreeBoruvka::DualTreeBoruvka( const MatType& dataset, const bool naive, const MetricType metric) : - tree(naive ? NULL : BuildTree(const_cast(dataset), - oldFromNew)), + tree(naive ? NULL : BuildTree(dataset, oldFromNew)), data(naive ? dataset : tree->Dataset()), ownTree(!naive), naive(naive), diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp index 27c54d53a27..66b0e6519d1 100644 --- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp +++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp @@ -24,29 +24,27 @@ namespace mlpack { namespace kmeans { //! Call the tree constructor that does mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, std::vector& oldFromNew, - const typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) { // This is a hack. I know this will be BinarySpaceTree, so force a leaf size // of two. - return new TreeType(dataset, oldFromNew, 1); + return new TreeType(std::forward(dataset), oldFromNew, 1); } //! Call the tree constructor that does not do mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset); + return new TreeType(std::forward(dataset)); } template -TreeType* BuildTree( - const MatType& dataset, - std::vector& oldFromNew, - typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(dataset, oldFromNew); -} - -//! Call the tree constructor that does not do mapping. -template -TreeType* BuildTree( - const MatType& dataset, - const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(dataset); -} - -//! Call the tree construct that does mapping. -template +template TreeType* BuildTree( MatType&& dataset, std::vector& oldFromNew, @@ -54,19 +30,19 @@ TreeType* BuildTree( tree::TreeTraits::RearrangesDataset, TreeType >* = 0) { - return new TreeType(std::move(dataset), oldFromNew); + return new TreeType(std::forward(dataset), oldFromNew); } //! Call the tree constructor that does not do mapping. 
-template +template TreeType* BuildTree( MatType&& dataset, - std::vector& /* oldFromNew */, - typename std::enable_if_t< + const std::vector& /* oldFromNew */, + const typename std::enable_if_t< !tree::TreeTraits::RearrangesDataset, TreeType >* = 0) { - return new TreeType(std::move(dataset)); + return new TreeType(std::forward(dataset)); } // Construct the object. @@ -84,7 +60,7 @@ SingleTreeTraversalType>::NeighborSearch(const MatType& referenceSetIn, const double epsilon, const MetricType metric) : referenceTree(mode == NAIVE_MODE ? NULL : - BuildTree(referenceSetIn, oldFromNewReferences)), + BuildTree(referenceSetIn, oldFromNewReferences)), referenceSet(mode == NAIVE_MODE ? &referenceSetIn : &referenceTree->Dataset()), treeOwner(mode != NAIVE_MODE), @@ -115,9 +91,8 @@ SingleTreeTraversalType>::NeighborSearch(MatType&& referenceSetIn, const double epsilon, const MetricType metric) : referenceTree(mode == NAIVE_MODE ? NULL : - BuildTree(std::move(referenceSetIn), - oldFromNewReferences)), - referenceSet(mode == NAIVE_MODE ? new MatType(std::move(referenceSetIn)) : + BuildTree(std::move(referenceSetIn), oldFromNewReferences)), + referenceSet(mode == NAIVE_MODE ? new MatType(std::move(referenceSetIn)) : &referenceTree->Dataset()), treeOwner(mode != NAIVE_MODE), setOwner(mode == NAIVE_MODE), @@ -220,8 +195,7 @@ SingleTreeTraversalType>::NeighborSearch(const NeighborSearchMode mode, // Build the tree on the empty dataset, if necessary. if (mode != NAIVE_MODE) { - referenceTree = BuildTree(*referenceSet, - oldFromNewReferences); + referenceTree = BuildTree(*referenceSet, oldFromNewReferences); treeOwner = true; } } @@ -278,7 +252,7 @@ SingleTreeTraversalType>::NeighborSearch(NeighborSearch&& other) : { // Clear the other model. other.referenceSet = new MatType(); - other.referenceTree = BuildTree(*other.referenceSet, + other.referenceTree = BuildTree(*other.referenceSet, other.oldFromNewReferences); other.treeOwner = true; other.setOwner = true; @@ -373,7 +347,7 @@ NeighborSearch(*other.referenceSet, + other.referenceTree = BuildTree(*other.referenceSet, other.oldFromNewReferences); other.treeOwner = true; other.setOwner = true; @@ -424,8 +398,7 @@ DualTreeTraversalType, SingleTreeTraversalType>::Train( // We may need to rebuild the tree. if (searchMode != NAIVE_MODE) { - referenceTree = BuildTree(referenceSet, - oldFromNewReferences); + referenceTree = BuildTree(referenceSet, oldFromNewReferences); treeOwner = true; } else @@ -465,7 +438,7 @@ DualTreeTraversalType, SingleTreeTraversalType>::Train(MatType&& referenceSetIn) // We may need to rebuild the tree. if (searchMode != NAIVE_MODE) { - referenceTree = BuildTree(std::move(referenceSetIn), + referenceTree = BuildTree(std::move(referenceSetIn), oldFromNewReferences); treeOwner = true; } @@ -656,7 +629,7 @@ DualTreeTraversalType, SingleTreeTraversalType>::Search( // Build the query tree. 
Timer::Stop("computing_neighbors"); Timer::Start("tree_building"); - Tree* queryTree = BuildTree(querySet, oldFromNewQueries); + Tree* queryTree = BuildTree(querySet, oldFromNewQueries); Timer::Stop("tree_building"); Timer::Start("computing_neighbors"); diff --git a/src/mlpack/methods/range_search/range_search_impl.hpp b/src/mlpack/methods/range_search/range_search_impl.hpp index 5c6571a7997..f1a813c1668 100644 --- a/src/mlpack/methods/range_search/range_search_impl.hpp +++ b/src/mlpack/methods/range_search/range_search_impl.hpp @@ -21,49 +21,25 @@ namespace mlpack { namespace range { -template +template TreeType* BuildTree( - typename TreeType::Mat& dataset, + MatType&& dataset, std::vector& oldFromNew, - typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset, oldFromNew); + return new TreeType(std::forward(dataset), oldFromNew); } //! Call the tree constructor that does not do mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset); -} - -template -TreeType* BuildTree( - typename TreeType::Mat&& dataset, - std::vector& oldFromNew, - const typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(std::move(dataset), oldFromNew); -} - -template -TreeType* BuildTree( - typename TreeType::Mat&& dataset, - const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(std::move(dataset)); + return new TreeType(std::forward(dataset)); } template::RangeSearch( const bool naive, const bool singleMode, const MetricType metric) : - referenceTree(naive ? NULL : BuildTree( - const_cast(referenceSetIn), oldFromNewReferences)), + referenceTree(naive ? NULL : BuildTree(referenceSetIn, + oldFromNewReferences)), referenceSet(naive ? &referenceSetIn : &referenceTree->Dataset()), treeOwner(!naive), // If in naive mode, we are not building any trees. setOwner(false), @@ -497,8 +473,7 @@ void RangeSearch::Search( // Build the query tree. Timer::Stop("range_search/computing_neighbors"); Timer::Start("range_search/tree_building"); - Tree* queryTree = BuildTree(const_cast(querySet), - oldFromNewQueries); + Tree* queryTree = BuildTree(querySet, oldFromNewQueries); Timer::Stop("range_search/tree_building"); Timer::Start("range_search/computing_neighbors"); diff --git a/src/mlpack/methods/rann/ra_search_impl.hpp b/src/mlpack/methods/rann/ra_search_impl.hpp index fca31657ad3..23984132fd9 100644 --- a/src/mlpack/methods/rann/ra_search_impl.hpp +++ b/src/mlpack/methods/rann/ra_search_impl.hpp @@ -23,51 +23,25 @@ namespace neighbor { namespace aux { //! Call the tree constructor that does mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, std::vector& oldFromNew, - typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset, oldFromNew); + return new TreeType(std::forward(dataset), oldFromNew); } //! 
Call the tree constructor that does not do mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset); -} - -//! Call the tree constructor that does mapping. -template -TreeType* BuildTree( - typename TreeType::Mat&& dataset, - std::vector& oldFromNew, - typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(std::move(dataset), oldFromNew); -} - -//! Call the tree constructor that does not do mapping. -template -TreeType* BuildTree( - typename TreeType::Mat&& dataset, - const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(std::move(dataset)); + return new TreeType(std::forward(dataset)); } } // namespace aux diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp index e46fa859ab6..42a49d637cb 100644 --- a/src/mlpack/prereqs.hpp +++ b/src/mlpack/prereqs.hpp @@ -34,6 +34,7 @@ #include #include #include +#include // But if it's not defined, we'll do it. #ifndef M_PI From f790cb1f181298c609a7455d4150fec5b2083327 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 13 Apr 2017 01:39:17 +0200 Subject: [PATCH 66/78] Increase data matrix and number of iterations for the power method. --- src/mlpack/tests/block_krylov_svd_test.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/mlpack/tests/block_krylov_svd_test.cpp b/src/mlpack/tests/block_krylov_svd_test.cpp index 44b3302b9c8..fa608596fff 100644 --- a/src/mlpack/tests/block_krylov_svd_test.cpp +++ b/src/mlpack/tests/block_krylov_svd_test.cpp @@ -94,7 +94,7 @@ BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDReconstructionError) BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNoisyLowRankTest) { arma::mat data; - CreateNoisyLowRankMatrix(data, 100, 1000, 5, 1.0); + CreateNoisyLowRankMatrix(data, 200, 1000, 5, 0.5); const size_t rank = 5; @@ -103,15 +103,10 @@ BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNoisyLowRankTest) arma::svd_econ(U1, s1, V1, data); - svd::RandomizedBlockKrylovSVD rSVDA(data, U2, s2, V2, 5, rank, 5); - - double error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); - BOOST_REQUIRE_SMALL(error, 0.1); - svd::RandomizedBlockKrylovSVD rSVDB(data, U2, s2, V2, 10, rank, 20); - error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); - BOOST_REQUIRE_SMALL(error, 1e-3); + double error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); + BOOST_REQUIRE_SMALL(error, 1e-2); } BOOST_AUTO_TEST_SUITE_END(); From 03ba9bf6d1bb047bb83e1359842b42ef7481d66b Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Thu, 13 Apr 2017 11:03:11 -0400 Subject: [PATCH 67/78] Add first non-human contributor. --- COPYRIGHT.txt | 1 + src/mlpack/core.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index d39c4a2b5cb..bf9841da5f7 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -78,6 +78,7 @@ Copyright: Copyright 2017, Sagar B Hathwar Copyright 2017, Nishanth Hegde Copyright 2017, Parminder Singh + Copyright 2017, CodeAi License: BSD-3-clause All rights reserved. 
diff --git a/src/mlpack/core.hpp b/src/mlpack/core.hpp index 0880064c5eb..94a2ab84e75 100644 --- a/src/mlpack/core.hpp +++ b/src/mlpack/core.hpp @@ -219,6 +219,7 @@ * - Sagar B Hathwar * - Nishanth Hegde * - Parminder Singh + * - CodeAi (deep learning bug detector) */ // First, include all of the prerequisites. From 27ab7fa20044751fef460bb06ab5c44e2ab369de Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Thu, 13 Apr 2017 15:24:53 -0400 Subject: [PATCH 68/78] Update history. --- HISTORY.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index e5883ec98b5..5541f6326df 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,11 @@ ### mlpack ?.?.? ###### ????-??-?? +### mlpack 2.2.1 +###### 2017-04-13 + * Compilation fix for mlpack_nca and mlpack_test on older Armadillo versions + (#984). + ### mlpack 2.2.0 ###### 2017-03-21 * Bugfix for mlpack_knn program (#816). From dec8a4ffcbe235362d62870d30f05255bd1a13c6 Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Thu, 13 Apr 2017 15:36:20 -0400 Subject: [PATCH 69/78] Update version numbers. --- doc/guide/build.hpp | 4 ++-- src/mlpack/core/util/version.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/guide/build.hpp b/doc/guide/build.hpp index b9f9913d03e..6b999913ecd 100644 --- a/doc/guide/build.hpp +++ b/doc/guide/build.hpp @@ -23,14 +23,14 @@ href="https://keon.io/mlpack/mlpack-on-windows/">Keon's excellent tutorial. @section Download latest mlpack build Download latest mlpack build from here: -mlpack-2.2.0 +mlpack-2.2.1 @section builddir Creating Build Directory Once the mlpack source is unpacked, you should create a build directory. @code -$ cd mlpack-2.2.0 +$ cd mlpack-2.2.1 $ mkdir build @endcode diff --git a/src/mlpack/core/util/version.hpp b/src/mlpack/core/util/version.hpp index b3dea0c3a52..a921444257e 100644 --- a/src/mlpack/core/util/version.hpp +++ b/src/mlpack/core/util/version.hpp @@ -17,13 +17,13 @@ // The version of mlpack. If this is a git repository, this will be a version // with higher number than the most recent release. #define MLPACK_VERSION_MAJOR 2 -#define MLPACK_VERSION_MINOR 0 +#define MLPACK_VERSION_MINOR 2 #define MLPACK_VERSION_PATCH "x" // Reverse compatibility; these macros will be removed in future versions of // mlpack (3.0.0 and newer)! #define __MLPACK_VERSION_MAJOR 2 -#define __MLPACK_VERSION_MINOR 0 +#define __MLPACK_VERSION_MINOR 2 #define __MLPACK_VERSION_PATCH "x" // The name of the version (for use by --version). From e7bfbb19dfbc8cecb5ae094fc3cc45ca0ef87e6a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Fri, 14 Apr 2017 23:05:35 +0530 Subject: [PATCH 70/78] Add paper url to the citation --- src/mlpack/core/optimizers/adam/adam.hpp | 3 ++- src/mlpack/core/optimizers/adam/adam_update.hpp | 3 ++- src/mlpack/core/optimizers/adam/adamax_update.hpp | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index 37411d9bb0b..d4808c6cd93 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -40,7 +40,8 @@ namespace optimization { * author = {Diederik P. 
Kingma and Jimmy Ba}, * title = {Adam: {A} Method for Stochastic Optimization}, * journal = {CoRR}, - * year = {2014} + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode * diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index 95920c14ee7..540ae1ce01c 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -34,7 +34,8 @@ namespace optimization { * author = {Diederik P. Kingma and Jimmy Ba}, * title = {Adam: {A} Method for Stochastic Optimization}, * journal = {CoRR}, - * year = {2014} + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode */ diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index 97e26fe683d..6337efaae3a 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -36,7 +36,8 @@ namespace optimization { * author = {Diederik P. Kingma and Jimmy Ba}, * title = {Adam: {A} Method for Stochastic Optimization}, * journal = {CoRR}, - * year = {2014} + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode */ From 0d65254c6c8833edc62de9591ea28570ad7b42ee Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 14 Apr 2017 14:28:08 +0200 Subject: [PATCH 71/78] Minor style fix. --- .../pca/decomposition_policies/exact_svd_method.hpp | 2 +- .../pca/decomposition_policies/quic_svd_method.hpp | 13 ++++++------- .../randomized_svd_method.hpp | 13 ++++++------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp index fe0fb0c9ffe..dd5fa1e987e 100644 --- a/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp +++ b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp @@ -26,7 +26,7 @@ namespace pca { */ class ExactSVDPolicy { - public: + public: /** * Apply Principal Component Analysis to the provided data set using the * exact SVD method. diff --git a/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp index df18f0ba882..f3ecc2103d8 100644 --- a/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp +++ b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp @@ -25,8 +25,7 @@ namespace pca { */ class QUICSVDPolicy { - public: - + public: /** * Use QUIC-SVD method to perform the principal components analysis (PCA). * @@ -83,12 +82,12 @@ class QUICSVDPolicy //! Modify the cumulative probability for Monte Carlo error lower bound. double& Delta() { return delta; } - private: - //! Error tolerance fraction for calculated subspace. - double epsilon; + private: + //! Error tolerance fraction for calculated subspace. + double epsilon; - //! Cumulative probability for Monte Carlo error lower bound. - double delta; + //! Cumulative probability for Monte Carlo error lower bound. 
+ double delta; }; } // namespace pca diff --git a/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp index f7f9089b4c6..148dfb9e02a 100644 --- a/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp +++ b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp @@ -16,7 +16,6 @@ #include #include -#include namespace mlpack { namespace pca { @@ -26,7 +25,7 @@ namespace pca { */ class RandomizedSVDPolicy { - public: + public: /** * Use randomized SVD method to perform the principal components analysis * (PCA). @@ -88,12 +87,12 @@ class RandomizedSVDPolicy //! Modify the number of iterations for the power method. size_t& MaxIterations() { return maxIterations; } - private: - //! Locally stored size of the normalized power iterations. - size_t iteratedPower; + private: + //! Locally stored size of the normalized power iterations. + size_t iteratedPower; - //! Locally stored number of iterations for the power method. - size_t maxIterations; + //! Locally stored number of iterations for the power method. + size_t maxIterations; }; } // namespace pca From 12e433d4bdd26d19f964bb6502be2cc73c22d4c7 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 14 Apr 2017 15:39:15 +0200 Subject: [PATCH 72/78] Add randomized block krylov SVD policy to perform the principal components analysis (PCA). --- .../pca/decomposition_policies/CMakeLists.txt | 1 + .../randomized_block_krylov_method.hpp | 101 ++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 src/mlpack/methods/pca/decomposition_policies/randomized_block_krylov_method.hpp diff --git a/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt b/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt index 968c7cc4bb9..85bbb7c305b 100644 --- a/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt +++ b/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt @@ -2,6 +2,7 @@ # Anything not in this list will not be compiled into mlpack. set(SOURCES exact_svd_method.hpp + randomized_block_krylov_method.hpp randomized_svd_method.hpp quic_svd_method.hpp ) diff --git a/src/mlpack/methods/pca/decomposition_policies/randomized_block_krylov_method.hpp b/src/mlpack/methods/pca/decomposition_policies/randomized_block_krylov_method.hpp new file mode 100644 index 00000000000..957cd3bbd61 --- /dev/null +++ b/src/mlpack/methods/pca/decomposition_policies/randomized_block_krylov_method.hpp @@ -0,0 +1,101 @@ +/** + * @file randomized_block_krylov_method.hpp + * @author Marcus Edel + * + * Implementation of the randomized block krylov SVD method for use in the + * Principal Components Analysis method. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_BLOCK_KRYLOV_HPP +#define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_BLOCK_KRYLOV_HPP + +#include +#include + +namespace mlpack { +namespace pca { + +/** + * Implementation of the randomized block krylov SVD policy. + */ +class RandomizedBlockKrylovSVDPolicy +{ + public: + /** + * Use randomized block krylov SVD method to perform the principal components + * analysis (PCA). 
+ * + * @param maxIterations Number of iterations for the power method + * (Default: 2). + * @param blockSize The block size, must be >= rank (Default: rank + 10). + */ + RandomizedBlockKrylovSVDPolicy(const size_t maxIterations = 2, + const size_t blockSize = 0) : + maxIterations(maxIterations), + blockSize(blockSize) + { + /* Nothing to do here */ + } + + /** + * Apply Principal Component Analysis to the provided data set using the + * randomized block krylov SVD method. + * + * @param data Data matrix. + * @param centeredData Centered data matrix. + * @param transformedData Matrix to put results of PCA into. + * @param eigVal Vector to put eigenvalues into. + * @param eigvec Matrix to put eigenvectors (loadings) into. + * @param rank Rank of the decomposition. + */ + void Apply(const arma::mat& data, + const arma::mat& centeredData, + arma::mat& transformedData, + arma::vec& eigVal, + arma::mat& eigvec, + const size_t rank) + { + // This matrix will store the right singular values; we do not need them. + arma::mat v; + + // Do singular value decomposition using the randomized block krylov SVD + // algorithm. + svd::RandomizedBlockKrylovSVD rsvd(maxIterations, blockSize); + rsvd.Apply(centeredData, eigvec, eigVal, v, rank); + + // Now we must square the singular values to get the eigenvalues. + // In addition we must divide by the number of points, because the + // covariance matrix is X * X' / (N - 1). + eigVal %= eigVal / (data.n_cols - 1); + + // Project the samples to the principals. + transformedData = arma::trans(eigvec) * centeredData; + } + + //! Get the number of iterations for the power method. + size_t MaxIterations() const { return maxIterations; } + //! Modify the number of iterations for the power method. + size_t& MaxIterations() { return maxIterations; } + + //! Get the block size. + size_t BlockSize() const { return blockSize; } + //! Modify the block size. + size_t& BlockSize() { return blockSize; } + + private: + //! Locally stored number of iterations for the power method. + size_t maxIterations; + + //! Locally stored block size value. + size_t blockSize; +}; + +} // namespace pca +} // namespace mlpack + +#endif From dec81074ef22d191ab574b37a70252c80ed6c7f5 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 14 Apr 2017 15:44:02 +0200 Subject: [PATCH 73/78] Tests for the randomized block krylov pca policy. --- src/mlpack/tests/pca_test.cpp | 36 ++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/mlpack/tests/pca_test.cpp b/src/mlpack/tests/pca_test.cpp index 5ec70b15bb9..926fff93118 100644 --- a/src/mlpack/tests/pca_test.cpp +++ b/src/mlpack/tests/pca_test.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "test_tools.hpp" @@ -31,15 +32,17 @@ using namespace mlpack::distribution; * specified decomposition policy. */ template -void ArmaComparisonPCA() +void ArmaComparisonPCA( + const bool scaleData = false, + const DecompositionPolicy& decomposition = DecompositionPolicy()) { arma::mat coeff, coeff1, score, score1; arma::vec eigVal, eigVal1; arma::mat data = arma::randu(3, 1000); - PCAType exactPCA; - exactPCA.Apply(data, score1, eigVal1, coeff1); + PCAType pcaType(scaleData, decomposition); + pcaType.Apply(data, score1, eigVal1, coeff1); princomp(coeff, score, eigVal, trans(data)); @@ -58,7 +61,9 @@ void ArmaComparisonPCA() * (which should be correct!) using the specified decomposition policy. 
*/ template -void PCADimensionalityReduction() +void PCADimensionalityReduction( + const bool scaleData = false, + const DecompositionPolicy& decomposition = DecompositionPolicy()) { // Fake, simple dataset. The results we will compare against are from MATLAB. mat data("1 0 2 3 9;" @@ -66,7 +71,7 @@ void PCADimensionalityReduction() "6 7 3 1 8"); // Now run PCA to reduce the dimensionality. - PCAType p; + PCAType p(scaleData, decomposition); const double varRetained = p.Apply(data, 2); // Reduce to 2 dimensions. // Compare with correct results. @@ -168,6 +173,16 @@ BOOST_AUTO_TEST_CASE(ArmaComparisonExactPCATest) ArmaComparisonPCA(); } +/** + * Compare the output of our randomized block krylov PCA implementation with + * Armadillo's. + */ +BOOST_AUTO_TEST_CASE(ArmaComparisonRandomizedBlockKrylovPCATest) +{ + RandomizedBlockKrylovSVDPolicy decomposition(5); + ArmaComparisonPCA(false, decomposition); +} + /** * Compare the output of our randomized-SVD PCA implementation with Armadillo's. */ @@ -185,6 +200,17 @@ BOOST_AUTO_TEST_CASE(ExactPCADimensionalityReductionTest) PCADimensionalityReduction(); } +/** + * Test that dimensionality reduction with randomized block krylov PCA works the + * same way MATLAB does (which should be correct!). + */ +BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovPCADimensionalityReductionTest) +{ + RandomizedBlockKrylovSVDPolicy decomposition(5); + PCADimensionalityReduction(false, + decomposition); +} + /** * Test that dimensionality reduction with randomized-svd PCA works the same way * MATLAB does (which should be correct!). From 8218d26b3998140844a6c3c08f2a08fb009424ee Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 22 Apr 2017 22:19:48 +0200 Subject: [PATCH 74/78] Do not use block size for the block initialization if block size isn't specified, since block size approximation might be wrong. --- .../block_krylov_svd/randomized_block_krylov_svd.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index bef09fd3992..57e9a5d201b 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -71,14 +71,14 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, blockOffset += block.n_elem) { // Temporary working matrix to store the result in the correct place. - blockIteration = arma::mat(K.memptr() + blockOffset, data.n_rows, - blockSize, false); + blockIteration = arma::mat(K.memptr() + blockOffset, block.n_rows, + block.n_cols, false, false); arma::qr_econ(blockIteration, R, data * (data.t() * block)); // Update working matrix for the next iteration. - block = arma::mat(K.memptr() + blockOffset, data.n_rows, blockSize, false, - false); + block = arma::mat(K.memptr() + blockOffset, block.n_rows, block.n_cols, + false, false); } arma::qr_econ(Q, R, K); From d7dc971b06276a35f08c5f6550df1e5c72382f0e Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 23 Apr 2017 22:16:00 +0200 Subject: [PATCH 75/78] Remove unnecessary typecast (double to double). 
--- src/mlpack/core/optimizers/adam/adam_update.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index 540ae1ce01c..b420b57311a 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -96,8 +96,8 @@ class AdamUpdate v *= beta2; v += (1 - beta2) * (gradient % gradient); - const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); - const double biasCorrection2 = 1.0 - std::pow(beta2, (double) iteration); + const double biasCorrection1 = 1.0 - std::pow(beta1, iteration); + const double biasCorrection2 = 1.0 - std::pow(beta2, iteration); /** * It should be noted that the term, m / (arma::sqrt(v) + eps), in the From 504b02775c2196d59720f8426e9fed6080683039 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 23 Apr 2017 23:33:54 +0200 Subject: [PATCH 76/78] Minor style fixes (80 columns, clamp multiline, remove trailing spaces, comments). --- .../core/optimizers/ada_delta/ada_delta.hpp | 8 ++--- .../optimizers/ada_delta/ada_delta_update.hpp | 8 ++--- .../core/optimizers/ada_grad/ada_grad.hpp | 14 ++++---- .../optimizers/ada_grad/ada_grad_update.hpp | 4 +-- src/mlpack/core/optimizers/adam/adam.hpp | 24 ++++++------- .../core/optimizers/adam/adam_update.hpp | 16 ++++----- .../core/optimizers/adam/adamax_update.hpp | 4 +-- .../gradient_descent/gradient_descent.hpp | 6 ++-- .../core/optimizers/rmsprop/rmsprop.hpp | 6 ++-- .../optimizers/rmsprop/rmsprop_update.hpp | 13 +++---- .../sgd/update_policies/momentum_update.hpp | 36 +++++++++---------- .../sgd/update_policies/vanilla_update.hpp | 6 ++-- .../core/optimizers/smorms3/smorms3.hpp | 34 ++++++++++-------- .../optimizers/smorms3/smorms3_update.hpp | 31 ++++++++-------- 14 files changed, 108 insertions(+), 102 deletions(-) diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta.hpp index 8589374460c..9a209c63e7b 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta.hpp @@ -34,10 +34,10 @@ namespace optimization { * * @code * @article{Zeiler2012, - * author = {Matthew D. Zeiler}, - * title = {{ADADELTA:} An Adaptive Learning Rate Method}, - * journal = {CoRR}, - * year = {2012} + * author = {Matthew D. Zeiler}, + * title = {{ADADELTA:} An Adaptive Learning Rate Method}, + * journal = {CoRR}, + * year = {2012} * } * @endcode * diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp index 391e2a88bd3..fc3d0bcde02 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp @@ -29,10 +29,10 @@ namespace optimization { * * @code * @article{Zeiler2012, - * author = {Matthew D. Zeiler}, - * title = {{ADADELTA:} An Adaptive Learning Rate Method}, - * journal = {CoRR}, - * year = {2012} + * author = {Matthew D. 
Zeiler}, + * title = {{ADADELTA:} An Adaptive Learning Rate Method}, + * journal = {CoRR}, + * year = {2012} * } * @endcode * diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad.hpp index e96b619e858..6c622c8e3e7 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad.hpp @@ -30,13 +30,13 @@ namespace optimization { * * @code * @article{duchi2011adaptive, - * author = {Duchi, John and Hazan, Elad and Singer, Yoram}, - * title = {Adaptive subgradient methods for online learning and stochastic optimization}, - * journal = {Journal of Machine Learning Research}, - * volume = {12}, - * number = {Jul}, - * pages = {2121--2159}, - * year = {2011} + * author = {Duchi, John and Hazan, Elad and Singer, Yoram}, + * title = {Adaptive subgradient methods for online learning and stochastic optimization}, + * journal = {Journal of Machine Learning Research}, + * volume = {12}, + * number = {Jul}, + * pages = {2121--2159}, + * year = {2011} * } * @endcode * diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp index 2fb52573b1b..b086926388b 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp @@ -58,8 +58,8 @@ class AdaGradUpdate * gradient matrix is initialized to the zeros matrix with the same size as * gradient matrix (see mlpack::optimization::SGD::Optimizer). * - * @param rows number of rows in the gradient matrix. - * @param cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ void Initialize(const size_t rows, const size_t cols) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index d4808c6cd93..23615fdd9e6 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -37,11 +37,11 @@ namespace optimization { * * @code * @article{Kingma2014, - * author = {Diederik P. Kingma and Jimmy Ba}, - * title = {Adam: {A} Method for Stochastic Optimization}, - * journal = {CoRR}, - * year = {2014}, - * url = {http://arxiv.org/abs/1412.6980} + * author = {Diederik P. Kingma and Jimmy Ba}, + * title = {Adam: {A} Method for Stochastic Optimization}, + * journal = {CoRR}, + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode * @@ -95,13 +95,13 @@ class AdamType * function is visited in linear order. */ AdamType(DecomposableFunctionType& function, - const double stepSize = 0.001, - const double beta1 = 0.9, - const double beta2 = 0.999, - const double eps = 1e-8, - const size_t maxIterations = 100000, - const double tolerance = 1e-5, - const bool shuffle = true); + const double stepSize = 0.001, + const double beta1 = 0.9, + const double beta2 = 0.999, + const double eps = 1e-8, + const size_t maxIterations = 100000, + const double tolerance = 1e-5, + const bool shuffle = true); /** * Optimize the given function using Adam. The given starting point will be diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index b420b57311a..0f64c82327c 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -31,11 +31,11 @@ namespace optimization { * * @code * @article{Kingma2014, - * author = {Diederik P. 
Kingma and Jimmy Ba}, - * title = {Adam: {A} Method for Stochastic Optimization}, - * journal = {CoRR}, - * year = {2014}, - * url = {http://arxiv.org/abs/1412.6980} + * author = {Diederik P. Kingma and Jimmy Ba}, + * title = {Adam: {A} Method for Stochastic Optimization}, + * journal = {CoRR}, + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode */ @@ -65,8 +65,8 @@ class AdamUpdate * The Initialize method is called by SGD Optimizer method before the start of * the iteration update process. * - * @param rows number of rows in the gradient matrix. - * @param cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ void Initialize(const size_t rows, const size_t cols) @@ -105,7 +105,7 @@ class AdamUpdate * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). */ iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * - m / (arma::sqrt(v) + epsilon); + m / (arma::sqrt(v) + epsilon); } //! Get the value used to initialise the squared gradient parameter. diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index 6337efaae3a..9ac281cca8e 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -67,8 +67,8 @@ class AdaMaxUpdate * The Initialize method is called by SGD Optimizer method before the start of * the iteration update process. * - * @param rows number of rows in the gradient matrix. - * @param cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ void Initialize(const size_t rows, const size_t cols) diff --git a/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp b/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp index 78ee1038130..79fb8dded78 100644 --- a/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp +++ b/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp @@ -66,9 +66,9 @@ class GradientDescent * @param tolerance Maximum absolute tolerance to terminate algorithm. */ GradientDescent(FunctionType& function, - const double stepSize = 0.01, - const size_t maxIterations = 100000, - const double tolerance = 1e-5); + const double stepSize = 0.01, + const size_t maxIterations = 100000, + const double tolerance = 1e-5); /** * Optimize the given function using gradient descent. 
The given starting diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 4d67dd906b2..c3da802d701 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -38,9 +38,9 @@ namespace optimization { * * @code * @misc{tieleman2012, - * title={Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine - * Learning}, - * year={2012} + * title = {Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine + * Learning}, + * year = {2012} * } * @endcode * diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index b9a821a8c48..c86f09f0c3c 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -35,9 +35,9 @@ namespace optimization { * * @code * @misc{tieleman2012, - * title={Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine - * Learning}, - * year={2012} + * title = {Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine + * Learning}, + * year = {2012} * } * @endcode */ @@ -63,8 +63,8 @@ class RMSPropUpdate * The Initialize method is called by SGD Optimizer method before the start of * the iteration update process. * - * @param rows number of rows in the gradient matrix. - * @param cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ void Initialize(const size_t rows, const size_t cols) @@ -86,7 +86,8 @@ class RMSPropUpdate { meanSquaredGradient *= alpha; meanSquaredGradient += (1 - alpha) * (gradient % gradient); - iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + epsilon); + iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + + epsilon); } //! Get the value used to initialise the squared gradient parameter. diff --git a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp index 2947c355f90..1ea08560349 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp @@ -42,23 +42,23 @@ namespace optimization { * * @code * @article{rumelhart1988learning, - * title={Learning representations by back-propagating errors}, - * author={Rumelhart, David E. and Hinton, Geoffrey E. and - * Williams, Ronald J.}, - * journal={Cognitive Modeling}, - * volume={5}, - * number={3}, - * pages={1}, - * year={1988} + * title = {Learning representations by back-propagating errors}, + * author = {Rumelhart, David E. and Hinton, Geoffrey E. 
and + * Williams, Ronald J.}, + * journal = {Cognitive Modeling}, + * volume = {5}, + * number = {3}, + * pages = {1}, + * year = {1988} * } * * @code * @book{Goodfellow-et-al-2016, - * title={Deep Learning}, - * author={Ian Goodfellow and Yoshua Bengio and Aaron Courville}, - * publisher={MIT Press}, - * note={\url{http://www.deeplearningbook.org}}, - * year={2016} + * title = {Deep Learning}, + * author = {Ian Goodfellow and Yoshua Bengio and Aaron Courville}, + * publisher = {MIT Press}, + * note = {\url{http://www.deeplearningbook.org}}, + * year = {2016} * } */ class MomentumUpdate @@ -78,14 +78,14 @@ class MomentumUpdate * matrix is initialized to the zeros matrix with the same size as the * gradient matrix (see mlpack::optimization::SGD::Optimizer ) * - * @param n_rows number of rows in the gradient matrix. - * @param n_cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ - void Initialize(const size_t n_rows, - const size_t n_cols) + void Initialize(const size_t rows, + const size_t cols) { //Initialize am empty velocity matrix. - velocity = arma::zeros(n_rows, n_cols); + velocity = arma::zeros(rows, cols); } /** diff --git a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp index 1bd85bf1c37..d76264864b7 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp @@ -36,10 +36,10 @@ class VanillaUpdate * the iteration update process. The vanilla update doesn't initialize * anything. * - * @param n_rows number of rows in the gradient matrix. - * @param n_cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ - void Initialize(const size_t /* n_rows */, const size_t /* n_cols */) + void Initialize(const size_t /* rows */, const size_t /* cols */) { /* Do nothing. */ } /** diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 41643c80236..d8b67497079 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -31,16 +31,16 @@ namespace optimization { * * @code * @misc{Funk2015, - * author = {Simon Funk}, - * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, - * year = {2015} - * url = {http://sifter.org/~simon/journal/20150420.html} + * author = {Simon Funk}, + * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, + * year = {2015} + * url = {http://sifter.org/~simon/journal/20150420.html} * } * @endcode * * - * For SMORMS3 to work, a DecomposableFunctionType template parameter is required. - * This class must implement the following function: + * For SMORMS3 to work, a DecomposableFunctionType template parameter is + * required. This class must implement the following function: * * size_t NumFunctions(); * double Evaluate(const arma::mat& coordinates, const size_t i); @@ -57,7 +57,7 @@ namespace optimization { * is held internally in the DecomposableFunctionType). * * @tparam DecomposableFunctionType Decomposable objective function type to be - * minimized. + * minimized. */ template class SMORMS3 @@ -73,7 +73,8 @@ class SMORMS3 * * @param function Function to be optimized (minimized). * @param stepSize Step size for each iteration. 
-  * @param epsilon Value used to initialise the mean squared gradient parameter.
+  * @param epsilon Value used to initialise the mean squared gradient
+  *        parameter.
   * @param maxIterations Maximum number of iterations allowed (0 means no
   *        limit).
   * @param tolerance Maximum absolute tolerance to terminate algorithm.
@@ -81,14 +82,14 @@
   * function is visited in linear order.
   */
  SMORMS3(DecomposableFunctionType& function,
-         const double stepSize = 0.001,
-         const double epsilon = 1e-16,
-         const size_t maxIterations = 100000,
-         const double tolerance = 1e-5,
-         const bool shuffle = true);
+          const double stepSize = 0.001,
+          const double epsilon = 1e-16,
+          const size_t maxIterations = 100000,
+          const double tolerance = 1e-5,
+          const bool shuffle = true);

  /**
-  * Optimize the given function using SMORMS3. The given starting point will
+  * Optimize the given function using SMORMS3. The given starting point will
   * be modified to store the finishing point of the algorithm, and the final
   * objective value is returned.
   *
@@ -98,7 +99,10 @@
  double Optimize(arma::mat& iterate) { return optimizer.Optimize(iterate); }

  //! Get the instantiated function to be optimized.
-  const DecomposableFunctionType& Function() const { return optimizer.Function(); }
+  const DecomposableFunctionType& Function() const
+  {
+    return optimizer.Function();
+  }

  //! Modify the instantiated function.
  DecomposableFunctionType& Function() { return optimizer.Function(); }
diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp
index fbc534926be..3cf35eb7df2 100644
--- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp
+++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp
@@ -26,10 +26,10 @@ namespace optimization {
 *
 * @code
 * @misc{Funk2015,
- *   author = {Simon Funk},
- *   title = {RMSprop loses to SMORMS3 - Beware the Epsilon!},
- *   year = {2015}
- *   url = {http://sifter.org/~simon/journal/20150420.html}
+ *   author = {Simon Funk},
+ *   title = {RMSprop loses to SMORMS3 - Beware the Epsilon!},
+ *   year = {2015}
+ *   url = {http://sifter.org/~simon/journal/20150420.html}
 * }
 * @endcode
 */
@@ -40,23 +40,18 @@ class SMORMS3Update
  /**
   * Construct the SMORMS3 update policy with given epsilon parameter.
   *
-  * @param epsilon Value used to initialise the mean squared gradient parameter.
+  * @param epsilon Value used to initialise the mean squared gradient
+  *        parameter.
   */
-  SMORMS3Update(const double epsilon = 1e-16) :
-    epsilon(epsilon)
+  SMORMS3Update(const double epsilon = 1e-16) : epsilon(epsilon)
  { /* Do nothing. */ }

-  //! Get the value used to initialise the mean squared gradient parameter.
-  double Epsilon() const { return epsilon; }
-  //! Modify the value used to initialise the mean squared gradient parameter.
-  double& Epsilon() { return epsilon; }
-
  /**
   * The Initialize method is called by SGD::Optimize method with UpdatePolicy
   * SMORMS3Update before the start of the iteration update process.
   *
-  * @param rows number of rows in the gradient matrix.
-  * @param cols number of columns in the gradient matrix.
+  * @param rows Number of rows in the gradient matrix.
+  * @param cols Number of columns in the gradient matrix.
   */
  void Initialize(const size_t rows,
                  const size_t cols)
@@ -96,10 +91,16 @@ class SMORMS3Update
    mem %= (1 - x);
    mem += 1;
  }
+
+  //! Get the value used to initialise the mean squared gradient parameter.
+  double Epsilon() const { return epsilon; }
+  //! Modify the value used to initialise the mean squared gradient parameter.
+  double& Epsilon() { return epsilon; }
+
 private:
  //! The value used to initialise the mean squared gradient parameter.
  double epsilon;
-
+
  // The parameters mem, g and g2.
  arma::mat mem, g, g2;
};

From 2df61cf6fd9e0aaa5b37c65919e175f797796361 Mon Sep 17 00:00:00 2001
From: Ryan Curtin
Date: Mon, 24 Apr 2017 11:06:38 -0400
Subject: [PATCH 77/78] Mark extra overload deprecated.

---
 src/mlpack/core/optimizers/lbfgs/lbfgs.hpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp b/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
index 422e6c7cc2f..8c6f64168fb 100644
--- a/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
+++ b/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
@@ -94,11 +94,15 @@ class L_BFGS
   * finishing point of the algorithm, and the final objective value is
   * returned.
   *
+  * This overload will be removed in mlpack 3.0.0---you should set
+  * maxIterations in the constructor instead.
+  *
   * @param iterate Starting point (will be modified).
   * @param maxIterations Maximum number of iterations (0 specifies no limit).
   * @return Objective value of the final point.
   */
-  double Optimize(arma::mat& iterate, const size_t maxIterations);
+  mlpack_deprecated double Optimize(arma::mat& iterate,
+                                    const size_t maxIterations);

  //! Return the function that is being optimized.
  const FunctionType& Function() const { return function; }

From 7f7561d9cbda272055868c9d3c3bba08d9f0f2af Mon Sep 17 00:00:00 2001
From: Ryan Curtin
Date: Mon, 24 Apr 2017 16:59:39 -0400
Subject: [PATCH 78/78] Switch implementations.

---
 src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp b/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
index f8e44a383b7..38fa92bd5b1 100644
--- a/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
+++ b/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
@@ -326,9 +326,11 @@ L_BFGS<FunctionType>::MinPointIterate() const
 }

 template<typename FunctionType>
-inline double L_BFGS<FunctionType>::Optimize(arma::mat& iterate)
+inline double L_BFGS<FunctionType>::Optimize(arma::mat& iterate,
+                                             const size_t maxIterations)
 {
-  return Optimize(iterate, maxIterations);
+  this->maxIterations = maxIterations;
+  return Optimize(iterate);
 }

 /**
  * Use L-BFGS to optimize the given function, starting at the given iterate
  * point and finding the minimum. The maximum number of iterations is set in
  * the constructor.
  *
  * @param iterate Starting point (will be modified)
  */
 template<typename FunctionType>
-double L_BFGS<FunctionType>::Optimize(arma::mat& iterate,
-                                      const size_t maxIterations)
+double L_BFGS<FunctionType>::Optimize(arma::mat& iterate)
 {
  // Ensure that the cubes holding past iterations' information are the right
  // size. Also set the current best point value to the maximum.
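
For readers skimming the rmsprop_update.hpp hunk above: the statement that is reflowed there (with no behaviour change) is the core RMSProp rule, which keeps a running mean of squared gradients and divides the step by its square root. Below is a minimal standalone sketch of that rule using Armadillo directly, outside of mlpack's SGD/UpdatePolicy plumbing; the toy quadratic objective and the alpha, stepSize, and epsilon values are illustrative assumptions, not part of the patch.

// Minimal sketch of the RMSProp step shown in the rmsprop_update.hpp hunk,
// applied to the toy objective f(x) = 0.5 * ||x||^2 (so the gradient is x).
// The objective and the alpha/stepSize/epsilon values are assumptions made
// only for this illustration.
#include <armadillo>

int main()
{
  arma::mat iterate = arma::randu<arma::mat>(10, 1);
  arma::mat meanSquaredGradient = arma::zeros<arma::mat>(10, 1);

  const double alpha = 0.99;     // Smoothing factor for the squared gradient.
  const double stepSize = 0.01;  // Learning rate.
  const double epsilon = 1e-8;   // Avoids division by zero.

  for (size_t i = 0; i < 1000; ++i)
  {
    const arma::mat gradient = iterate;  // Gradient of 0.5 * ||x||^2.

    // The same three statements that appear in the hunk above.
    meanSquaredGradient *= alpha;
    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
    iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) +
        epsilon);
  }

  iterate.print("final iterate (should be close to zero):");
  return 0;
}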
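
The momentum and vanilla hunks only touch naming and comments, but they expose the two-part contract an SGD update policy satisfies: Initialize(rows, cols) to size any state, plus a per-step update. The sketch below spells that contract out with a self-contained policy; the Update() body follows the classical momentum rule from the Rumelhart et al. reference cited in the comment, and is an assumption here, since the patch itself does not show MomentumUpdate's Update() method.

// Sketch of the update-policy shape used by the SGD code touched above: the
// policy exposes Initialize(rows, cols) and an Update() step.  The momentum
// rule below is the classical velocity-accumulation form and is assumed for
// illustration; it is not copied from mlpack's MomentumUpdate.
#include <armadillo>

class SimpleMomentumUpdate
{
 public:
  SimpleMomentumUpdate(const double momentum = 0.9) : momentum(momentum) { }

  // Called once before optimization starts; sizes the velocity matrix.
  void Initialize(const size_t rows, const size_t cols)
  {
    velocity = arma::zeros<arma::mat>(rows, cols);
  }

  // One step: accumulate velocity, then move the iterate along it.
  void Update(arma::mat& iterate,
              const double stepSize,
              const arma::mat& gradient)
  {
    velocity = momentum * velocity - stepSize * gradient;
    iterate += velocity;
  }

 private:
  double momentum;
  arma::mat velocity;
};

int main()
{
  // Minimize 0.5 * ||x||^2, whose gradient is simply x.
  arma::mat iterate = arma::ones<arma::mat>(5, 1);
  SimpleMomentumUpdate update(0.9);
  update.Initialize(iterate.n_rows, iterate.n_cols);

  for (size_t i = 0; i < 500; ++i)
  {
    const arma::mat gradient = iterate;
    update.Update(iterate, 0.05, gradient);
  }

  iterate.print("final iterate:");
  return 0;
}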
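
The SMORMS3 hunks are comment and whitespace cleanups, but together they spell out the optimizer's public surface: a DecomposableFunctionType providing NumFunctions(), Evaluate(coordinates, i), and Gradient(coordinates, i, gradient), the constructor defaults listed in the @param block, and Optimize(iterate). A short usage sketch against a made-up decomposable objective follows; SumOfSquaresFunction is an illustrative stand-in rather than an mlpack type, and whether this compiles against a particular mlpack snapshot is an assumption.

// Usage sketch for the SMORMS3 interface documented in the hunks above.  The
// SumOfSquaresFunction objective is a made-up stand-in satisfying the listed
// DecomposableFunctionType requirements.
#include <iostream>
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/smorms3/smorms3.hpp>

using namespace mlpack::optimization;

// f(x) = sum_i 0.5 * x_i^2, decomposed so that function i only sees x_i.
class SumOfSquaresFunction
{
 public:
  SumOfSquaresFunction(const size_t dimension) : dimension(dimension) { }

  size_t NumFunctions() const { return dimension; }

  double Evaluate(const arma::mat& coordinates, const size_t i) const
  {
    return 0.5 * coordinates(i) * coordinates(i);
  }

  void Gradient(const arma::mat& coordinates,
                const size_t i,
                arma::mat& gradient) const
  {
    gradient = arma::zeros<arma::mat>(coordinates.n_rows, coordinates.n_cols);
    gradient(i) = coordinates(i);
  }

 private:
  size_t dimension;
};

int main()
{
  SumOfSquaresFunction f(10);

  // Constructor arguments follow the @param list in smorms3.hpp:
  // stepSize, epsilon, maxIterations, tolerance, shuffle.
  SMORMS3<SumOfSquaresFunction> optimizer(f, 0.001, 1e-16, 100000, 1e-5, true);

  arma::mat coordinates = arma::randu<arma::mat>(10, 1);
  const double objective = optimizer.Optimize(coordinates);

  std::cout << "final objective: " << objective << std::endl;
  return 0;
}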
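
Patches 77 and 78 deprecate L_BFGS::Optimize(iterate, maxIterations) and reroute it through the single-argument overload, so callers should fix the iteration budget on the optimizer before calling Optimize(iterate). A hypothetical migration sketch is below; QuadraticFunction is a stand-in objective, and the MaxIterations() accessor and the minimal Evaluate()/Gradient() requirements are assumptions based on the surrounding L_BFGS interface rather than anything these two patches introduce.

// Hypothetical migration sketch for the L-BFGS change in patches 77/78.
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>

using namespace mlpack::optimization;

// Minimal objective: f(x) = 0.5 * ||x||^2.
class QuadraticFunction
{
 public:
  double Evaluate(const arma::mat& coordinates)
  {
    return 0.5 * arma::accu(coordinates % coordinates);
  }

  void Gradient(const arma::mat& coordinates, arma::mat& gradient)
  {
    gradient = coordinates;
  }
};

int main()
{
  QuadraticFunction f;
  arma::mat coordinates = arma::randu<arma::mat>(10, 1);

  L_BFGS<QuadraticFunction> lbfgs(f);

  // Deprecated after patch 77; patch 78 makes it simply store maxIterations
  // and delegate to the one-argument overload:
  //   lbfgs.Optimize(coordinates, 500);

  // Preferred: fix the iteration budget up front, then optimize.  The
  // MaxIterations() accessor is assumed here; alternatively pass the limit
  // through the constructor, as the deprecation note suggests.
  lbfgs.MaxIterations() = 500;
  lbfgs.Optimize(coordinates);

  return 0;
}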