From f5e2a548e1259aad494ee577a0e3849cd0fb07c8 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 18 Feb 2017 18:34:59 +0100 Subject: [PATCH 01/78] Add implementation of the randomized block krylov svd method. --- src/mlpack/methods/CMakeLists.txt | 1 + .../methods/block_krylov_svd/CMakeLists.txt | 15 ++ .../randomized_block_krylov_svd.cpp | 88 ++++++++++++ .../randomized_block_krylov_svd.hpp | 130 ++++++++++++++++++ 4 files changed, 234 insertions(+) create mode 100644 src/mlpack/methods/block_krylov_svd/CMakeLists.txt create mode 100644 src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp create mode 100644 src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index dde69de0b1b..2218c378c10 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -21,6 +21,7 @@ set(DIRS approx_kfn amf ann + block_krylov_svd cf decision_stump det diff --git a/src/mlpack/methods/block_krylov_svd/CMakeLists.txt b/src/mlpack/methods/block_krylov_svd/CMakeLists.txt new file mode 100644 index 00000000000..6380befb28a --- /dev/null +++ b/src/mlpack/methods/block_krylov_svd/CMakeLists.txt @@ -0,0 +1,15 @@ +# Define the files we need to compile. +# Anything not in this list will not be compiled into mlpack. +set(SOURCES + randomized_block_krylov_svd.hpp + randomized_block_krylov_svd.cpp +) + +# Add directory name to sources. +set(DIR_SRCS) +foreach(file ${SOURCES}) + set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) +endforeach() +# Append sources (with directory name) to list of all mlpack sources (used at +# the parent scope). +set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp new file mode 100644 index 00000000000..9009c5f6422 --- /dev/null +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -0,0 +1,88 @@ +/** + * @file randomized_block_krylov_svd.cpp + * @author Marcus Edel + * + * Implementation of the randomized SVD method. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#include "randomized_block_krylov_svd.hpp" + +namespace mlpack { +namespace svd { + +RandomizedBlockKrylovSVD::RandomizedBlockKrylovSVD(const arma::mat& data, + arma::mat& u, + arma::vec& s, + arma::mat& v, + const size_t maxIterations, + const size_t rank, + const size_t blockSize) : + maxIterations(maxIterations), + blockSize(blockSize) +{ + if (rank == 0) + { + Apply(data, u, s, v, data.n_rows); + } + else + { + Apply(data, u, s, v, rank); + } +} + +RandomizedBlockKrylovSVD::RandomizedBlockKrylovSVD(const size_t maxIterations, + const size_t blockSize) : + maxIterations(maxIterations), + blockSize(blockSize) +{ + /* Nothing to do here */ +} + +void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, + arma::mat& u, + arma::vec& s, + arma::mat& v, + const size_t rank) +{ + arma::mat Q, R, block; + + if (blockSize == 0) + { + blockSize = rank + 10; + } + + // Random block initialization. + arma::mat G = arma::randn(data.n_rows, blockSize); + + // Construct and orthonormalize Krlov subspace. 
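+  // The subspace is built one block at a time: the first block is an
+  // orthonormal basis for data * G, and each subsequent block applies
+  // (data * data.t()) to the previous block before re-orthonormalizing,
+  // following Musco and Musco (2015).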
+ arma::mat K(data.n_rows, blockSize * (maxIterations + 1)); + arma::qr_econ(block, R, data * G); + + // Copy the temporary memory to the right place. + K.submat(0, 0, block.n_rows - 1, block.n_cols - 1) = block; + + for (size_t i = 0, b = block.n_cols; i < maxIterations; ++i, + b += block.n_cols) + { + arma::qr_econ(block, R, data * (data.t() * block)); + K.submat(0, b, block.n_rows - 1, b + block.n_cols - 1) = block; + } + + arma::qr_econ(Q, R, K); + + // Approximate eigenvalues and eigenvectors using Rayleigh–Ritz method. + arma::svd_econ(u, s, v, Q.t() * data); + + // Do economical singular value decomposition and compute only the + // approximations of the left singular vectors by using the centered data + // applied to Q. + u = Q * u; +} + +} // namespace svd +} // namespace mlpack diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp new file mode 100644 index 00000000000..964a654ea9d --- /dev/null +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp @@ -0,0 +1,130 @@ +/** + * @file randomized_block_krylov_svd.hpp + * @author Marcus Edel + * + * An implementation of the randomized SVD method. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#ifndef MLPACK_METHODS_BLOCK_KRYLOV_SVD_RANDOMIZED_BLOCK_KRYLOV_SVD_HPP +#define MLPACK_METHODS_BLOCK_KRYLOV_SVD_RANDOMIZED_BLOCK_KRYLOV_SVD_HPP + +#include + +namespace mlpack { +namespace svd { + +/** + * Randomized block krylov SVD is a matrix factorization that is based on + * randomized matrix approximation techniques, developed in in + * "Randomized Block Krylov Methods for Stronger and Faster Approximate + * Singular Value Decomposition". + * + * For more information, see the following. + * + * @code + * @inproceedings{Musco2015, + * author = {Cameron Musco and Christopher Musco}, + * title = {Randomized Block Krylov Methods for Stronger and Faster + * Approximate Singular Value Decomposition}, + * booktitle = {Advances in Neural Information Processing Systems 28: Annual + * Conference on Neural Information Processing Systems 2015, + * December 7-12, 2015, Montreal, Quebec, Canada}, + * pages = {1396--1404}, + * year = {2015}, + * } + * @endcode + * + * @code + * + * An example of how to use the interface is shown below: + * + * @code + * arma::mat data; // Rating data in the form of coordinate list. + * + * const size_t rank = 20; // Rank used for the decomposition. + * + * // Make a RandomizedBlockKrylovSVD object. + * RandomizedBlockKrylovSVD bSVD(); + * + * arma::mat u, s, v; + * + * // Use the Apply() method to get a factorization. + * bSVD.Apply(data, u, s, v, rank); + * @endcode + */ +class RandomizedBlockKrylovSVD +{ + public: + /** + * Create object for the randomized block krylov SVD method. + * + * @param data Data matrix. + * @param u First unitary matrix. + * @param v Second unitary matrix. + * @param s Diagonal matrix of singular values. + * @param maxIterations Number of iterations for the power method + * (Default: 2). + * @param rank Rank of the approximation (Default: number of rows.) + * @param blockSize The block size, must be >= rank (Default: rank + 10). 
+ */ + RandomizedBlockKrylovSVD(const arma::mat& data, + arma::mat& u, + arma::vec& s, + arma::mat& v, + const size_t maxIterations = 2, + const size_t rank = 0, + const size_t blockSize = 0); + + /** + * Create object for the randomized block krylov SVD method. + * + * @param maxIterations Number of iterations for the power method + * (Default: 2). + * @param blockSize The block size, must be >= rank (Default: rank + 10). + */ + RandomizedBlockKrylovSVD(const size_t maxIterations = 2, + const size_t blockSize = 0); + + /** + * Apply Principal Component Analysis to the provided data set using the + * randomized block krylov SVD. + * + * @param data Data matrix. + * @param u First unitary matrix. + * @param v Second unitary matrix. + * @param s Diagonal matrix of singular values. + * @param rank Rank of the approximation. + */ + void Apply(const arma::mat& data, + arma::mat& u, + arma::vec& s, + arma::mat& v, + const size_t rank); + + //! Get the number of iterations for the power method. + size_t MaxIterations() const { return maxIterations; } + //! Modify the number of iterations for the power method. + size_t& MaxIterations() { return maxIterations; } + + //! Get the block size. + size_t BlockSize() const { return blockSize; } + //! Modify the block size. + size_t& BlockSize() { return blockSize; } + + private: + //! Locally stored number of iterations for the power method. + size_t maxIterations; + + //! The block size value. + size_t blockSize; +}; + +} // namespace svd +} // namespace mlpack + +#endif From 4db6379cd8297932145aff7df9f649d779e7186f Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 18 Feb 2017 18:35:49 +0100 Subject: [PATCH 02/78] Add test cases for the randomized block krylov svd method. --- src/mlpack/tests/CMakeLists.txt | 1 + src/mlpack/tests/block_krylov_svd_test.cpp | 116 +++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 src/mlpack/tests/block_krylov_svd_test.cpp diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index 514c4534502..8e95e6069bb 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -11,6 +11,7 @@ add_executable(mlpack_test armadillo_svd_test.cpp aug_lagrangian_test.cpp binarize_test.cpp + block_krylov_svd_test.cpp cf_test.cpp cli_test.cpp convolution_test.cpp diff --git a/src/mlpack/tests/block_krylov_svd_test.cpp b/src/mlpack/tests/block_krylov_svd_test.cpp new file mode 100644 index 00000000000..4ca9c6258d2 --- /dev/null +++ b/src/mlpack/tests/block_krylov_svd_test.cpp @@ -0,0 +1,116 @@ +/** + * @file block_krylov_svd_test.cpp + * @author Marcus Edel + * + * Test file for the Randomized Block Krylov SVD class. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#include +#include + +#include +#include "test_tools.hpp" + +BOOST_AUTO_TEST_SUITE(BlockKrylovSVDTest); + +using namespace mlpack; + +// Generate a low rank matrix with bell-shaped singular values. 
+void CreateNoisyLowRankMatrix(arma::mat& data, + const size_t rows, + const size_t cols, + const size_t rank, + const double strength) +{ + arma::mat R, U, V; + const size_t n = std::min(rows, cols); + + arma::qr_econ(U, R, arma::randn(rows, n)); + arma::qr_econ(V, R, arma::randn(cols, n)); + + arma::vec ids = arma::linspace(0, n - 1, n); + + arma::vec lowRank = ((1 - strength) * + arma::exp(-1.0 * arma::pow((ids / rank), 2))); + arma::vec tail = strength * arma::exp(-0.1 * ids / rank); + + arma::mat s = arma::eye(n, n) * (lowRank + tail); + data = (U * s) * V.t(); +} + +/** + * The reconstruction and sigular value error of the obtained SVD should be + * small. + */ +BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDReconstructionError) +{ + arma::mat U = arma::randn(3, 20); + arma::mat V = arma::randn(10, 3); + + arma::mat R; + arma::qr_econ(U, R, U); + arma::qr_econ(V, R, V); + + arma::mat s = arma::diagmat(arma::vec("1 0.1 0.01")); + + arma::mat data = arma::trans(U * arma::diagmat(s) * V.t()); + + // Center the data into a temporary matrix. + arma::mat centeredData; + math::Center(data, centeredData); + + arma::mat U1, U2, V1, V2; + arma::vec s1, s2, s3; + + arma::svd_econ(U1, s1, V1, centeredData); + + svd::RandomizedBlockKrylovSVD rSVD(20, 10); + rSVD.Apply(centeredData, U2, s2, V2, 3); + + // Use the same amount of data for the compariosn (matrix rank). + s3 = s1.subvec(0, s2.n_elem - 1); + + // The sigular value error should be small. + double error = arma::norm(s2 - s3, "frob") / arma::norm(s2, "frob"); + BOOST_REQUIRE_SMALL(error, 1e-5); + + arma::mat reconstruct = U2 * arma::diagmat(s2) * V2.t(); + + // The relative reconstruction error should be small. + error = arma::norm(centeredData - reconstruct, "frob") / + arma::norm(centeredData, "frob"); + BOOST_REQUIRE_SMALL(error, 1e-5); +} + +/* + * Check if the method can handle noisy matrices. + */ +BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNosiyLowRankTest) +{ + arma::mat data; + CreateNoisyLowRankMatrix(data, 100, 1000, 5, 1.0); + + const size_t rank = 5; + + arma::mat U1, U2, V1, V2; + arma::vec s1, s2, s3; + + arma::svd_econ(U1, s1, V1, data); + + svd::RandomizedBlockKrylovSVD rSVDA(data, U2, s2, V2, 1, rank, 5); + + double error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); + BOOST_REQUIRE_SMALL(error, 0.1); + + svd::RandomizedBlockKrylovSVD rSVDB(data, U2, s2, V2, 10, rank, 20); + + error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); + BOOST_REQUIRE_SMALL(error, 1e-3); +} + +BOOST_AUTO_TEST_SUITE_END(); From 397ef54120ceed8ad463e9db6d93f158e6a02eff Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 19 Feb 2017 17:53:02 +0100 Subject: [PATCH 03/78] Remove unused open @code block. 
--- .../methods/block_krylov_svd/randomized_block_krylov_svd.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp index 964a654ea9d..f7d77ab4c50 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp @@ -39,8 +39,6 @@ namespace svd { * } * @endcode * - * @code - * * An example of how to use the interface is shown below: * * @code From 6cb13c1c92ea82287171ae7d2baa9c1dab77bc6a Mon Sep 17 00:00:00 2001 From: Abhinav Moudgil Date: Wed, 29 Mar 2017 17:56:29 +0530 Subject: [PATCH 04/78] Fix VanillaNetworkTest --- .../tests/convolutional_network_test.cpp | 88 ++++++++----------- 1 file changed, 38 insertions(+), 50 deletions(-) diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index efafcc9c24c..6d79f7605b2 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -1,6 +1,7 @@ /** * @file convolutional_network_test.cpp * @author Marcus Edel + * @author Abhinav Moudgil * * Tests the convolutional neural network. * @@ -23,7 +24,6 @@ using namespace mlpack; using namespace mlpack::ann; using namespace mlpack::optimization; - BOOST_AUTO_TEST_SUITE(ConvolutionalNetworkTest); /** @@ -47,11 +47,13 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) { if (i < nPoints / 2) { - Y(i) = 4; + // class 1 - digit = 4 + Y(i) = 1; } else { - Y(i) = 9; + // class 2 - digit = 9 + Y(i) = 2; } } @@ -73,60 +75,46 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) * +---+ +---+ +---+ +---+ +---+ +---+ */ - // It isn't guaranteed that the network will converge in the specified number - // of iterations using random weights. If this works 1 of 5 times, I'm fine - // with that. All I want to know is that the network is able to escape from - // local minima and to solve the task. 
- size_t successes = 0; - for (size_t trial = 0; trial < 5; ++trial) + FFN, RandomInitialization> model; + + model.Add >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28); + model.Add >(); + model.Add >(8, 8, 2, 2); + model.Add >(8, 12, 2, 2); + model.Add >(); + model.Add >(2, 2, 2, 2); + model.Add >(192, 20); + model.Add >(); + model.Add >(20, 10); + model.Add >(); + model.Add >(10, 2); + model.Add >(); + + RMSprop opt(model, 0.001, 0.88, 1e-8, 5000, -1); + + model.Train(std::move(X), std::move(Y), opt); + + arma::mat predictionTemp; + model.Predict(X, predictionTemp); + arma::mat prediction = arma::zeros(1, predictionTemp.n_cols); + + for (size_t i = 0; i < predictionTemp.n_cols; ++i) { - FFN, GaussianInitialization> model; - - model.Add >(1, 8, 5, 5, 1, 1, 0, 0, 28, 28); - model.Add >(); - model.Add >(8, 8, 2, 2); - model.Add >(8, 12, 2, 2); - model.Add >(); - model.Add >(2, 2, 2, 2); - model.Add >(192, 20); - model.Add >(); - model.Add >(20, 30); - model.Add >(); - model.Add >(30, 10); - model.Add >(); - - RMSprop opt(model, 0.01, 0.88, 1e-8, 5000, -1); - - model.Train(std::move(X), std::move(Y), opt); - - arma::mat predictionTemp; - model.Predict(X, predictionTemp); - arma::mat prediction = arma::zeros(1, predictionTemp.n_cols); - - for (size_t i = 0; i < predictionTemp.n_cols; ++i) - { - prediction(i) = arma::as_scalar(arma::find( + prediction(i) = arma::as_scalar(arma::find( arma::max(predictionTemp.col(i)) == predictionTemp.col(i), 1)) + 1; - } - - size_t error = 0; - for (size_t i = 0; i < X.n_cols; i++) - { - if (prediction(i) == Y(i)) - { - error++; - } - } + } - double classificationError = 1 - double(error) / X.n_cols; - if (classificationError <= 0.2) + size_t correct = 0; + for (size_t i = 0; i < X.n_cols; i++) + { + if (prediction(i) == Y(i)) { - ++successes; - break; + correct++; } } - BOOST_REQUIRE_GE(successes, 1); + double classificationError = 1 - double(correct) / X.n_cols; + BOOST_REQUIRE_LE(classificationError, 0.2); } BOOST_AUTO_TEST_SUITE_END(); From c7872c20c610e45ac24ed5e35b9a35c11a84fb78 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 1 Mar 2017 21:07:00 +0530 Subject: [PATCH 05/78] Implement the SMORMS3 optimizer --- .../core/optimizers/smorms3/CMakeLists.txt | 11 ++ .../core/optimizers/smorms3/smorms3.hpp | 153 ++++++++++++++++++ .../core/optimizers/smorms3/smorms3_impl.hpp | 146 +++++++++++++++++ 3 files changed, 310 insertions(+) create mode 100644 src/mlpack/core/optimizers/smorms3/CMakeLists.txt create mode 100644 src/mlpack/core/optimizers/smorms3/smorms3.hpp create mode 100644 src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp diff --git a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt new file mode 100644 index 00000000000..68b6de814cd --- /dev/null +++ b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt @@ -0,0 +1,11 @@ +set(SOURCES + smorms3.hpp + smorms3_impl.hpp +) + +set(DIR_SRCS) +foreach(file ${SOURCES}) + set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) +endforeach() + +set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp new file mode 100644 index 00000000000..d4ac9810b58 --- /dev/null +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -0,0 +1,153 @@ +/** + * @file smorms3.hpp + * @author Vivek Pal + * + * SMORMS3 i.e. squared mean over root mean squared cubed optimizer. 
It is a + * hybrid of RMSprop, which estimates a safe and optimal distance based on + * curvature and Yann LeCun’s method in "No more pesky learning rates". + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP +#define __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * SMORMS3 is an optimizer that estimates a safe and optimal distance based on + * curvature and normalizing the stepsize in the parameter space. It is a hybrid + * of RMSprop and Yann LeCun’s method in "No more pesky learning rates". + * + * For more information, see the following. + * + * @code + * @misc{Funk2015, + * author = {Simon Funk}, + * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, + * year = {2015} + * } + * @endcode + * + * + * For SMORMS3 to work, a DecomposableFunctionType template parameter is required. + * This class must implement the following function: + * + * size_t NumFunctions(); + * double Evaluate(const arma::mat& coordinates, const size_t i); + * void Gradient(const arma::mat& coordinates, + * const size_t i, + * arma::mat& gradient); + * + * NumFunctions() should return the number of functions (\f$n\f$), and in the + * other two functions, the parameter i refers to which individual function (or + * gradient) is being evaluated. So, for the case of a data-dependent function, + * such as NCA (see mlpack::nca::NCA), NumFunctions() should return the number + * of points in the dataset, and Evaluate(coordinates, 0) will evaluate the + * objective function on the first point in the dataset (presumably, the dataset + * is held internally in the DecomposableFunctionType). + * + * @tparam DecomposableFunctionType Decomposable objective function type to be + * minimized. + */ +template +class SMORMS3 +{ + public: + /** + * Construct the SMORMS3 optimizer with the given function and parameters. The + * defaults here are not necessarily good for the given problem, so it is + * suggested that the values used be tailored to the task at hand. The + * maximum number of iterations refers to the maximum number of points that + * are processed (i.e., one iteration equals one point; one iteration does not + * equal one pass over the dataset). + * + * @param function Function to be optimized (minimized). + * @param lRate Learning rate or step size for each iteration. + * @param eps Value used to initialise the mean squared gradient parameter. + * @param maxIterations Maximum number of iterations allowed (0 means no + * limit). + * @param tolerance Maximum absolute tolerance to terminate algorithm. + * @param shuffle If true, the function order is shuffled; otherwise, each + * function is visited in linear order. + */ + SMORMS3(DecomposableFunctionType& function, + const double lRate = 0.001, + const double eps = 1e-16, + const size_t maxIterations = 100000, + const double tolerance = 1e-5, + const bool shuffle = true); + + /** + * Optimize the given function using SMORMS3. The given starting point will + * be modified to store the finishing point of the algorithm, and the final + * objective value is returned. + * + * @param iterate Starting point (will be modified). + * @return Objective value of the final point. 
+ */ + double Optimize(arma::mat& iterate); + + //! Get the instantiated function to be optimized. + const DecomposableFunctionType& Function() const { return function; } + //! Modify the instantiated function. + DecomposableFunctionType& Function() { return function; } + + //! Get the learning rate. + double LRate() const { return lRate; } + //! Modify the learning rate. + double& LRate() { return lRate; } + + //! Get the value used to initialise the mean squared gradient parameter. + double Epsilon() const { return eps; } + //! Modify the value used to initialise the mean squared gradient parameter. + double& Epsilon() { return eps; } + + //! Get the maximum number of iterations (0 indicates no limit). + size_t MaxIterations() const { return maxIterations; } + //! Modify the maximum number of iterations (0 indicates no limit). + size_t& MaxIterations() { return maxIterations; } + + //! Get the tolerance for termination. + double Tolerance() const { return tolerance; } + //! Modify the tolerance for termination. + double& Tolerance() { return tolerance; } + + //! Get whether or not the individual functions are shuffled. + bool Shuffle() const { return shuffle; } + //! Modify whether or not the individual functions are shuffled. + bool& Shuffle() { return shuffle; } + + private: + //! The instantiated function. + DecomposableFunctionType& function; + + //! The learning rate for each example. + double lRate; + + //! The value used to initialise the mean squared gradient parameter. + double eps; + + //! The maximum number of allowed iterations. + size_t maxIterations; + + //! The tolerance for termination. + double tolerance; + + //! Controls whether or not the individual functions are shuffled when + //! iterating. + bool shuffle; +}; + +} // namespace optimization +} // namespace mlpack + +// Include implementation. +#include "smorms3_impl.hpp" + +#endif diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp new file mode 100644 index 00000000000..f7c5f37cd74 --- /dev/null +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -0,0 +1,146 @@ +/** + * @file smorms3_impl.hpp + * @author Vivek Pal + * + * Implementation of the SMORMS3 optimizer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP +#define __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP + +// In case it hasn't been included yet. +#include "smorms3.hpp" + +namespace mlpack { +namespace optimization { + +template +SMORMS3::SMORMS3(DecomposableFunctionType& function, + const double lRate, + const double eps, + const size_t maxIterations, + const double tolerance, + const bool shuffle) : + function(function), + lRate(lRate), + eps(eps), + maxIterations(maxIterations), + tolerance(tolerance), + shuffle(shuffle) +{ /* Nothing to do. */ } + +//! Optimize the function (minimize). +template +double SMORMS3::Optimize(arma::mat& iterate) +{ + // Find the number of functions to use. + const size_t numFunctions = function.NumFunctions(); + + // This is used only if shuffle is true. 
+ arma::Col visitationOrder; + if (shuffle) + visitationOrder = arma::shuffle(arma::linspace>(0, + (numFunctions - 1), numFunctions)); + + // To keep track of where we are and how things are going. + size_t currentFunction = 0; + double overallObjective = 0; + double lastObjective = DBL_MAX; + + // Calculate the first objective function. + for (size_t i = 0; i < numFunctions; ++i) + overallObjective += function.Evaluate(iterate, i); + + // Initialise the parameters mem, g and g2. + arma::mat mem = arma::ones(iterate.n_rows, iterate.n_cols); + + arma::mat g = arma::zeros(iterate.n_rows, iterate.n_cols); + + arma::mat g2 = arma::zeros(iterate.n_rows, iterate.n_cols); + + // Now iterate! + arma::mat gradient(iterate.n_rows, iterate.n_cols); + + for (size_t i = 1; i != maxIterations; ++i, ++currentFunction) + { + // Is this iteration the start of a sequence? + if ((currentFunction % numFunctions) == 0) + { + // Output current objective function. + Log::Info << "SMORMS3: iteration " << i << ", objective " + << overallObjective << "." << std::endl; + + if (std::isnan(overallObjective) || std::isinf(overallObjective)) + { + Log::Warn << "SMORMS3: converged to " << overallObjective + << "; terminating with failure. Try a smaller step size?" + << std::endl; + return overallObjective; + } + + if (std::abs(lastObjective - overallObjective) < tolerance) + { + Log::Info << "SMORMS3: minimized within tolerance " << tolerance << "; " + << "terminating optimization." << std::endl; + return overallObjective; + } + + // Reset the counter variables. + lastObjective = overallObjective; + overallObjective = 0; + currentFunction = 0; + + if (shuffle) // Determine order of visitation. + visitationOrder = arma::shuffle(visitationOrder); + } + + // Evaluate the gradient for this iteration. + if (shuffle) + function.Gradient(iterate, visitationOrder[currentFunction], gradient); + else + function.Gradient(iterate, currentFunction, gradient); + + // And update the iterate. + arma::mat r = 1 / (mem + 1); + + g = (1 - r) % g; + g += r % gradient; + + g2 = (1 - r) % g2; + g2 += r % (gradient % gradient); + + arma::mat x = (g % g) / (g2 + eps); + + arma::mat lRateMat(x.n_rows, x.n_cols); + x.fill(lRate); + + iterate -= gradient * arma::min(x, lRate) / (arma::sqrt(g2) + eps); + + mem *= (1 - x); + mem += 1; + + // Now add that to the overall objective function. + if (shuffle) + overallObjective += function.Evaluate(iterate, + visitationOrder[currentFunction]); + else + overallObjective += function.Evaluate(iterate, currentFunction); + } + + Log::Info << "SMORMS3: maximum iterations (" << maxIterations << ") reached; " + << "terminating optimization." << std::endl; + // Calculate final objective. 
+ overallObjective = 0; + for (size_t i = 0; i < numFunctions; ++i) + overallObjective += function.Evaluate(iterate, i); + return overallObjective; +} + +} // namespace optimization +} // namespace mlpack + +#endif From 946ba643d094c6407ba1e65940c8dea129981639 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 1 Mar 2017 21:09:02 +0530 Subject: [PATCH 06/78] Add tests for SMORMS3 optimizer --- src/mlpack/tests/smorms3_test.cpp | 109 ++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 src/mlpack/tests/smorms3_test.cpp diff --git a/src/mlpack/tests/smorms3_test.cpp b/src/mlpack/tests/smorms3_test.cpp new file mode 100644 index 00000000000..370e6d3f8de --- /dev/null +++ b/src/mlpack/tests/smorms3_test.cpp @@ -0,0 +1,109 @@ +/** + * @file smorms3_test.cpp + * @author Vivek Pal + * + * Tests the SMORMS3 optimizer. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#include + +#include +#include +#include + +#include +#include "test_tools.hpp" + +using namespace arma; +using namespace mlpack::optimization; +using namespace mlpack::optimization::test; + +using namespace mlpack::distribution; +using namespace mlpack::regression; + +using namespace mlpack; + +BOOST_AUTO_TEST_SUITE(SMORMS3Test); + +/** + * Tests the SMORMS3 optimizer using a simple test function. + */ +BOOST_AUTO_TEST_CASE(SimpleSMORMS3TestFunction) +{ + SGDTestFunction f; + SMORMS3 optimizer(f, 1e-3, 1e-16, 5000000, 1e-9, true); + + arma::mat coordinates = f.GetInitialPoint(); + optimizer.Optimize(coordinates); + + BOOST_REQUIRE_SMALL(coordinates[0], 0.1); + BOOST_REQUIRE_SMALL(coordinates[1], 0.1); + BOOST_REQUIRE_SMALL(coordinates[2], 0.1); +} + +/** + * Run SMORMS3 on logistic regression and make sure the results are acceptable. + */ +BOOST_AUTO_TEST_CASE(SMORMS3LogisticRegressionTest) +{ + // Generate a two-Gaussian dataset. + GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye(3, 3)); + GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye(3, 3)); + + arma::mat data(3, 1000); + arma::Row responses(1000); + for (size_t i = 0; i < 500; ++i) + { + data.col(i) = g1.Random(); + responses[i] = 0; + } + for (size_t i = 500; i < 1000; ++i) + { + data.col(i) = g2.Random(); + responses[i] = 1; + } + + // Shuffle the dataset. + arma::uvec indices = arma::shuffle(arma::linspace(0, + data.n_cols - 1, data.n_cols)); + arma::mat shuffledData(3, 1000); + arma::Row shuffledResponses(1000); + for (size_t i = 0; i < data.n_cols; ++i) + { + shuffledData.col(i) = data.col(indices[i]); + shuffledResponses[i] = responses[indices[i]]; + } + + // Create a test set. + arma::mat testData(3, 1000); + arma::Row testResponses(1000); + for (size_t i = 0; i < 500; ++i) + { + testData.col(i) = g1.Random(); + testResponses[i] = 0; + } + for (size_t i = 500; i < 1000; ++i) + { + testData.col(i) = g2.Random(); + testResponses[i] = 1; + } + + LogisticRegression<> lr(shuffledData.n_rows, 0.5); + + LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); + SMORMS3 > smorms3(lrf); + lr.Train(smorms3); + + // Ensure that the error is close to zero. + const double acc = lr.ComputeAccuracy(data, responses); + BOOST_REQUIRE_CLOSE(acc, 100.0, 0.3); // 0.3% error tolerance. 
+ + const double testAcc = lr.ComputeAccuracy(testData, testResponses); + BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance. +} + +BOOST_AUTO_TEST_SUITE_END(); From 4b4c2d76c78ef39142a1cf10680f76d5003cacb3 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 1 Mar 2017 21:11:07 +0530 Subject: [PATCH 07/78] Update optimizers and tests CMakeLists --- src/mlpack/core/optimizers/CMakeLists.txt | 1 + src/mlpack/tests/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/src/mlpack/core/optimizers/CMakeLists.txt b/src/mlpack/core/optimizers/CMakeLists.txt index c0b5f563f58..072fd27b32e 100644 --- a/src/mlpack/core/optimizers/CMakeLists.txt +++ b/src/mlpack/core/optimizers/CMakeLists.txt @@ -10,6 +10,7 @@ set(DIRS sa sdp sgd + smorms3 ) foreach(dir ${DIRS}) diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index 8f18594a9dc..b93077b25ed 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -82,6 +82,7 @@ add_executable(mlpack_test serialization.hpp serialization.cpp serialization_test.cpp + smorms3_test.cpp softmax_regression_test.cpp sort_policy_test.cpp sparse_autoencoder_test.cpp From ddb3128601eb0d8ea0ac5764765cf11a4f02e32f Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 2 Mar 2017 13:56:00 +0530 Subject: [PATCH 08/78] Fill matrix lRateMat with lRate not matrix x --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index f7c5f37cd74..1e7fcd4daa2 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -116,7 +116,7 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat x = (g % g) / (g2 + eps); arma::mat lRateMat(x.n_rows, x.n_cols); - x.fill(lRate); + lRateMat.fill(lRate); iterate -= gradient * arma::min(x, lRate) / (arma::sqrt(g2) + eps); From c812c18d836c6de74f3ee2deb1172fa66570ec66 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 2 Mar 2017 13:56:52 +0530 Subject: [PATCH 09/78] Fix a typo --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 1e7fcd4daa2..67f1190a10f 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -118,7 +118,7 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat lRateMat(x.n_rows, x.n_cols); lRateMat.fill(lRate); - iterate -= gradient * arma::min(x, lRate) / (arma::sqrt(g2) + eps); + iterate -= gradient * arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); mem *= (1 - x); mem += 1; From 9945b04f255fb7135290b9df57d60efaa6aef187 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 2 Mar 2017 13:59:29 +0530 Subject: [PATCH 10/78] Use (%) Schur product instead of (*) Matrix multiplication --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 67f1190a10f..5f305bcadf8 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -118,7 +118,7 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat lRateMat(x.n_rows, x.n_cols); 
lRateMat.fill(lRate); - iterate -= gradient * arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); + iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); mem *= (1 - x); mem += 1; From dac421821eb0694b898932376689d09092377720 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 2 Mar 2017 14:02:31 +0530 Subject: [PATCH 11/78] Remove unnecessary newlines --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 5f305bcadf8..3cc8fe48b0f 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -57,9 +57,7 @@ double SMORMS3::Optimize(arma::mat& iterate) // Initialise the parameters mem, g and g2. arma::mat mem = arma::ones(iterate.n_rows, iterate.n_cols); - arma::mat g = arma::zeros(iterate.n_rows, iterate.n_cols); - arma::mat g2 = arma::zeros(iterate.n_rows, iterate.n_cols); // Now iterate! From 558de23e1d7bbcbd53a92c757ebc98c081bd7c1e Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 9 Mar 2017 20:40:01 +0530 Subject: [PATCH 12/78] Move initialisation of lRateMat outside the for loop --- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 3cc8fe48b0f..44c2da02723 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -60,6 +60,10 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat g = arma::zeros(iterate.n_rows, iterate.n_cols); arma::mat g2 = arma::zeros(iterate.n_rows, iterate.n_cols); + // Initialise a matrix filled with lRate. + arma::mat lRateMat(iterate.n_rows, iterate.n_cols); + lRateMat.fill(lRate); + // Now iterate! arma::mat gradient(iterate.n_rows, iterate.n_cols); @@ -113,9 +117,6 @@ double SMORMS3::Optimize(arma::mat& iterate) arma::mat x = (g % g) / (g2 + eps); - arma::mat lRateMat(x.n_rows, x.n_cols); - lRateMat.fill(lRate); - iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); mem *= (1 - x); From 03408c5742b93c2d28b6046f18d2ca2cf74b621c Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 9 Mar 2017 20:44:23 +0530 Subject: [PATCH 13/78] Remove leading underscores in the macros --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index d4ac9810b58..dcf971128db 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -11,8 +11,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
*/ -#ifndef __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP -#define __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP #include diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 44c2da02723..3259d379666 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -9,8 +9,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP -#define __MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP // In case it hasn't been included yet. #include "smorms3.hpp" From 8ec825c738d5a4ff3575eb746537a6795e100210 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 9 Mar 2017 20:50:58 +0530 Subject: [PATCH 14/78] Rename eps parameter in the constructor as epsilon --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index dcf971128db..0b1359d929a 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -69,7 +69,7 @@ class SMORMS3 * * @param function Function to be optimized (minimized). * @param lRate Learning rate or step size for each iteration. - * @param eps Value used to initialise the mean squared gradient parameter. + * @param epsilon Value used to initialise the mean squared gradient parameter. * @param maxIterations Maximum number of iterations allowed (0 means no * limit). * @param tolerance Maximum absolute tolerance to terminate algorithm. @@ -78,7 +78,7 @@ class SMORMS3 */ SMORMS3(DecomposableFunctionType& function, const double lRate = 0.001, - const double eps = 1e-16, + const double epsilon = 1e-16, const size_t maxIterations = 100000, const double tolerance = 1e-5, const bool shuffle = true); From d798b3dfb1dd3b359385d961e3d533b5c72f1afd Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 9 Mar 2017 20:54:32 +0530 Subject: [PATCH 15/78] Rename function LRate as LearningRate --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 0b1359d929a..c173a8ed34b 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -99,9 +99,9 @@ class SMORMS3 DecomposableFunctionType& Function() { return function; } //! Get the learning rate. - double LRate() const { return lRate; } + double LearningRate() const { return lRate; } //! Modify the learning rate. - double& LRate() { return lRate; } + double& LearningRate() { return lRate; } //! Get the value used to initialise the mean squared gradient parameter. double Epsilon() const { return eps; } From a281494636dd6efa1419d0442fd0a80a9aa7f1ad Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 15 Mar 2017 21:35:48 +0530 Subject: [PATCH 16/78] Implement SMORMS3 optimizer as an update policy of SGD * SMORMS3Update policy class implements the update steps of SMORMS3 algorithm. * SMORMS3 class as a wrapper leveraging the SGD class. 
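Roughly, the new policy is meant to be used through SGD directly; a minimal
sketch of that usage, mirroring the updated smorms3_test.cpp (the SGD template
arguments are spelled out here for clarity):

    SGDTestFunction f;
    SMORMS3Update smorms3Update(1e-16);
    SGD<SGDTestFunction, SMORMS3Update> s(f, 1e-3, 5000000, 1e-9, true,
        smorms3Update);

    arma::mat coordinates = f.GetInitialPoint();
    s.Optimize(coordinates);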
--- src/mlpack/core/optimizers/sgd/sgd.hpp | 1 + .../sgd/update_policies/CMakeLists.txt | 1 + .../sgd/update_policies/smorms3_update.hpp | 108 +++++++++++++ .../core/optimizers/smorms3/CMakeLists.txt | 1 - .../core/optimizers/smorms3/smorms3.hpp | 12 +- .../core/optimizers/smorms3/smorms3_impl.hpp | 145 ------------------ src/mlpack/tests/smorms3_test.cpp | 11 +- 7 files changed, 124 insertions(+), 155 deletions(-) create mode 100644 src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp delete mode 100644 src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp diff --git a/src/mlpack/core/optimizers/sgd/sgd.hpp b/src/mlpack/core/optimizers/sgd/sgd.hpp index 4dcb4eb54fe..8f7c348feeb 100644 --- a/src/mlpack/core/optimizers/sgd/sgd.hpp +++ b/src/mlpack/core/optimizers/sgd/sgd.hpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace mlpack { namespace optimization { diff --git a/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt b/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt index 36d87a97ebe..b6e4a0dd8b0 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt +++ b/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES vanilla_update.hpp momentum_update.hpp + smorms3_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp new file mode 100644 index 00000000000..8d13cf48217 --- /dev/null +++ b/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp @@ -0,0 +1,108 @@ +/** + * @file smorms3_update.hpp + * @author Vivek Pal + * + * SMORMS3 update for Stochastic Gradient Descent. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_UPDATE_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * SMORMS3 is an optimizer that estimates a safe and optimal distance based on + * curvature and normalizing the stepsize in the parameter space. It is a hybrid + * of RMSprop and Yann LeCun’s method in "No more pesky learning rates". + * + * For more information, see the following. + * + * @code + * @misc{Funk2015, + * author = {Simon Funk}, + * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, + * year = {2015} + * } + * @endcode + */ + +class SMORMS3Update +{ + public: + /** + * Construct the SMORMS3 update policy with given epsilon parameter. + * + * @param psilon Value used to initialise the mean squared gradient parameter. + */ + SMORMS3Update(const double eps = 1e-16) : eps(eps) + { /* Do nothing. */ }; + + /** + * The Initialize method is called by SGD::Optimize method with UpdatePolicy + * SMORMS3Update before the start of the iteration update process. + * + * @param n_rows number of rows in the gradient matrix. + * @param n_cols number of columns in the gradient matrix. + * @param lRate + */ + void Initialize(const size_t n_rows, + const size_t n_cols) + { + // Initialise the parameters mem, g and g2. + mem = arma::ones(n_rows, n_cols); + g = arma::zeros(n_rows, n_cols); + g2 = arma::zeros(n_rows, n_cols); + + // Initialise a matrix to be filled with lRate. 
+ lRateMat = arma::zeros(n_rows, n_cols); + } + + /** + * Update step for SMORMS3. + * + * @param iterate Parameter that minimizes the function. + * @param stepSize Step size to be used for the given iteration. + * @param gradient The gradient matrix. + */ + void Update(arma::mat& iterate, + double stepSize, + arma::mat& gradient) + { + // Update the iterate. + arma::mat r = 1 / (mem + 1); + + g = (1 - r) % g; + g += r % gradient; + + g2 = (1 - r) % g2; + g2 += r % (gradient % gradient); + + arma::mat x = (g % g) / (g2 + eps); + + lRateMat.fill(stepSize); + + iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); + + mem %= (1 - x); + mem += 1; + } + private: + //! The value used to initialise the mean squared gradient parameter. + double eps; + // The parameters mem, g and g2. + arma::mat mem, g, g2; + // The matrix to be filled with lRate. + arma::mat lRateMat; +}; + +} // namespace optimization +} // namespace mlpack + +#endif \ No newline at end of file diff --git a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt index 68b6de814cd..cefe7c12883 100644 --- a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt +++ b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt @@ -1,6 +1,5 @@ set(SOURCES smorms3.hpp - smorms3_impl.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index c173a8ed34b..66775be1f8e 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -15,6 +15,7 @@ #define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_HPP #include +#include namespace mlpack { namespace optimization { @@ -91,7 +92,10 @@ class SMORMS3 * @param iterate Starting point (will be modified). * @return Objective value of the final point. */ - double Optimize(arma::mat& iterate); + double Optimize(const arma::mat& iterate) + { + return optimizer.Optimize(iterate); + } //! Get the instantiated function to be optimized. const DecomposableFunctionType& Function() const { return function; } @@ -142,12 +146,12 @@ class SMORMS3 //! Controls whether or not the individual functions are shuffled when //! iterating. bool shuffle; + + //! The Stochastic Gradient Descent object with SMORMS3 update policy. + SGD optimizer; }; } // namespace optimization } // namespace mlpack -// Include implementation. -#include "smorms3_impl.hpp" - #endif diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp deleted file mode 100644 index 3259d379666..00000000000 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ /dev/null @@ -1,145 +0,0 @@ -/** - * @file smorms3_impl.hpp - * @author Vivek Pal - * - * Implementation of the SMORMS3 optimizer. - * - * mlpack is free software; you may redistribute it and/or modify it under the - * terms of the 3-clause BSD license. You should have received a copy of the - * 3-clause BSD license along with mlpack. If not, see - * http://www.opensource.org/licenses/BSD-3-Clause for more information. - */ -#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP -#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP - -// In case it hasn't been included yet. 
-#include "smorms3.hpp" - -namespace mlpack { -namespace optimization { - -template -SMORMS3::SMORMS3(DecomposableFunctionType& function, - const double lRate, - const double eps, - const size_t maxIterations, - const double tolerance, - const bool shuffle) : - function(function), - lRate(lRate), - eps(eps), - maxIterations(maxIterations), - tolerance(tolerance), - shuffle(shuffle) -{ /* Nothing to do. */ } - -//! Optimize the function (minimize). -template -double SMORMS3::Optimize(arma::mat& iterate) -{ - // Find the number of functions to use. - const size_t numFunctions = function.NumFunctions(); - - // This is used only if shuffle is true. - arma::Col visitationOrder; - if (shuffle) - visitationOrder = arma::shuffle(arma::linspace>(0, - (numFunctions - 1), numFunctions)); - - // To keep track of where we are and how things are going. - size_t currentFunction = 0; - double overallObjective = 0; - double lastObjective = DBL_MAX; - - // Calculate the first objective function. - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - - // Initialise the parameters mem, g and g2. - arma::mat mem = arma::ones(iterate.n_rows, iterate.n_cols); - arma::mat g = arma::zeros(iterate.n_rows, iterate.n_cols); - arma::mat g2 = arma::zeros(iterate.n_rows, iterate.n_cols); - - // Initialise a matrix filled with lRate. - arma::mat lRateMat(iterate.n_rows, iterate.n_cols); - lRateMat.fill(lRate); - - // Now iterate! - arma::mat gradient(iterate.n_rows, iterate.n_cols); - - for (size_t i = 1; i != maxIterations; ++i, ++currentFunction) - { - // Is this iteration the start of a sequence? - if ((currentFunction % numFunctions) == 0) - { - // Output current objective function. - Log::Info << "SMORMS3: iteration " << i << ", objective " - << overallObjective << "." << std::endl; - - if (std::isnan(overallObjective) || std::isinf(overallObjective)) - { - Log::Warn << "SMORMS3: converged to " << overallObjective - << "; terminating with failure. Try a smaller step size?" - << std::endl; - return overallObjective; - } - - if (std::abs(lastObjective - overallObjective) < tolerance) - { - Log::Info << "SMORMS3: minimized within tolerance " << tolerance << "; " - << "terminating optimization." << std::endl; - return overallObjective; - } - - // Reset the counter variables. - lastObjective = overallObjective; - overallObjective = 0; - currentFunction = 0; - - if (shuffle) // Determine order of visitation. - visitationOrder = arma::shuffle(visitationOrder); - } - - // Evaluate the gradient for this iteration. - if (shuffle) - function.Gradient(iterate, visitationOrder[currentFunction], gradient); - else - function.Gradient(iterate, currentFunction, gradient); - - // And update the iterate. - arma::mat r = 1 / (mem + 1); - - g = (1 - r) % g; - g += r % gradient; - - g2 = (1 - r) % g2; - g2 += r % (gradient % gradient); - - arma::mat x = (g % g) / (g2 + eps); - - iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); - - mem *= (1 - x); - mem += 1; - - // Now add that to the overall objective function. - if (shuffle) - overallObjective += function.Evaluate(iterate, - visitationOrder[currentFunction]); - else - overallObjective += function.Evaluate(iterate, currentFunction); - } - - Log::Info << "SMORMS3: maximum iterations (" << maxIterations << ") reached; " - << "terminating optimization." << std::endl; - // Calculate final objective. 
- overallObjective = 0; - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - return overallObjective; -} - -} // namespace optimization -} // namespace mlpack - -#endif diff --git a/src/mlpack/tests/smorms3_test.cpp b/src/mlpack/tests/smorms3_test.cpp index 370e6d3f8de..2ca3fb81fad 100644 --- a/src/mlpack/tests/smorms3_test.cpp +++ b/src/mlpack/tests/smorms3_test.cpp @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include @@ -35,10 +35,11 @@ BOOST_AUTO_TEST_SUITE(SMORMS3Test); BOOST_AUTO_TEST_CASE(SimpleSMORMS3TestFunction) { SGDTestFunction f; - SMORMS3 optimizer(f, 1e-3, 1e-16, 5000000, 1e-9, true); + SMORMS3Update smorms3Update(1e-16); + SGD s(f, 1e-3, 5000000, 1e-9, true, smorms3Update); arma::mat coordinates = f.GetInitialPoint(); - optimizer.Optimize(coordinates); + s.Optimize(coordinates); BOOST_REQUIRE_SMALL(coordinates[0], 0.1); BOOST_REQUIRE_SMALL(coordinates[1], 0.1); @@ -95,7 +96,7 @@ BOOST_AUTO_TEST_CASE(SMORMS3LogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - SMORMS3 > smorms3(lrf); + SGD, SMORMS3Update > smorms3(lrf); lr.Train(smorms3); // Ensure that the error is close to zero. @@ -106,4 +107,4 @@ BOOST_AUTO_TEST_CASE(SMORMS3LogisticRegressionTest) BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance. } -BOOST_AUTO_TEST_SUITE_END(); +BOOST_AUTO_TEST_SUITE_END(); \ No newline at end of file From 3c720d3b316bc9807ecb35bc94b145fccec7b0e7 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 15 Mar 2017 21:48:56 +0530 Subject: [PATCH 17/78] Let's stick with stepSize instead of lRate We currently use stepSize in other optimizers as well e.g. RMSprop etc and also, consistent code looks good. --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 66775be1f8e..0aff1420b88 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -69,7 +69,7 @@ class SMORMS3 * equal one pass over the dataset). * * @param function Function to be optimized (minimized). - * @param lRate Learning rate or step size for each iteration. + * @param stepSize Step size for each iteration. * @param epsilon Value used to initialise the mean squared gradient parameter. * @param maxIterations Maximum number of iterations allowed (0 means no * limit). @@ -78,7 +78,7 @@ class SMORMS3 * function is visited in linear order. */ SMORMS3(DecomposableFunctionType& function, - const double lRate = 0.001, + const double stepSize = 0.001, const double epsilon = 1e-16, const size_t maxIterations = 100000, const double tolerance = 1e-5, @@ -102,10 +102,10 @@ class SMORMS3 //! Modify the instantiated function. DecomposableFunctionType& Function() { return function; } - //! Get the learning rate. - double LearningRate() const { return lRate; } - //! Modify the learning rate. - double& LearningRate() { return lRate; } + //! Get the step size. + double StepSize() const { return stepSize; } + //! Modify the step size. + double& StepSize() { return stepSize; } //! Get the value used to initialise the mean squared gradient parameter. double Epsilon() const { return eps; } @@ -131,8 +131,8 @@ class SMORMS3 //! The instantiated function. DecomposableFunctionType& function; - //! The learning rate for each example. - double lRate; + //! 
The step size for each example. + double stepSize; //! The value used to initialise the mean squared gradient parameter. double eps; From 4950f54b200f84074ece96d49003bf43171b3b95 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 16 Mar 2017 01:06:22 +0530 Subject: [PATCH 18/78] fixup! Let's stick with stepSize instead of lRate --- .../sgd/update_policies/smorms3_update.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp index 8d13cf48217..9a7376e3de8 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp @@ -60,8 +60,8 @@ class SMORMS3Update g = arma::zeros(n_rows, n_cols); g2 = arma::zeros(n_rows, n_cols); - // Initialise a matrix to be filled with lRate. - lRateMat = arma::zeros(n_rows, n_cols); + // Initialise a matrix to be filled with stepSize. + stepSizeMat = arma::zeros(n_rows, n_cols); } /** @@ -86,9 +86,9 @@ class SMORMS3Update arma::mat x = (g % g) / (g2 + eps); - lRateMat.fill(stepSize); + stepSizeMat.fill(stepSize); - iterate -= gradient % arma::min(x, lRateMat) / (arma::sqrt(g2) + eps); + iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + eps); mem %= (1 - x); mem += 1; @@ -98,8 +98,8 @@ class SMORMS3Update double eps; // The parameters mem, g and g2. arma::mat mem, g, g2; - // The matrix to be filled with lRate. - arma::mat lRateMat; + // The matrix to be filled with stepSize. + arma::mat stepSizeMat; }; } // namespace optimization From 782c4a93d1d97c1226602ddc16a5a82b47f21a71 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 16 Mar 2017 09:15:48 +0530 Subject: [PATCH 19/78] Address review comments with the following changes: * Rename parameter eps as epsilon everywhere. * Make paramter stepSize & gradient const in SMORMS3Update::Update. * Move smorms3_update.hpp to /optimizers/smorms3 directory. * Reset stepSizeMat only when stepSize != previousStepSize. * Update tests. 
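The stepSizeMat reset described above comes down to a small guard in
SMORMS3Update::Update(), along these lines:

    if (stepSize != previousStepSize)
    {
      stepSizeMat.fill(stepSize);
      previousStepSize = stepSize;
    }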
--- src/mlpack/core/optimizers/sgd/sgd.hpp | 1 - .../core/optimizers/smorms3/CMakeLists.txt | 2 + .../core/optimizers/smorms3/smorms3.hpp | 40 +++++++++++-------- .../core/optimizers/smorms3/smorms3_impl.hpp | 39 ++++++++++++++++++ .../smorms3_update.hpp | 21 ++++++---- src/mlpack/tests/smorms3_test.cpp | 7 ++-- 6 files changed, 81 insertions(+), 29 deletions(-) create mode 100644 src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp rename src/mlpack/core/optimizers/{sgd/update_policies => smorms3}/smorms3_update.hpp (85%) diff --git a/src/mlpack/core/optimizers/sgd/sgd.hpp b/src/mlpack/core/optimizers/sgd/sgd.hpp index 8f7c348feeb..4dcb4eb54fe 100644 --- a/src/mlpack/core/optimizers/sgd/sgd.hpp +++ b/src/mlpack/core/optimizers/sgd/sgd.hpp @@ -17,7 +17,6 @@ #include #include #include -#include namespace mlpack { namespace optimization { diff --git a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt index cefe7c12883..e77beb68b1d 100644 --- a/src/mlpack/core/optimizers/smorms3/CMakeLists.txt +++ b/src/mlpack/core/optimizers/smorms3/CMakeLists.txt @@ -1,5 +1,7 @@ set(SOURCES smorms3.hpp + smorms3_impl.hpp + smorms3_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 0aff1420b88..2bc230c83a0 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -17,6 +17,8 @@ #include #include +#include "smorms3_update.hpp" + namespace mlpack { namespace optimization { @@ -92,10 +94,7 @@ class SMORMS3 * @param iterate Starting point (will be modified). * @return Objective value of the final point. */ - double Optimize(const arma::mat& iterate) - { - return optimizer.Optimize(iterate); - } + double Optimize(arma::mat& iterate) { return optimizer.Optimize(iterate); } //! Get the instantiated function to be optimized. const DecomposableFunctionType& Function() const { return function; } @@ -103,29 +102,29 @@ class SMORMS3 DecomposableFunctionType& Function() { return function; } //! Get the step size. - double StepSize() const { return stepSize; } + double StepSize() const { return optimizer.StepSize(); } //! Modify the step size. - double& StepSize() { return stepSize; } + double& StepSize() { return optimizer.StepSize(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return eps; } + double Epsilon() const { return optimizer.Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return eps; } + double& Epsilon() { return optimizer.Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). - size_t MaxIterations() const { return maxIterations; } + size_t MaxIterations() const { return optimizer.MaxIterations(); } //! Modify the maximum number of iterations (0 indicates no limit). - size_t& MaxIterations() { return maxIterations; } + size_t& MaxIterations() { return optimizer.MaxIterations(); } //! Get the tolerance for termination. - double Tolerance() const { return tolerance; } + double Tolerance() const { return optimizer.Tolerance(); } //! Modify the tolerance for termination. - double& Tolerance() { return tolerance; } + double& Tolerance() { return optimizer.Tolerance(); } //! Get whether or not the individual functions are shuffled. - bool Shuffle() const { return shuffle; } + bool Shuffle() const { return optimizer.Shuffle(); } //! 
Modify whether or not the individual functions are shuffled. - bool& Shuffle() { return shuffle; } + bool& Shuffle() { return optimizer.Shuffle(); } private: //! The instantiated function. @@ -135,7 +134,7 @@ class SMORMS3 double stepSize; //! The value used to initialise the mean squared gradient parameter. - double eps; + double epsilon; //! The maximum number of allowed iterations. size_t maxIterations; @@ -148,10 +147,19 @@ class SMORMS3 bool shuffle; //! The Stochastic Gradient Descent object with SMORMS3 update policy. - SGD optimizer; + SGD + optimizer(DecomposableFunctionType&, + const double, + const size_t, + const double, + const bool, + SMORMS3Update); }; } // namespace optimization } // namespace mlpack +// Include implementation. +#include "smorms3_impl.hpp" + #endif diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp new file mode 100644 index 00000000000..c8ab5f0e254 --- /dev/null +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -0,0 +1,39 @@ +/** + * @file smorms3_impl.hpp + * @author Vivek Pal + * + * Implementation of the SMORMS3 constructor. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_IMPL_HPP + +// In case it hasn't been included yet. +#include "smorms3.hpp" + +namespace mlpack { +namespace optimization { + +template +SMORMS3::SMORMS3(DecomposableFunctionType& function, + const double stepSize, + const double epsilon, + const size_t maxIterations, + const double tolerance, + const bool shuffle) : + function(function), + stepSize(stepSize), + epsilon(epsilon), + maxIterations(maxIterations), + tolerance(tolerance), + shuffle(shuffle) +{ /* Nothing to do. */ } + +} // namespace optimization +} // namespace mlpack + +#endif diff --git a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp similarity index 85% rename from src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp rename to src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 9a7376e3de8..338cefd7a0c 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -39,9 +39,9 @@ class SMORMS3Update /** * Construct the SMORMS3 update policy with given epsilon parameter. * - * @param psilon Value used to initialise the mean squared gradient parameter. + * @param epsilon Value used to initialise the mean squared gradient parameter. */ - SMORMS3Update(const double eps = 1e-16) : eps(eps) + SMORMS3Update(const double epsilon = 1e-16) : epsilon(epsilon) { /* Do nothing. */ }; /** @@ -72,8 +72,8 @@ class SMORMS3Update * @param gradient The gradient matrix. */ void Update(arma::mat& iterate, - double stepSize, - arma::mat& gradient) + const double stepSize, + const arma::mat& gradient) { // Update the iterate. 
arma::mat r = 1 / (mem + 1); @@ -84,18 +84,23 @@ class SMORMS3Update g2 = (1 - r) % g2; g2 += r % (gradient % gradient); - arma::mat x = (g % g) / (g2 + eps); + arma::mat x = (g % g) / (g2 + epsilon); - stepSizeMat.fill(stepSize); + if (stepSize != previousStepSize) + { + stepSizeMat.fill(stepSize); + } - iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + eps); + iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + epsilon); mem %= (1 - x); mem += 1; + + previousStepSize = stepSize; } private: //! The value used to initialise the mean squared gradient parameter. - double eps; + double epsilon, previousStepSize; // The parameters mem, g and g2. arma::mat mem, g, g2; // The matrix to be filled with stepSize. diff --git a/src/mlpack/tests/smorms3_test.cpp b/src/mlpack/tests/smorms3_test.cpp index 2ca3fb81fad..1d62556312d 100644 --- a/src/mlpack/tests/smorms3_test.cpp +++ b/src/mlpack/tests/smorms3_test.cpp @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include @@ -35,8 +35,7 @@ BOOST_AUTO_TEST_SUITE(SMORMS3Test); BOOST_AUTO_TEST_CASE(SimpleSMORMS3TestFunction) { SGDTestFunction f; - SMORMS3Update smorms3Update(1e-16); - SGD s(f, 1e-3, 5000000, 1e-9, true, smorms3Update); + SMORMS3 s(f, 0.001, 1e-16, 5000000, 1e-9, true); arma::mat coordinates = f.GetInitialPoint(); s.Optimize(coordinates); @@ -96,7 +95,7 @@ BOOST_AUTO_TEST_CASE(SMORMS3LogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - SGD, SMORMS3Update > smorms3(lrf); + SMORMS3 > smorms3(lrf); lr.Train(smorms3); // Ensure that the error is close to zero. From c4121ff2c62e04a236450e34fd80e4f62202ffa0 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 16 Mar 2017 09:49:50 +0530 Subject: [PATCH 20/78] Rename n_rows and n_cols as rows and cols respectively --- .../core/optimizers/smorms3/smorms3_update.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 338cefd7a0c..20731ed4626 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -52,16 +52,16 @@ class SMORMS3Update * @param n_cols number of columns in the gradient matrix. * @param lRate */ - void Initialize(const size_t n_rows, - const size_t n_cols) + void Initialize(const size_t rows, + const size_t cols) { // Initialise the parameters mem, g and g2. - mem = arma::ones(n_rows, n_cols); - g = arma::zeros(n_rows, n_cols); - g2 = arma::zeros(n_rows, n_cols); + mem = arma::ones(rows, cols); + g = arma::zeros(rows, cols); + g2 = arma::zeros(rows, cols); // Initialise a matrix to be filled with stepSize. - stepSizeMat = arma::zeros(n_rows, n_cols); + stepSizeMat = arma::zeros(rows, cols); } /** From 7bbe4359de73c7f084460dc6d71f2b9a8f6f3026 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sat, 18 Mar 2017 19:56:54 +0530 Subject: [PATCH 21/78] No need to include smorms3_update.hpp here It was moved to optimizer/smorms3 directory. 
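With the update policy moved into its own directory, SMORMS3 is effectively SGD parameterized by SMORMS3Update. Roughly, the contract an update policy fulfils looks like the following condensed sketch (pieced together from the SGD code touched in this series, not a complete listing):

    // Inside SGD<DecomposableFunctionType, UpdatePolicyType>::Optimize():
    updatePolicy.Initialize(iterate.n_rows, iterate.n_cols); // Once, up front.
    for (size_t i = 1; i != maxIterations; ++i)
    {
      // Gradient of the currently visited decomposable function.
      function.Gradient(iterate, currentFunction, gradient);

      // Policy-specific step (vanilla, momentum, SMORMS3, ...).
      updatePolicy.Update(iterate, stepSize, gradient);
    }

The SMORMS3 class itself then only needs to forward its constructor arguments to such an SGD instance, which is what the following commits converge on.
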
--- src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt b/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt index b6e4a0dd8b0..36d87a97ebe 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt +++ b/src/mlpack/core/optimizers/sgd/update_policies/CMakeLists.txt @@ -1,7 +1,6 @@ set(SOURCES vanilla_update.hpp momentum_update.hpp - smorms3_update.hpp ) set(DIR_SRCS) From 0dd928840e98326929b8d644b95d881271372db1 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sat, 18 Mar 2017 19:59:04 +0530 Subject: [PATCH 22/78] The arguments not necessary to define the type --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 2bc230c83a0..1803141325f 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -147,13 +147,7 @@ class SMORMS3 bool shuffle; //! The Stochastic Gradient Descent object with SMORMS3 update policy. - SGD - optimizer(DecomposableFunctionType&, - const double, - const size_t, - const double, - const bool, - SMORMS3Update); + SGD optimizer; }; } // namespace optimization From 24f6cec66e0008bc11b07ca786b72a729073a442 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 19 Mar 2017 04:21:13 +0530 Subject: [PATCH 23/78] Fix incorrect instantiation of SMORMS3 constructor --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 3 +++ src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 1803141325f..6f8756876d9 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -146,6 +146,9 @@ class SMORMS3 //! iterating. bool shuffle; + //! The SMORMS3 update policy object. + SMORMS3Update smorms3Update; + //! The Stochastic Gradient Descent object with SMORMS3 update policy. SGD optimizer; }; diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index c8ab5f0e254..9a431a2bf47 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -30,7 +30,14 @@ SMORMS3::SMORMS3(DecomposableFunctionType& function, epsilon(epsilon), maxIterations(maxIterations), tolerance(tolerance), - shuffle(shuffle) + shuffle(shuffle), + smorms3Update(epsilon), + optimizer(function, + stepSize, + maxIterations, + tolerance, + shuffle, + smorms3Update) { /* Nothing to do. 
*/ } } // namespace optimization From ac8492b0e01c4e2b615fda392b1817e81c164f6a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 19 Mar 2017 04:22:21 +0530 Subject: [PATCH 24/78] Initialise previousStepSize before its first use Fixes valgrind error: Conditional jump or move depends on uninitialised value(s) --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 20731ed4626..3d55fcd14a5 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -100,7 +100,7 @@ class SMORMS3Update } private: //! The value used to initialise the mean squared gradient parameter. - double epsilon, previousStepSize; + double epsilon, previousStepSize = 0; // The parameters mem, g and g2. arma::mat mem, g, g2; // The matrix to be filled with stepSize. From 5fee74edec6b09abf4287e1293de79d710633622 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 19 Mar 2017 16:12:04 +0530 Subject: [PATCH 25/78] Fix a couple of comments --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 6f8756876d9..91acc7b99df 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -146,10 +146,10 @@ class SMORMS3 //! iterating. bool shuffle; - //! The SMORMS3 update policy object. + //! The SMORMS3Update update policy object. SMORMS3Update smorms3Update; - //! The Stochastic Gradient Descent object with SMORMS3 update policy. + //! The Stochastic Gradient Descent object with SMORMS3Update update policy. SGD optimizer; }; From 98b2f5a6397b4913519f01ea2be9319b92f369e6 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 19 Mar 2017 22:24:04 +0530 Subject: [PATCH 26/78] Add Epsilon function to the SMORMS3Update update policy --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 4 ++-- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 91acc7b99df..933e2ac4225 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -107,9 +107,9 @@ class SMORMS3 double& StepSize() { return optimizer.StepSize(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return optimizer.Epsilon(); } + double Epsilon() const { return smorms3Update.Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return optimizer.Epsilon(); } + double& Epsilon() { return smorms3Update.Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). size_t MaxIterations() const { return optimizer.MaxIterations(); } diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 3d55fcd14a5..cab6b03be93 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -44,6 +44,11 @@ class SMORMS3Update SMORMS3Update(const double epsilon = 1e-16) : epsilon(epsilon) { /* Do nothing. */ }; + //! 
Get the value used to initialise the mean squared gradient parameter. + double Epsilon() const { return epsilon; } + //! Modify the value used to initialise the mean squared gradient parameter. + double& Epsilon() { return epsilon; } + /** * The Initialize method is called by SGD::Optimize method with UpdatePolicy * SMORMS3Update before the start of the iteration update process. From dacb95ead88d9bc9ede98516e13dd2df54e73be5 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 22 Mar 2017 21:06:40 +0530 Subject: [PATCH 27/78] Use constructor to initialise previousStepSize variable --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index cab6b03be93..0d89c74d26c 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -41,8 +41,10 @@ class SMORMS3Update * * @param epsilon Value used to initialise the mean squared gradient parameter. */ - SMORMS3Update(const double epsilon = 1e-16) : epsilon(epsilon) - { /* Do nothing. */ }; + SMORMS3Update(const double epsilon = 1e-16) : + epsilon(epsilon), + previousStepSize(0) + { /* Do nothing. */ } //! Get the value used to initialise the mean squared gradient parameter. double Epsilon() const { return epsilon; } @@ -105,7 +107,7 @@ class SMORMS3Update } private: //! The value used to initialise the mean squared gradient parameter. - double epsilon, previousStepSize = 0; + double epsilon, previousStepSize; // The parameters mem, g and g2. arma::mat mem, g, g2; // The matrix to be filled with stepSize. From b057f881ab11bd65501c696f68f465f44f8fc71c Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Wed, 22 Mar 2017 21:11:39 +0530 Subject: [PATCH 28/78] Remove those parameters from SMORMS3 class that: are modified by the optimizer object which are stepSize, maxIterations, tolerance and shuffle --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 13 ------------- src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp | 4 ---- 2 files changed, 17 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 933e2ac4225..82665211713 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -130,22 +130,9 @@ class SMORMS3 //! The instantiated function. DecomposableFunctionType& function; - //! The step size for each example. - double stepSize; - //! The value used to initialise the mean squared gradient parameter. double epsilon; - //! The maximum number of allowed iterations. - size_t maxIterations; - - //! The tolerance for termination. - double tolerance; - - //! Controls whether or not the individual functions are shuffled when - //! iterating. - bool shuffle; - //! The SMORMS3Update update policy object. 
SMORMS3Update smorms3Update; diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 9a431a2bf47..18dce55fae1 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -26,11 +26,7 @@ SMORMS3::SMORMS3(DecomposableFunctionType& function, const double tolerance, const bool shuffle) : function(function), - stepSize(stepSize), epsilon(epsilon), - maxIterations(maxIterations), - tolerance(tolerance), - shuffle(shuffle), smorms3Update(epsilon), optimizer(function, stepSize, From 81b8d03faf19706416edeffc6cb0318e84eaabb6 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 28 Mar 2017 23:01:34 +0530 Subject: [PATCH 29/78] Minor changes addressing review comments --- src/mlpack/core/optimizers/smorms3/smorms3.hpp | 18 +++++------------- .../core/optimizers/smorms3/smorms3_impl.hpp | 5 +---- .../core/optimizers/smorms3/smorms3_update.hpp | 15 ++++++++++----- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 82665211713..41643c80236 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -34,6 +34,7 @@ namespace optimization { * author = {Simon Funk}, * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, * year = {2015} + * url = {http://sifter.org/~simon/journal/20150420.html} * } * @endcode * @@ -97,9 +98,9 @@ class SMORMS3 double Optimize(arma::mat& iterate) { return optimizer.Optimize(iterate); } //! Get the instantiated function to be optimized. - const DecomposableFunctionType& Function() const { return function; } + const DecomposableFunctionType& Function() const { return optimizer.Function(); } //! Modify the instantiated function. - DecomposableFunctionType& Function() { return function; } + DecomposableFunctionType& Function() { return optimizer.Function(); } //! Get the step size. double StepSize() const { return optimizer.StepSize(); } @@ -107,9 +108,9 @@ class SMORMS3 double& StepSize() { return optimizer.StepSize(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return smorms3Update.Epsilon(); } + double Epsilon() const { return optimizer.UpdatePolicy().Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return smorms3Update.Epsilon(); } + double& Epsilon() { return optimizer.UpdatePolicy().Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). size_t MaxIterations() const { return optimizer.MaxIterations(); } @@ -127,15 +128,6 @@ class SMORMS3 bool& Shuffle() { return optimizer.Shuffle(); } private: - //! The instantiated function. - DecomposableFunctionType& function; - - //! The value used to initialise the mean squared gradient parameter. - double epsilon; - - //! The SMORMS3Update update policy object. - SMORMS3Update smorms3Update; - //! The Stochastic Gradient Descent object with SMORMS3Update update policy. 
SGD optimizer; }; diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp index 18dce55fae1..68ced5913aa 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_impl.hpp @@ -25,15 +25,12 @@ SMORMS3::SMORMS3(DecomposableFunctionType& function, const size_t maxIterations, const double tolerance, const bool shuffle) : - function(function), - epsilon(epsilon), - smorms3Update(epsilon), optimizer(function, stepSize, maxIterations, tolerance, shuffle, - smorms3Update) + SMORMS3Update(epsilon)) { /* Nothing to do. */ } } // namespace optimization diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 0d89c74d26c..af8de48835f 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -29,6 +29,7 @@ namespace optimization { * author = {Simon Funk}, * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, * year = {2015} + * url = {http://sifter.org/~simon/journal/20150420.html} * } * @endcode */ @@ -42,8 +43,8 @@ class SMORMS3Update * @param epsilon Value used to initialise the mean squared gradient parameter. */ SMORMS3Update(const double epsilon = 1e-16) : - epsilon(epsilon), - previousStepSize(0) + epsilon(epsilon), + previousStepSize(0) { /* Do nothing. */ } //! Get the value used to initialise the mean squared gradient parameter. @@ -96,20 +97,24 @@ class SMORMS3Update if (stepSize != previousStepSize) { stepSizeMat.fill(stepSize); + previousStepSize = stepSize; } iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + epsilon); mem %= (1 - x); mem += 1; - - previousStepSize = stepSize; } private: //! The value used to initialise the mean squared gradient parameter. - double epsilon, previousStepSize; + double epsilon; + + //! The previous value of step size in each iteration of update step. + double previousStepSize; + // The parameters mem, g and g2. arma::mat mem, g, g2; + // The matrix to be filled with stepSize. arma::mat stepSizeMat; }; From ab7a5adc528beeda16419cdaad881544733bf0a9 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 17:07:37 +0530 Subject: [PATCH 30/78] Add implementation of the Adam update policy --- .../core/optimizers/adam/CMakeLists.txt | 1 + src/mlpack/core/optimizers/adam/adam.hpp | 73 +++---- src/mlpack/core/optimizers/adam/adam_impl.hpp | 154 +-------------- .../core/optimizers/adam/adam_update.hpp | 180 ++++++++++++++++++ 4 files changed, 218 insertions(+), 190 deletions(-) create mode 100644 src/mlpack/core/optimizers/adam/adam_update.hpp diff --git a/src/mlpack/core/optimizers/adam/CMakeLists.txt b/src/mlpack/core/optimizers/adam/CMakeLists.txt index 3cbcfd84e79..eabdbbaafa5 100644 --- a/src/mlpack/core/optimizers/adam/CMakeLists.txt +++ b/src/mlpack/core/optimizers/adam/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES adam.hpp adam_impl.hpp + adam_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index 62af4c6ac82..9c729b4d887 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -20,6 +20,9 @@ #include +#include +#include "adam_update.hpp" + namespace mlpack { namespace optimization { @@ -105,81 +108,59 @@ class Adam * @param iterate Starting point (will be modified). * @return Objective value of the final point. 
*/ - double Optimize(arma::mat& iterate); + double Optimize(arma::mat& iterate){ return optimizer.Optimize(iterate); } //! Get the instantiated function to be optimized. - const DecomposableFunctionType& Function() const { return function; } + const DecomposableFunctionType& Function() const + { + return optimizer.Function(); + } //! Modify the instantiated function. - DecomposableFunctionType& Function() { return function; } + DecomposableFunctionType& Function() { return optimizer.Function(); } //! Get the step size. - double StepSize() const { return stepSize; } + double StepSize() const { return optimizer.StepSize(); } //! Modify the step size. - double& StepSize() { return stepSize; } + double& StepSize() { return optimizer.StepSize(); } //! Get the smoothing parameter. - double Beta1() const { return beta1; } + double Beta1() const { return optimizer.UpdatePolicy().Beta1(); } //! Modify the smoothing parameter. - double& Beta1() { return beta1; } + double& Beta1() { return optimizer.UpdatePolicy().Beta1(); } //! Get the second moment coefficient. - double Beta2() const { return beta2; } + double Beta2() const { return optimizer.UpdatePolicy().Beta2(); } //! Modify the second moment coefficient. - double& Beta2() { return beta2; } + double& Beta2() { return optimizer.UpdatePolicy().Beta2(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return eps; } + double Epsilon() const { return optimizer.UpdatePolicy().Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return eps; } + double& Epsilon() { return optimizer.UpdatePolicy().Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). - size_t MaxIterations() const { return maxIterations; } + size_t MaxIterations() const { return optimizer.MaxIterations(); } //! Modify the maximum number of iterations (0 indicates no limit). - size_t& MaxIterations() { return maxIterations; } + size_t& MaxIterations() { return optimizer.MaxIterations(); } //! Get the tolerance for termination. - double Tolerance() const { return tolerance; } + double Tolerance() const { return optimizer.Tolerance(); } //! Modify the tolerance for termination. - double& Tolerance() { return tolerance; } + double& Tolerance() { return optimizer.Tolerance(); } //! Get whether or not the individual functions are shuffled. - bool Shuffle() const { return shuffle; } + bool Shuffle() const { return optimizer.Shuffle(); } //! Modify whether or not the individual functions are shuffled. - bool& Shuffle() { return shuffle; } + bool& Shuffle() { return optimizer.Shuffle(); } //! Get whether or not the AdaMax optimizer is specified. - bool AdaMax() const { return adaMax; } + bool AdaMax() const { return optimizer.UpdatePolicy().AdaMax(); } //! Modify wehther or not the AdaMax optimizer is to be used. - bool& AdaMax() { return adaMax; } + bool& AdaMax() { return optimizer.UpdatePolicy().AdaMax(); } private: - //! The instantiated function. - DecomposableFunctionType& function; - - //! The step size for each example. - double stepSize; - - //! Exponential decay rate for the first moment estimates. - double beta1; - - //! Exponential decay rate for the weighted infinity norm estimates. - double beta2; - - //! The value used to initialise the mean squared gradient parameter. - double eps; - - //! The maximum number of allowed iterations. - size_t maxIterations; - - //! The tolerance for termination. - double tolerance; - - //! 
Controls whether or not the individual functions are shuffled when - //! iterating. - bool shuffle; - - //! Specifies whether or not the AdaMax optimizer is to be used. - bool adaMax; + //! The Stochastic Gradient Descent object with Adam policy. + SGD optimizer; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/adam/adam_impl.hpp b/src/mlpack/core/optimizers/adam/adam_impl.hpp index 8a6be66c079..b9faf1226d9 100644 --- a/src/mlpack/core/optimizers/adam/adam_impl.hpp +++ b/src/mlpack/core/optimizers/adam/adam_impl.hpp @@ -26,156 +26,22 @@ Adam::Adam(DecomposableFunctionType& function, const double stepSize, const double beta1, const double beta2, - const double eps, + const double epsilon, const size_t maxIterations, const double tolerance, const bool shuffle, const bool adaMax) : - function(function), - stepSize(stepSize), - beta1(beta1), - beta2(beta2), - eps(eps), - maxIterations(maxIterations), - tolerance(tolerance), - shuffle(shuffle), - adaMax(adaMax) + optimizer(function, + stepSize, + maxIterations, + tolerance, + shuffle, + AdamUpdate(epsilon, + beta1, + beta2, + adaMax)) { /* Nothing to do. */ } -//! Optimize the function (minimize). -template -double Adam::Optimize(arma::mat& iterate) -{ - // Find the number of functions to use. - const size_t numFunctions = function.NumFunctions(); - - // This is used only if shuffle is true. - arma::Col visitationOrder; - if (shuffle) - visitationOrder = arma::shuffle(arma::linspace>(0, - (numFunctions - 1), numFunctions)); - - // To keep track of where we are and how things are going. - size_t currentFunction = 0; - double overallObjective = 0; - double lastObjective = DBL_MAX; - - // Calculate the first objective function. - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - - // Now iterate! - arma::mat gradient(iterate.n_rows, iterate.n_cols); - - // Exponential moving average of gradient values. - arma::mat m = arma::zeros(iterate.n_rows, iterate.n_cols); - - /** - * Initialize either the exponentially weighted infinity norm for AdaMax - * optimizer (u) or exponential moving average of squared gradient values - * for Adam optimizer (v). - */ - arma::mat u, v; - if (adaMax) - { - u = arma::zeros(iterate.n_rows, iterate.n_cols); - } - else - { - v = arma::zeros(iterate.n_rows, iterate.n_cols); - } - - for (size_t i = 1; i != maxIterations; ++i, ++currentFunction) - { - // Is this iteration the start of a sequence? - if ((currentFunction % numFunctions) == 0) - { - // Output current objective function. - Log::Info << "Adam: iteration " << i << ", objective " << overallObjective - << "." << std::endl; - - if (std::isnan(overallObjective) || std::isinf(overallObjective)) - { - Log::Warn << "Adam: converged to " << overallObjective - << "; terminating with failure. Try a smaller step size?" - << std::endl; - return overallObjective; - } - - if (std::abs(lastObjective - overallObjective) < tolerance) - { - Log::Info << "Adam: minimized within tolerance " << tolerance << "; " - << "terminating optimization." << std::endl; - return overallObjective; - } - - // Reset the counter variables. - lastObjective = overallObjective; - overallObjective = 0; - currentFunction = 0; - - if (shuffle) // Determine order of visitation. - visitationOrder = arma::shuffle(visitationOrder); - } - - // Evaluate the gradient for this iteration. 
- if (shuffle) - function.Gradient(iterate, visitationOrder[currentFunction], gradient); - else - function.Gradient(iterate, currentFunction, gradient); - - // And update the iterate. - m *= beta1; - m += (1 - beta1) * gradient; - - if (adaMax) - { - // Update the exponentially weighted infinity norm. - u *= beta2; - u = arma::max(u, arma::abs(gradient)); - } - else - { - v *= beta2; - v += (1 - beta2) * (gradient % gradient); - } - - const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i); - const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i); - - if (adaMax) - { - if (biasCorrection1 != 0.0) - iterate -= (stepSize / biasCorrection1 * m / (u + eps)); - } - else - { - /** - * It should be noted that the term, m / (arma::sqrt(v) + eps), in the - * following expression is an approximation of the following actual term; - * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). - */ - iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * - m / (arma::sqrt(v) + eps); - } - - // Now add that to the overall objective function. - if (shuffle) - overallObjective += function.Evaluate(iterate, - visitationOrder[currentFunction]); - else - overallObjective += function.Evaluate(iterate, currentFunction); - } - - Log::Info << "Adam: maximum iterations (" << maxIterations << ") reached; " - << "terminating optimization." << std::endl; - // Calculate final objective. - overallObjective = 0; - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - return overallObjective; -} - } // namespace optimization } // namespace mlpack diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp new file mode 100644 index 00000000000..fba575f4ce5 --- /dev/null +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -0,0 +1,180 @@ +/** + * @file adam.hpp + * @author Ryan Curtin + * @author Vasanth Kalingeri + * @author Marcus Edel + * @author Vivek Pal + * + * Adam and AdaMax optimizer. Adam is an an algorithm for first-order gradient- + * -based optimization of stochastic objective functions, based on adaptive + * estimates of lower-order moments. AdaMax is simply a variant of Adam based + * on the infinity norm. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_OPTIMIZERS_ADAM_ADAM_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_ADAM_ADAM_UPDATE_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * Adam is an optimizer that computes individual adaptive learning rates for + * different parameters from estimates of first and second moments of the + * gradients. AdaMax is a variant of Adam based on the infinity norm as given + * in the section 7 of the following paper. + * + * For more information, see the following. + * + * @code + * @article{Kingma2014, + * author = {Diederik P. Kingma and Jimmy Ba}, + * title = {Adam: {A} Method for Stochastic Optimization}, + * journal = {CoRR}, + * year = {2014} + * } + * @endcode + * + */ +class AdamUpdate +{ + public: + /** + * Construct the Adam update policy with the given epsilon parameter. + * + * @param epsilon The epsilon value used to initialise the squared gradient + * parameter. 
+ */ + AdamUpdate(const double epsilon = 1e-8, + const double beta1 = 0.9, + const double beta2 = 0.999, + const bool adaMax = false) : + epsilon(epsilon), + beta1(beta1), + beta2(beta2), + adaMax(adaMax) + { + // Nothing to do. + } + + /** + * The Initialize method is called by SGD Optimizer method before the start of + * the iteration update process. + * + * @param rows number of rows in the gradient matrix. + * @param cols number of columns in the gradient matrix. + */ + void Initialize(const size_t rows, + const size_t cols) + { + m = arma::zeros(rows, cols); + if (adaMax) + { + u = arma::zeros(rows, cols); + } + else + { + v = arma::zeros(rows, cols); + } + } + + /** + * Update step for Adam. + * + * @param iterate Parameters that minimize the function. + * @param stepSize Step size to be used for the given iteration. + * @param gradient The gradient matrix. + */ + void Update(arma::mat& iterate, + const double stepSize, + const arma::mat& gradient, + const size_t i) + { + // And update the iterate. + m *= beta1; + m += (1 - beta1) * gradient; + + if (adaMax) + { + // Update the exponentially weighted infinity norm. + u *= beta2; + u = arma::max(u, arma::abs(gradient)); + } + else + { + v *= beta2; + v += (1 - beta2) * (gradient % gradient); + } + + const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i); + const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i); + + if (adaMax) + { + if (biasCorrection1 != 0.0) + iterate -= (stepSize / biasCorrection1 * m / (u + epsilon)); + } + else + { + /** + * It should be noted that the term, m / (arma::sqrt(v) + eps), in the + * following expression is an approximation of the following actual term; + * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). + */ + iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * + m / (arma::sqrt(v) + epsilon); + } + } + + //! Get the value used to initialise the squared gradient parameter. + double Epsilon() const { return epsilon; } + //! Modify the value used to initialise the squared gradient parameter. + double& Epsilon() { return epsilon; } + + //! Get the smoothing parameter. + double Beta1() const { return beta1; } + //! Modify the smoothing parameter. + double& Beta1() { return beta1; } + + //! Get the second moment coefficient. + double Beta2() const { return beta2; } + //! Modify the second moment coefficient. + double& Beta2() { return beta2; } + + //! Get whether or not the AdaMax optimizer is specified. + bool AdaMax() const { return adaMax; } + //! Modify wehther or not the AdaMax optimizer is to be used. + bool& AdaMax() { return adaMax; } + + private: + // The epsilon value used to initialise the squared gradient parameter. + double epsilon; + + // The smoothing parameter. + double beta1; + + // The second moment coefficient. + double beta2; + + //! Specifies whether or not the AdaMax optimizer is to be used. + bool adaMax; + + // The exponential moving average of gradient values. + arma::mat m; + + // The exponentially weighted infinity norm. + arma::mat u; + + // The exponential moving average of squared gradient values. + arma::mat v; +}; + +} // namespace optimization +} // namespace mlpack + +#endif From bf7e38d7657ae51ce9dfbe9a20619932501a943b Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 17:13:21 +0530 Subject: [PATCH 31/78] Add iteration parameter "i" to the Update function Used in the Update step of the Adam optimizer. 
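Concretely, the iteration count is what drives Adam's bias-correction terms, so the update policy needs to receive it from SGD in some form. The relevant fragment of AdamUpdate::Update() above, for reference:

    // The bias corrections depend on how many steps have been taken so far.
    const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i);
    const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i);

    iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) *
        m / (arma::sqrt(v) + epsilon);

A later commit in this series drops the extra parameter again in favour of a counter kept inside the policy.
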
--- src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp | 3 ++- src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp | 3 ++- src/mlpack/core/optimizers/sgd/sgd_impl.hpp | 2 +- .../core/optimizers/sgd/update_policies/momentum_update.hpp | 3 ++- .../core/optimizers/sgd/update_policies/vanilla_update.hpp | 3 ++- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp index 391e2a88bd3..537d2d067c9 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp @@ -83,7 +83,8 @@ class AdaDeltaUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient) + const arma::mat& gradient, + const size_t i) { // Accumulate gradient. meanSquaredGradient *= rho; diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp index 2fb52573b1b..12634a4eff7 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp @@ -79,7 +79,8 @@ class AdaGradUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient) + const arma::mat& gradient, + const size_t i) { squaredGradient += (gradient % gradient); iterate -= (stepSize * gradient) / (arma::sqrt(squaredGradient) + epsilon); diff --git a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp index 5ac19ba9607..ca756cb20e2 100644 --- a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp +++ b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp @@ -108,7 +108,7 @@ double SGD::Optimize( function.Gradient(iterate, currentFunction, gradient); // Use the update policy to take a step. - updatePolicy.Update(iterate, stepSize, gradient); + updatePolicy.Update(iterate, stepSize, gradient, i); // Now add that to the overall objective function. if (shuffle) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp index 2947c355f90..889d7255049 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp @@ -99,7 +99,8 @@ class MomentumUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient) + const arma::mat& gradient, + const size_t i) { velocity = momentum * velocity - stepSize * gradient; iterate += velocity; diff --git a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp index 1bd85bf1c37..2c75a4b4fdc 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp @@ -52,7 +52,8 @@ class VanillaUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient) + const arma::mat& gradient, + const size_t i) { // Perform the vanilla SGD update. 
iterate -= stepSize * gradient; From c0eb047e8cebe43795fc30ce7e8bfb10638ec8b3 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 17:15:44 +0530 Subject: [PATCH 32/78] Suppress compiler warnings on unused parameter i --- src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp | 2 +- src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp | 2 +- .../core/optimizers/sgd/update_policies/momentum_update.hpp | 2 +- .../core/optimizers/sgd/update_policies/vanilla_update.hpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp index 537d2d067c9..cb582239a8e 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp @@ -84,7 +84,7 @@ class AdaDeltaUpdate void Update(arma::mat& iterate, const double stepSize, const arma::mat& gradient, - const size_t i) + const size_t /*i*/) { // Accumulate gradient. meanSquaredGradient *= rho; diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp index 12634a4eff7..bc0f1296aed 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp @@ -80,7 +80,7 @@ class AdaGradUpdate void Update(arma::mat& iterate, const double stepSize, const arma::mat& gradient, - const size_t i) + const size_t /*i*/) { squaredGradient += (gradient % gradient); iterate -= (stepSize * gradient) / (arma::sqrt(squaredGradient) + epsilon); diff --git a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp index 889d7255049..7e4a1deeed3 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp @@ -100,7 +100,7 @@ class MomentumUpdate void Update(arma::mat& iterate, const double stepSize, const arma::mat& gradient, - const size_t i) + const size_t /*i*/) { velocity = momentum * velocity - stepSize * gradient; iterate += velocity; diff --git a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp index 2c75a4b4fdc..159d7f3398f 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp @@ -53,7 +53,7 @@ class VanillaUpdate void Update(arma::mat& iterate, const double stepSize, const arma::mat& gradient, - const size_t i) + const size_t /*i*/) { // Perform the vanilla SGD update. iterate -= stepSize * gradient; From 14a0091884c41870c21474ecdb0f5cea2a509b1c Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 19:01:37 +0530 Subject: [PATCH 33/78] Use the combination of for_each and a lambda expression Avoids allocating the stepSizeMat completely. --- .../optimizers/smorms3/smorms3_update.hpp | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index af8de48835f..b9f43c1f51d 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -43,8 +43,7 @@ class SMORMS3Update * @param epsilon Value used to initialise the mean squared gradient parameter. 
*/ SMORMS3Update(const double epsilon = 1e-16) : - epsilon(epsilon), - previousStepSize(0) + epsilon(epsilon) { /* Do nothing. */ } //! Get the value used to initialise the mean squared gradient parameter. @@ -67,9 +66,6 @@ class SMORMS3Update mem = arma::ones(rows, cols); g = arma::zeros(rows, cols); g2 = arma::zeros(rows, cols); - - // Initialise a matrix to be filled with stepSize. - stepSizeMat = arma::zeros(rows, cols); } /** @@ -94,13 +90,9 @@ class SMORMS3Update arma::mat x = (g % g) / (g2 + epsilon); - if (stepSize != previousStepSize) - { - stepSizeMat.fill(stepSize); - previousStepSize = stepSize; - } + x.for_each( [stepSize](double &v) { v = std::min(v, stepSize); } ); - iterate -= gradient % arma::min(x, stepSizeMat) / (arma::sqrt(g2) + epsilon); + iterate -= gradient % x / (arma::sqrt(g2) + epsilon); mem %= (1 - x); mem += 1; @@ -109,14 +101,8 @@ class SMORMS3Update //! The value used to initialise the mean squared gradient parameter. double epsilon; - //! The previous value of step size in each iteration of update step. - double previousStepSize; - // The parameters mem, g and g2. arma::mat mem, g, g2; - - // The matrix to be filled with stepSize. - arma::mat stepSizeMat; }; } // namespace optimization From 93e817b998406508ec005c73bf9464a1fc2825cd Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 19:06:04 +0530 Subject: [PATCH 34/78] fixup! Fix a couple of comments --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index b9f43c1f51d..6617810de2b 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -55,8 +55,8 @@ class SMORMS3Update * The Initialize method is called by SGD::Optimize method with UpdatePolicy * SMORMS3Update before the start of the iteration update process. * - * @param n_rows number of rows in the gradient matrix. - * @param n_cols number of columns in the gradient matrix. + * @param rows number of rows in the gradient matrix. + * @param cols number of columns in the gradient matrix. 
* @param lRate */ void Initialize(const size_t rows, From 1f0310b52fd2f8ef7cfc089b470cc05b3944405d Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 30 Mar 2017 21:04:11 +0530 Subject: [PATCH 35/78] Try transform() instead of for_each() to fix Travis build failure --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 6617810de2b..2d22ae4863a 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -90,7 +90,7 @@ class SMORMS3Update arma::mat x = (g % g) / (g2 + epsilon); - x.for_each( [stepSize](double &v) { v = std::min(v, stepSize); } ); + x.transform( [stepSize](double &v) { return std::min(v, stepSize); } ); iterate -= gradient % x / (arma::sqrt(g2) + epsilon); From 82933523e04e6f6da4f84f7ffca153d3462bbd95 Mon Sep 17 00:00:00 2001 From: Abhinav Moudgil Date: Sat, 1 Apr 2017 03:16:50 +0530 Subject: [PATCH 36/78] Improve comments --- src/mlpack/tests/convolutional_network_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index 6d79f7605b2..26b4c28bd18 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -47,12 +47,12 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) { if (i < nPoints / 2) { - // class 1 - digit = 4 + // Assign label "1" to all samples with digit = 4 Y(i) = 1; } else { - // class 2 - digit = 9 + // Assign label "2" to all samples with digit = 9 Y(i) = 2; } } From cda0e912402461730a5091c897319296f91154ca Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sat, 1 Apr 2017 06:51:45 +0530 Subject: [PATCH 37/78] Address review comments * Use a local variable to keep count of iterations. * Change adaMax parameter to a template parameter. --- .../optimizers/ada_delta/ada_delta_update.hpp | 3 +- .../optimizers/ada_grad/ada_grad_update.hpp | 3 +- src/mlpack/core/optimizers/adam/adam.hpp | 14 ++------- src/mlpack/core/optimizers/adam/adam_impl.hpp | 26 +++++++--------- .../core/optimizers/adam/adam_update.hpp | 31 +++++++++---------- src/mlpack/core/optimizers/sgd/sgd_impl.hpp | 2 +- .../sgd/update_policies/momentum_update.hpp | 3 +- .../sgd/update_policies/vanilla_update.hpp | 3 +- .../logistic_regression.hpp | 2 +- .../logistic_regression_impl.hpp | 2 +- src/mlpack/tests/adam_test.cpp | 12 ++++--- 11 files changed, 44 insertions(+), 57 deletions(-) diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp index cb582239a8e..391e2a88bd3 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp @@ -83,8 +83,7 @@ class AdaDeltaUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t /*i*/) + const arma::mat& gradient) { // Accumulate gradient. 
meanSquaredGradient *= rho; diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp index bc0f1296aed..2fb52573b1b 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp @@ -79,8 +79,7 @@ class AdaGradUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t /*i*/) + const arma::mat& gradient) { squaredGradient += (gradient % gradient); iterate -= (stepSize * gradient) / (arma::sqrt(squaredGradient) + epsilon); diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index 9c729b4d887..e590c0b2813 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -64,7 +64,7 @@ namespace optimization { * @tparam DecomposableFunctionType Decomposable objective function type to be * minimized. */ -template +template class Adam { public: @@ -87,8 +87,6 @@ class Adam * @param tolerance Maximum absolute tolerance to terminate algorithm. * @param shuffle If true, the function order is shuffled; otherwise, each * function is visited in linear order. - * @param adaMax If true, then the AdaMax optimizer is used; otherwise, by - * default the Adam optimizer is used. */ Adam(DecomposableFunctionType& function, const double stepSize = 0.001, @@ -97,8 +95,7 @@ class Adam const double eps = 1e-8, const size_t maxIterations = 100000, const double tolerance = 1e-5, - const bool shuffle = true, - const bool adaMax = false); + const bool shuffle = true); /** * Optimize the given function using Adam. The given starting point will be @@ -153,14 +150,9 @@ class Adam //! Modify whether or not the individual functions are shuffled. bool& Shuffle() { return optimizer.Shuffle(); } - //! Get whether or not the AdaMax optimizer is specified. - bool AdaMax() const { return optimizer.UpdatePolicy().AdaMax(); } - //! Modify wehther or not the AdaMax optimizer is to be used. - bool& AdaMax() { return optimizer.UpdatePolicy().AdaMax(); } - private: //! The Stochastic Gradient Descent object with Adam policy. - SGD optimizer; + SGD > optimizer; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/adam/adam_impl.hpp b/src/mlpack/core/optimizers/adam/adam_impl.hpp index b9faf1226d9..4905a0cec24 100644 --- a/src/mlpack/core/optimizers/adam/adam_impl.hpp +++ b/src/mlpack/core/optimizers/adam/adam_impl.hpp @@ -21,25 +21,23 @@ namespace mlpack { namespace optimization { -template -Adam::Adam(DecomposableFunctionType& function, - const double stepSize, - const double beta1, - const double beta2, - const double epsilon, - const size_t maxIterations, - const double tolerance, - const bool shuffle, - const bool adaMax) : +template +Adam::Adam(DecomposableFunctionType& function, + const double stepSize, + const double beta1, + const double beta2, + const double epsilon, + const size_t maxIterations, + const double tolerance, + const bool shuffle) : optimizer(function, stepSize, maxIterations, tolerance, shuffle, - AdamUpdate(epsilon, - beta1, - beta2, - adaMax)) + AdamUpdate(epsilon, + beta1, + beta2)) { /* Nothing to do. 
*/ } } // namespace optimization diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index fba575f4ce5..d5a74f17b4e 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -39,8 +39,11 @@ namespace optimization { * year = {2014} * } * @endcode - * + * + * @param adaMax If true, then the AdaMax optimizer is used; otherwise, by + * default the Adam optimizer is used. */ +template class AdamUpdate { public: @@ -52,12 +55,11 @@ class AdamUpdate */ AdamUpdate(const double epsilon = 1e-8, const double beta1 = 0.9, - const double beta2 = 0.999, - const bool adaMax = false) : + const double beta2 = 0.999) : epsilon(epsilon), beta1(beta1), beta2(beta2), - adaMax(adaMax) + iteration(0) { // Nothing to do. } @@ -92,9 +94,11 @@ class AdamUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t i) + const arma::mat& gradient) { + // Increment the iteration counter variable. + ++iteration; + // And update the iterate. m *= beta1; m += (1 - beta1) * gradient; @@ -111,8 +115,8 @@ class AdamUpdate v += (1 - beta2) * (gradient % gradient); } - const double biasCorrection1 = 1.0 - std::pow(beta1, (double) i); - const double biasCorrection2 = 1.0 - std::pow(beta2, (double) i); + const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); + const double biasCorrection2 = 1.0 - std::pow(beta2, (double) iteration); if (adaMax) { @@ -146,11 +150,6 @@ class AdamUpdate //! Modify the second moment coefficient. double& Beta2() { return beta2; } - //! Get whether or not the AdaMax optimizer is specified. - bool AdaMax() const { return adaMax; } - //! Modify wehther or not the AdaMax optimizer is to be used. - bool& AdaMax() { return adaMax; } - private: // The epsilon value used to initialise the squared gradient parameter. double epsilon; @@ -161,9 +160,6 @@ class AdamUpdate // The second moment coefficient. double beta2; - //! Specifies whether or not the AdaMax optimizer is to be used. - bool adaMax; - // The exponential moving average of gradient values. arma::mat m; @@ -172,6 +168,9 @@ class AdamUpdate // The exponential moving average of squared gradient values. arma::mat v; + + // The number of iterations. + double iteration; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp index ca756cb20e2..5ac19ba9607 100644 --- a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp +++ b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp @@ -108,7 +108,7 @@ double SGD::Optimize( function.Gradient(iterate, currentFunction, gradient); // Use the update policy to take a step. - updatePolicy.Update(iterate, stepSize, gradient, i); + updatePolicy.Update(iterate, stepSize, gradient); // Now add that to the overall objective function. 
if (shuffle) diff --git a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp index 7e4a1deeed3..2947c355f90 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp @@ -99,8 +99,7 @@ class MomentumUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t /*i*/) + const arma::mat& gradient) { velocity = momentum * velocity - stepSize * gradient; iterate += velocity; diff --git a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp index 159d7f3398f..1bd85bf1c37 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp @@ -52,8 +52,7 @@ class VanillaUpdate */ void Update(arma::mat& iterate, const double stepSize, - const arma::mat& gradient, - const size_t /*i*/) + const arma::mat& gradient) { // Perform the vanilla SGD update. iterate -= stepSize * gradient; diff --git a/src/mlpack/methods/logistic_regression/logistic_regression.hpp b/src/mlpack/methods/logistic_regression/logistic_regression.hpp index 7a03a0a04e0..a19f31b876a 100644 --- a/src/mlpack/methods/logistic_regression/logistic_regression.hpp +++ b/src/mlpack/methods/logistic_regression/logistic_regression.hpp @@ -119,7 +119,7 @@ class LogisticRegression * @param responses Outputs results from input training variables. */ template< - template class OptimizerType = mlpack::optimization::L_BFGS + template class OptimizerType = mlpack::optimization::L_BFGS > void Train(const MatType& predictors, const arma::Row& responses); diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp b/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp index 4974c975a59..94945258474 100644 --- a/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp +++ b/src/mlpack/methods/logistic_regression/logistic_regression_impl.hpp @@ -66,7 +66,7 @@ LogisticRegression::LogisticRegression( } template -template class OptimizerType> +template class OptimizerType> void LogisticRegression::Train(const MatType& predictors, const arma::Row& responses) { diff --git a/src/mlpack/tests/adam_test.cpp b/src/mlpack/tests/adam_test.cpp index 88d839fd5cf..6dae1e4320f 100644 --- a/src/mlpack/tests/adam_test.cpp +++ b/src/mlpack/tests/adam_test.cpp @@ -36,7 +36,8 @@ BOOST_AUTO_TEST_SUITE(AdamTest); BOOST_AUTO_TEST_CASE(SimpleAdamTestFunction) { SGDTestFunction f; - Adam optimizer(f, 1e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, true); + Adam optimizer(f, 1e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, + true); arma::mat coordinates = f.GetInitialPoint(); optimizer.Optimize(coordinates); @@ -52,8 +53,8 @@ BOOST_AUTO_TEST_CASE(SimpleAdamTestFunction) BOOST_AUTO_TEST_CASE(SimpleAdaMaxTestFunction) { SGDTestFunction f; - Adam optimizer(f, 2e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, true - ,true); + Adam optimizer(f, 2e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, + true); arma::mat coordinates = f.GetInitialPoint(); optimizer.Optimize(coordinates); @@ -174,8 +175,9 @@ BOOST_AUTO_TEST_CASE(AdaMaxLogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - Adam > adamax(lrf, 1e-3, 0.9, 0.999, 1e-8, 5000000, - 1e-9, true, true); + Adam, true> adamax(lrf, 1e-3, 0.9, 0.999, 1e-8, + 
5000000, 1e-9, true); + lr.Train(adamax); // Ensure that the error is close to zero. From e01bb43b2474169f03668bc90467e65efe3443a1 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 01:20:06 +0530 Subject: [PATCH 38/78] Create separate UpdateRule classes for Adam and AdaMax --- .../core/optimizers/adam/CMakeLists.txt | 1 + src/mlpack/core/optimizers/adam/adam.hpp | 19 ++- src/mlpack/core/optimizers/adam/adam_impl.hpp | 25 +-- .../core/optimizers/adam/adam_update.hpp | 51 ++----- .../core/optimizers/adam/adamax_update.hpp | 143 ++++++++++++++++++ src/mlpack/tests/adam_test.cpp | 8 +- 6 files changed, 186 insertions(+), 61 deletions(-) create mode 100644 src/mlpack/core/optimizers/adam/adamax_update.hpp diff --git a/src/mlpack/core/optimizers/adam/CMakeLists.txt b/src/mlpack/core/optimizers/adam/CMakeLists.txt index eabdbbaafa5..1377bbb0e15 100644 --- a/src/mlpack/core/optimizers/adam/CMakeLists.txt +++ b/src/mlpack/core/optimizers/adam/CMakeLists.txt @@ -2,6 +2,7 @@ set(SOURCES adam.hpp adam_impl.hpp adam_update.hpp + adamax_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index e590c0b2813..f81b3efb0e5 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -22,6 +22,7 @@ #include #include "adam_update.hpp" +#include "adamax_update.hpp" namespace mlpack { namespace optimization { @@ -63,9 +64,13 @@ namespace optimization { * * @tparam DecomposableFunctionType Decomposable objective function type to be * minimized. + * @tparam UpdateRule Adam optimizer update rule to be used. */ -template -class Adam +template< + typename DecomposableFunctionType, + typename UpdateRule = AdamUpdate +> +class AdamType { public: /** @@ -88,7 +93,7 @@ class Adam * @param shuffle If true, the function order is shuffled; otherwise, each * function is visited in linear order. */ - Adam(DecomposableFunctionType& function, + AdamType(DecomposableFunctionType& function, const double stepSize = 0.001, const double beta1 = 0.9, const double beta2 = 0.999, @@ -152,9 +157,15 @@ class Adam private: //! The Stochastic Gradient Descent object with Adam policy. - SGD > optimizer; + SGD optimizer; }; +template +using Adam = AdamType; + +template +using AdaMax = AdamType; + } // namespace optimization } // namespace mlpack diff --git a/src/mlpack/core/optimizers/adam/adam_impl.hpp b/src/mlpack/core/optimizers/adam/adam_impl.hpp index 4905a0cec24..82408de71e0 100644 --- a/src/mlpack/core/optimizers/adam/adam_impl.hpp +++ b/src/mlpack/core/optimizers/adam/adam_impl.hpp @@ -21,23 +21,24 @@ namespace mlpack { namespace optimization { -template -Adam::Adam(DecomposableFunctionType& function, - const double stepSize, - const double beta1, - const double beta2, - const double epsilon, - const size_t maxIterations, - const double tolerance, - const bool shuffle) : +template +AdamType::AdamType( + DecomposableFunctionType& function, + const double stepSize, + const double beta1, + const double beta2, + const double epsilon, + const size_t maxIterations, + const double tolerance, + const bool shuffle) : optimizer(function, stepSize, maxIterations, tolerance, shuffle, - AdamUpdate(epsilon, - beta1, - beta2)) + UpdateRule(epsilon, + beta1, + beta2)) { /* Nothing to do. 
*/ } } // namespace optimization diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index d5a74f17b4e..aa20e2f752d 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -39,11 +39,7 @@ namespace optimization { * year = {2014} * } * @endcode - * - * @param adaMax If true, then the AdaMax optimizer is used; otherwise, by - * default the Adam optimizer is used. */ -template class AdamUpdate { public: @@ -75,14 +71,7 @@ class AdamUpdate const size_t cols) { m = arma::zeros(rows, cols); - if (adaMax) - { - u = arma::zeros(rows, cols); - } - else - { - v = arma::zeros(rows, cols); - } + v = arma::zeros(rows, cols); } /** @@ -103,36 +92,19 @@ class AdamUpdate m *= beta1; m += (1 - beta1) * gradient; - if (adaMax) - { - // Update the exponentially weighted infinity norm. - u *= beta2; - u = arma::max(u, arma::abs(gradient)); - } - else - { - v *= beta2; - v += (1 - beta2) * (gradient % gradient); - } + v *= beta2; + v += (1 - beta2) * (gradient % gradient); const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); const double biasCorrection2 = 1.0 - std::pow(beta2, (double) iteration); - if (adaMax) - { - if (biasCorrection1 != 0.0) - iterate -= (stepSize / biasCorrection1 * m / (u + epsilon)); - } - else - { - /** - * It should be noted that the term, m / (arma::sqrt(v) + eps), in the - * following expression is an approximation of the following actual term; - * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). - */ - iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * - m / (arma::sqrt(v) + epsilon); - } + /** + * It should be noted that the term, m / (arma::sqrt(v) + eps), in the + * following expression is an approximation of the following actual term; + * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). + */ + iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * + m / (arma::sqrt(v) + epsilon); } //! Get the value used to initialise the squared gradient parameter. @@ -163,9 +135,6 @@ class AdamUpdate // The exponential moving average of gradient values. arma::mat m; - // The exponentially weighted infinity norm. - arma::mat u; - // The exponential moving average of squared gradient values. arma::mat v; diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp new file mode 100644 index 00000000000..7aa5b6b9908 --- /dev/null +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -0,0 +1,143 @@ +/** + * @file adam.hpp + * @author Ryan Curtin + * @author Vasanth Kalingeri + * @author Marcus Edel + * @author Vivek Pal + * + * AdaMax update rule. Adam is an an algorithm for first-order gradient- + * -based optimization of stochastic objective functions, based on adaptive + * estimates of lower-order moments. AdaMax is simply a variant of Adam based + * on the infinity norm. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#ifndef MLPACK_CORE_OPTIMIZERS_ADAM_ADAMAX_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_ADAM_ADAMAX_UPDATE_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * AdaMax is a variant of Adam, an optimizer that computes individual adaptive + * learning rates for different parameters from estimates of first and second + * moments of the gradients.based on the infinity norm as given in the section + * 7 of the following paper. + * + * For more information, see the following. + * + * @code + * @article{Kingma2014, + * author = {Diederik P. Kingma and Jimmy Ba}, + * title = {Adam: {A} Method for Stochastic Optimization}, + * journal = {CoRR}, + * year = {2014} + * } + * @endcode + */ +class AdaMaxUpdate +{ + public: + /** + * Construct the AdaMax update policy with the given epsilon parameter. + * + * @param epsilon The epsilon value used to initialise the squared gradient + * parameter. + */ + AdaMaxUpdate(const double epsilon = 1e-8, + const double beta1 = 0.9, + const double beta2 = 0.999) : + epsilon(epsilon), + beta1(beta1), + beta2(beta2), + iteration(0) + { + // Nothing to do. + } + + /** + * The Initialize method is called by SGD Optimizer method before the start of + * the iteration update process. + * + * @param rows number of rows in the gradient matrix. + * @param cols number of columns in the gradient matrix. + */ + void Initialize(const size_t rows, + const size_t cols) + { + m = arma::zeros(rows, cols); + u = arma::zeros(rows, cols); + } + + /** + * Update step for Adam. + * + * @param iterate Parameters that minimize the function. + * @param stepSize Step size to be used for the given iteration. + * @param gradient The gradient matrix. + */ + void Update(arma::mat& iterate, + const double stepSize, + const arma::mat& gradient) + { + // Increment the iteration counter variable. + ++iteration; + + // And update the iterate. + m *= beta1; + m += (1 - beta1) * gradient; + + // Update the exponentially weighted infinity norm. + u *= beta2; + u = arma::max(u, arma::abs(gradient)); + + const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); + + if (biasCorrection1 != 0) + iterate -= (stepSize / biasCorrection1 * m / (u + epsilon)); + } + + //! Get the value used to initialise the squared gradient parameter. + double Epsilon() const { return epsilon; } + //! Modify the value used to initialise the squared gradient parameter. + double& Epsilon() { return epsilon; } + + //! Get the smoothing parameter. + double Beta1() const { return beta1; } + //! Modify the smoothing parameter. + double& Beta1() { return beta1; } + + //! Get the second moment coefficient. + double Beta2() const { return beta2; } + //! Modify the second moment coefficient. + double& Beta2() { return beta2; } + + private: + // The epsilon value used to initialise the squared gradient parameter. + double epsilon; + + // The smoothing parameter. + double beta1; + + // The second moment coefficient. + double beta2; + + // The exponential moving average of gradient values. + arma::mat m; + + // The exponentially weighted infinity norm. + arma::mat u; + + // The number of iterations. 
+ double iteration; +}; + +} // namespace optimization +} // namespace mlpack + +#endif diff --git a/src/mlpack/tests/adam_test.cpp b/src/mlpack/tests/adam_test.cpp index 6dae1e4320f..a88e3618f8a 100644 --- a/src/mlpack/tests/adam_test.cpp +++ b/src/mlpack/tests/adam_test.cpp @@ -53,8 +53,8 @@ BOOST_AUTO_TEST_CASE(SimpleAdamTestFunction) BOOST_AUTO_TEST_CASE(SimpleAdaMaxTestFunction) { SGDTestFunction f; - Adam optimizer(f, 2e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, - true); + AdaMax optimizer(f, 2e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9, + true); arma::mat coordinates = f.GetInitialPoint(); optimizer.Optimize(coordinates); @@ -175,8 +175,8 @@ BOOST_AUTO_TEST_CASE(AdaMaxLogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - Adam, true> adamax(lrf, 1e-3, 0.9, 0.999, 1e-8, - 5000000, 1e-9, true); + AdaMax > adamax(lrf, 1e-3, 0.9, 0.999, 1e-8, + 5000000, 1e-9, true); lr.Train(adamax); From 177690bada1700da3a94d762b0f85dffb945870c Mon Sep 17 00:00:00 2001 From: Sagar B Hathwar Date: Sun, 2 Apr 2017 09:30:23 +0530 Subject: [PATCH 39/78] Include add_merge.hpp instead of itself in add_merge_impl.hpp --- src/mlpack/methods/ann/layer/add_merge_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/ann/layer/add_merge_impl.hpp b/src/mlpack/methods/ann/layer/add_merge_impl.hpp index 0ef62c2d0ac..583a4c1bee6 100644 --- a/src/mlpack/methods/ann/layer/add_merge_impl.hpp +++ b/src/mlpack/methods/ann/layer/add_merge_impl.hpp @@ -14,7 +14,7 @@ #define MLPACK_METHODS_ANN_LAYER_ADD_MERGE_IMPL_HPP // In case it hasn't yet been included. -#include "add_merge_impl.hpp" +#include "add_merge.hpp" namespace mlpack { namespace ann /** Artificial Neural Network. */ { From d77444f5b1a9aa760cefb873dbb60ba873731c4a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 19:44:47 +0530 Subject: [PATCH 40/78] Add parameters description --- src/mlpack/core/optimizers/adam/adam_update.hpp | 2 ++ src/mlpack/core/optimizers/adam/adamax_update.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index aa20e2f752d..71a283632a5 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -48,6 +48,8 @@ class AdamUpdate * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. + * @param beta1 The smoothing parameter. + * @param beta2 The second moment coefficient. */ AdamUpdate(const double epsilon = 1e-8, const double beta1 = 0.9, diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index 7aa5b6b9908..a23996f30a0 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -48,6 +48,8 @@ class AdaMaxUpdate * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. + * @param beta1 The smoothing parameter. + * @param beta2 The second moment coefficient. 
*/ AdaMaxUpdate(const double epsilon = 1e-8, const double beta1 = 0.9, From 565242ef2f0b85616e419e1246d4ce617330771a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 19:45:37 +0530 Subject: [PATCH 41/78] Fix alignment issue --- src/mlpack/core/optimizers/adam/adam.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index f81b3efb0e5..37411d9bb0b 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -94,13 +94,13 @@ class AdamType * function is visited in linear order. */ AdamType(DecomposableFunctionType& function, - const double stepSize = 0.001, - const double beta1 = 0.9, - const double beta2 = 0.999, - const double eps = 1e-8, - const size_t maxIterations = 100000, - const double tolerance = 1e-5, - const bool shuffle = true); + const double stepSize = 0.001, + const double beta1 = 0.9, + const double beta2 = 0.999, + const double eps = 1e-8, + const size_t maxIterations = 100000, + const double tolerance = 1e-5, + const bool shuffle = true); /** * Optimize the given function using Adam. The given starting point will be From 16a27fc242de1d493b18805333e2bc8c72fe8d73 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 19:55:11 +0530 Subject: [PATCH 42/78] Fix macros --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index 2d22ae4863a..c44448f05a3 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -9,8 +9,8 @@ * 3-clause BSD license along with mlpack. If not, see * http://www.opensource.org/licenses/BSD-3-Clause for more information. */ -#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_UPDATE_HPP -#define MLPACK_CORE_OPTIMIZERS_SMORMS3_UPDATE_HPP +#ifndef MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_SMORMS3_SMORMS3_UPDATE_HPP #include From d23d7eb55c822a63656b1627fad2dd66904ab287 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 19:59:12 +0530 Subject: [PATCH 43/78] Remove the comment for an unused parameter --- src/mlpack/core/optimizers/smorms3/smorms3_update.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp index c44448f05a3..fbc534926be 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp @@ -57,7 +57,6 @@ class SMORMS3Update * * @param rows number of rows in the gradient matrix. * @param cols number of columns in the gradient matrix. - * @param lRate */ void Initialize(const size_t rows, const size_t cols) From c64c610374b98ad3b9f126a19d41b40e7083963c Mon Sep 17 00:00:00 2001 From: sidak Date: Sun, 2 Apr 2017 21:18:34 +0530 Subject: [PATCH 44/78] Correct minor typo in ffn.hpp --- src/mlpack/methods/ann/ffn.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index 2bd20cfa2fb..051a5b03a69 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -204,7 +204,7 @@ class FFN void Gradient(); /** - * Reset the module infomration (weights/parameters). + * Reset the module information (weights/parameters). 
*/ void ResetParameters(); From ce245d1dbf96d2b81334e9ff0f807015029a3433 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 22:37:08 +0530 Subject: [PATCH 45/78] Correct the file name --- src/mlpack/core/optimizers/adam/adam_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index 71a283632a5..c6e61b1ff42 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -1,5 +1,5 @@ /** - * @file adam.hpp + * @file adam_update.hpp * @author Ryan Curtin * @author Vasanth Kalingeri * @author Marcus Edel From de7ff08cc59d7f790e9d68d8247f3aec1dcb1a17 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:35:15 +0530 Subject: [PATCH 46/78] Remove comments about AdaMax in adam_update.hpp --- src/mlpack/core/optimizers/adam/adam_update.hpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index c6e61b1ff42..7e42132dfe2 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -5,10 +5,9 @@ * @author Marcus Edel * @author Vivek Pal * - * Adam and AdaMax optimizer. Adam is an an algorithm for first-order gradient- - * -based optimization of stochastic objective functions, based on adaptive - * estimates of lower-order moments. AdaMax is simply a variant of Adam based - * on the infinity norm. + * Adam optimizer. Adam is an an algorithm for first-order gradient-based + * optimization of stochastic objective functions, based on adaptive estimates + * of lower-order moments. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -26,8 +25,7 @@ namespace optimization { /** * Adam is an optimizer that computes individual adaptive learning rates for * different parameters from estimates of first and second moments of the - * gradients. AdaMax is a variant of Adam based on the infinity norm as given - * in the section 7 of the following paper. + * gradients as given in the section 7 of the following paper. * * For more information, see the following. 
* From bec9a20185e7c45973db222fbef1662c3d1b5524 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:36:35 +0530 Subject: [PATCH 47/78] Remove a redundant type cast --- src/mlpack/core/optimizers/adam/adamax_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index a23996f30a0..17a33d2d26b 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -98,7 +98,7 @@ class AdaMaxUpdate u *= beta2; u = arma::max(u, arma::abs(gradient)); - const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); + const double biasCorrection1 = 1.0 - std::pow(beta1, iteration); if (biasCorrection1 != 0) iterate -= (stepSize / biasCorrection1 * m / (u + epsilon)); From c049330b118e7006545fb2ddbeb3bad1ec65ea79 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:40:12 +0530 Subject: [PATCH 48/78] Update a few comments --- src/mlpack/core/optimizers/adam/adam_update.hpp | 2 +- src/mlpack/core/optimizers/adam/adamax_update.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index 7e42132dfe2..95920c14ee7 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -42,7 +42,7 @@ class AdamUpdate { public: /** - * Construct the Adam update policy with the given epsilon parameter. + * Construct the Adam update policy with the given parameters. * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index 17a33d2d26b..97e26fe683d 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -1,5 +1,5 @@ /** - * @file adam.hpp + * @file adamax_update.hpp * @author Ryan Curtin * @author Vasanth Kalingeri * @author Marcus Edel @@ -44,7 +44,7 @@ class AdaMaxUpdate { public: /** - * Construct the AdaMax update policy with the given epsilon parameter. + * Construct the AdaMax update policy with the given parameters. * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. From e879e68a77fb0cbc2ed0a1817af77ab6da9a2e7b Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 5 Apr 2017 01:21:45 +0200 Subject: [PATCH 49/78] Use auxiliary memory to avoid copy in the iteration step. --- .../randomized_block_krylov_svd.cpp | 25 ++++++++++++------- .../randomized_block_krylov_svd.hpp | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index 9009c5f6422..41356e8e50a 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -2,7 +2,7 @@ * @file randomized_block_krylov_svd.cpp * @author Marcus Edel * - * Implementation of the randomized SVD method. + * Implementation of the randomized block krylov SVD method. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. 
You should have received a copy of the @@ -49,7 +49,7 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, arma::mat& v, const size_t rank) { - arma::mat Q, R, block; + arma::mat Q, R, block, blockIteration; if (blockSize == 0) { @@ -61,16 +61,23 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, // Construct and orthonormalize Krlov subspace. arma::mat K(data.n_rows, blockSize * (maxIterations + 1)); - arma::qr_econ(block, R, data * G); - // Copy the temporary memory to the right place. - K.submat(0, 0, block.n_rows - 1, block.n_cols - 1) = block; + // Create a working matrix using data from writable auxiliary memory + // (K matrix). Doing so avoids an uncessary copy in upcoming step. + block = arma::mat(K.memptr(), data.n_rows, blockSize, false); + arma::qr_econ(block, R, data * G); - for (size_t i = 0, b = block.n_cols; i < maxIterations; ++i, - b += block.n_cols) + for (size_t blockOffset = block.n_elem; blockOffset < K.n_elem; + blockOffset += block.n_elem) { - arma::qr_econ(block, R, data * (data.t() * block)); - K.submat(0, b, block.n_rows - 1, b + block.n_cols - 1) = block; + // Temporary working matrix to store the result in the correct place. + blockIteration = arma::mat(K.memptr() + blockOffset, data.n_rows, + blockSize, false); + + arma::qr_econ(blockIteration, R, data * (data.t() * block)); + + // Update working matrix for the next iteration. + block = arma::mat(K.memptr() + blockOffset, data.n_rows, blockSize, false); } arma::qr_econ(Q, R, K); diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp index f7d77ab4c50..06ef8b4c2b6 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.hpp @@ -2,7 +2,7 @@ * @file randomized_block_krylov_svd.hpp * @author Marcus Edel * - * An implementation of the randomized SVD method. + * An implementation of the randomized block krylov SVD method. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the From 0a2e8cc34e3f84db4faf2a05dec50cf63bd0d6df Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 5 Apr 2017 01:24:12 +0200 Subject: [PATCH 50/78] Minor spelling fix. --- .../methods/block_krylov_svd/randomized_block_krylov_svd.cpp | 2 +- src/mlpack/tests/block_krylov_svd_test.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index 41356e8e50a..e0f689b63e1 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -59,7 +59,7 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, // Random block initialization. arma::mat G = arma::randn(data.n_rows, blockSize); - // Construct and orthonormalize Krlov subspace. + // Construct and orthonormalize Krylov subspace. 
arma::mat K(data.n_rows, blockSize * (maxIterations + 1)); // Create a working matrix using data from writable auxiliary memory diff --git a/src/mlpack/tests/block_krylov_svd_test.cpp b/src/mlpack/tests/block_krylov_svd_test.cpp index 4ca9c6258d2..5d7e6e2e283 100644 --- a/src/mlpack/tests/block_krylov_svd_test.cpp +++ b/src/mlpack/tests/block_krylov_svd_test.cpp @@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDReconstructionError) /* * Check if the method can handle noisy matrices. */ -BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNosiyLowRankTest) +BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNoisyLowRankTest) { arma::mat data; CreateNoisyLowRankMatrix(data, 100, 1000, 5, 1.0); From 6b097f28d5317628130aede16f019d2abe37a268 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 5 Apr 2017 20:21:59 +0200 Subject: [PATCH 51/78] It isn't guaranteed that the recurrent network test will converge in the specified number of iterations using random weights (if the test pased in 1 of 5 times that's fine). --- src/mlpack/tests/recurrent_network_test.cpp | 114 +++++++++++--------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp index b726fc84ce9..73df32a209d 100644 --- a/src/mlpack/tests/recurrent_network_test.cpp +++ b/src/mlpack/tests/recurrent_network_test.cpp @@ -359,76 +359,90 @@ void ReberGrammarTestNetwork(bool embedded = false) * . . * ....... */ - const size_t outputSize = 7; - const size_t inputSize = 7; - const size_t rho = trainInput.at(0, 0).n_elem / inputSize; + // It isn't guaranteed that the recurrent network will converge in the + // specified number of iterations using random weights. If this works 1 of 5 + // times, I'm fine with that. All I want to know is that the network is able + // to escape from local minima and to solve the task. + size_t successes = 0; + for (size_t trial = 0; trial < 5; ++trial) + { + const size_t outputSize = 7; + const size_t inputSize = 7; + const size_t rho = trainInput.at(0, 0).n_elem / inputSize; - RNN > model(rho); + RNN > model(rho); - model.Add >(); - model.Add >(inputSize, 20); - model.Add >(20, 7, rho); - model.Add >(7, outputSize); - model.Add >(); + model.Add >(); + model.Add >(inputSize, 20); + model.Add >(20, 7, rho); + model.Add >(7, outputSize); + model.Add >(); - StandardSGD opt(model, 0.1, 2, -50000); + StandardSGD opt(model, 0.1, 2, -50000); - arma::mat inputTemp, labelsTemp; - for (size_t i = 0; i < 40; i++) - { - for (size_t j = 0; j < trainReberGrammarCount; j++) + arma::mat inputTemp, labelsTemp; + for (size_t i = 0; i < 20; i++) { - inputTemp = trainInput.at(0, j); - labelsTemp = trainLabels.at(0, j); + for (size_t j = 0; j < trainReberGrammarCount; j++) + { + inputTemp = trainInput.at(0, j); + labelsTemp = trainLabels.at(0, j); - model.Train(inputTemp, labelsTemp, opt); + model.Train(inputTemp, labelsTemp, opt); + } } - } - double error = 0; + double error = 0; - // Ask the network to predict the next Reber grammar in the given sequence. - for (size_t i = 0; i < testReberGrammarCount; i++) - { - arma::mat output, prediction; - arma::mat input = testInput.at(0, i); + // Ask the network to predict the next Reber grammar in the given sequence. 
+ for (size_t i = 0; i < testReberGrammarCount; i++) + { + arma::mat output, prediction; + arma::mat input = testInput.at(0, i); - model.Predict(input, prediction); - data::Binarize(prediction, output, 0.5); + model.Predict(input, prediction); + data::Binarize(prediction, output, 0.5); - const size_t reberGrammerSize = 7; - std::string inputReber = ""; + const size_t reberGrammerSize = 7; + std::string inputReber = ""; - size_t reberError = 0; - for (size_t j = 0; j < (output.n_elem / reberGrammerSize); j++) - { - if (arma::sum(arma::sum(output.submat(j * reberGrammerSize, 0, (j + 1) * - reberGrammerSize - 1, 0))) != 1) break; + size_t reberError = 0; + for (size_t j = 0; j < (output.n_elem / reberGrammerSize); j++) + { + if (arma::sum(arma::sum(output.submat(j * reberGrammerSize, 0, (j + 1) * + reberGrammerSize - 1, 0))) != 1) break; - char predictedSymbol, inputSymbol; - std::string reberChoices; + char predictedSymbol, inputSymbol; + std::string reberChoices; - ReberReverseTranslation(output.submat(j * reberGrammerSize, 0, (j + 1) * - reberGrammerSize - 1, 0), predictedSymbol); - ReberReverseTranslation(input.submat(j * reberGrammerSize, 0, (j + 1) * - reberGrammerSize - 1, 0), inputSymbol); - inputReber += inputSymbol; + ReberReverseTranslation(output.submat(j * reberGrammerSize, 0, (j + 1) * + reberGrammerSize - 1, 0), predictedSymbol); + ReberReverseTranslation(input.submat(j * reberGrammerSize, 0, (j + 1) * + reberGrammerSize - 1, 0), inputSymbol); + inputReber += inputSymbol; - if (embedded) - GenerateNextEmbeddedReber(transitions, inputReber, reberChoices); - else - GenerateNextReber(transitions, inputReber, reberChoices); + if (embedded) + GenerateNextEmbeddedReber(transitions, inputReber, reberChoices); + else + GenerateNextReber(transitions, inputReber, reberChoices); - if (reberChoices.find(predictedSymbol) != std::string::npos) - reberError++; + if (reberChoices.find(predictedSymbol) != std::string::npos) + reberError++; + } + + if (reberError != (output.n_elem / reberGrammerSize)) + error += 1; } - if (reberError != (output.n_elem / reberGrammerSize)) - error += 1; + error /= testReberGrammarCount; + if (error <= 0.2) + { + ++successes; + break; + } } - error /= testReberGrammarCount; - BOOST_REQUIRE_LE(error, 0.2); + BOOST_REQUIRE_GE(successes, 1); } /** From 3026b5026ef1c71ae49e666ea55ecddef08634f8 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 5 Apr 2017 23:50:41 +0200 Subject: [PATCH 52/78] The default setting of strict (strict = false use the auxiliary memory until a size change) in versions 5.600 and earlier is true, so to make sure it's always false we set it explicitly. --- .../methods/block_krylov_svd/randomized_block_krylov_svd.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index e0f689b63e1..702d706fd28 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -64,7 +64,7 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, // Create a working matrix using data from writable auxiliary memory // (K matrix). Doing so avoids an uncessary copy in upcoming step. 
- block = arma::mat(K.memptr(), data.n_rows, blockSize, false); + block = arma::mat(K.memptr(), data.n_rows, blockSize, false, false); arma::qr_econ(block, R, data * G); for (size_t blockOffset = block.n_elem; blockOffset < K.n_elem; @@ -77,7 +77,8 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, arma::qr_econ(blockIteration, R, data * (data.t() * block)); // Update working matrix for the next iteration. - block = arma::mat(K.memptr() + blockOffset, data.n_rows, blockSize, false); + block = arma::mat(K.memptr() + blockOffset, data.n_rows, blockSize, false, + false); } arma::qr_econ(Q, R, K); From 8116544edee856141f12a2b9a13732a83671bdd3 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 22:31:32 +0530 Subject: [PATCH 53/78] Add implementation of the RMSprop update policy --- .../core/optimizers/rmsprop/CMakeLists.txt | 1 + .../core/optimizers/rmsprop/rmsprop.hpp | 64 ++++------ .../core/optimizers/rmsprop/rmsprop_impl.hpp | 109 ++-------------- .../optimizers/rmsprop/rmsprop_update.hpp | 116 ++++++++++++++++++ 4 files changed, 153 insertions(+), 137 deletions(-) create mode 100644 src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp diff --git a/src/mlpack/core/optimizers/rmsprop/CMakeLists.txt b/src/mlpack/core/optimizers/rmsprop/CMakeLists.txt index 75c30c67bb9..da3c7681030 100644 --- a/src/mlpack/core/optimizers/rmsprop/CMakeLists.txt +++ b/src/mlpack/core/optimizers/rmsprop/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES rmsprop.hpp rmsprop_impl.hpp + rmsprop_update.hpp ) set(DIR_SRCS) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 58969e5fcb5..31301a99b27 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -2,6 +2,7 @@ * @file rmsprop.hpp * @author Ryan Curtin * @author Marcus Edel + * @author Vivek Pal * * RMSprop optimizer. RmsProp is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. @@ -16,6 +17,9 @@ #include +#include +#include "rmsprop_update.hpp" + namespace mlpack { namespace optimization { @@ -76,7 +80,7 @@ class RMSprop * @param stepSize Step size for each iteration. * @param alpha Smoothing constant, similar to that used in AdaDelta and * momentum methods. - * @param eps Value used to initialise the mean squared gradient parameter. + * @param epsilon Value used to initialise the mean squared gradient parameter. * @param maxIterations Maximum number of iterations allowed (0 means no * limit). * @param tolerance Maximum absolute tolerance to terminate algorithm. @@ -86,7 +90,7 @@ class RMSprop RMSprop(DecomposableFunctionType& function, const double stepSize = 0.01, const double alpha = 0.99, - const double eps = 1e-8, + const double epsilon = 1e-8, const size_t maxIterations = 100000, const double tolerance = 1e-5, const bool shuffle = true); @@ -99,65 +103,49 @@ class RMSprop * @param iterate Starting point (will be modified). * @return Objective value of the final point. */ - double Optimize(arma::mat& iterate); + double Optimize(arma::mat& iterate) { return optimizer.Optimize(iterate); } //! Get the instantiated function to be optimized. - const DecomposableFunctionType& Function() const { return function; } + const DecomposableFunctionType& Function() const + { + return optimizer.Function(); + } //! Modify the instantiated function. - DecomposableFunctionType& Function() { return function; } + DecomposableFunctionType& Function() { return optimizer.Function(); } //! 
Get the step size. - double StepSize() const { return stepSize; } + double StepSize() const { return optimizer.StepSize(); } //! Modify the step size. - double& StepSize() { return stepSize; } + double& StepSize() { return optimizer.StepSize(); } //! Get the smoothing parameter. - double Alpha() const { return alpha; } + double Alpha() const { return optimizer.UpdatePolicy().Alpha(); } //! Modify the smoothing parameter. - double& Alpha() { return alpha; } + double& Alpha() { return optimizer.UpdatePolicy().Alpha(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return eps; } + double Epsilon() const { return optimizer.UpdatePolicy().Alpha(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return eps; } + double& Epsilon() { return optimizer.UpdatePolicy().Alpha(); } //! Get the maximum number of iterations (0 indicates no limit). - size_t MaxIterations() const { return maxIterations; } + size_t MaxIterations() const { return optimizer.MaxIterations(); } //! Modify the maximum number of iterations (0 indicates no limit). - size_t& MaxIterations() { return maxIterations; } + size_t& MaxIterations() { return optimizer.MaxIterations(); } //! Get the tolerance for termination. - double Tolerance() const { return tolerance; } + double Tolerance() const { return optimizer.Tolerance(); } //! Modify the tolerance for termination. - double& Tolerance() { return tolerance; } + double& Tolerance() { return optimizer.Tolerance(); } //! Get whether or not the individual functions are shuffled. - bool Shuffle() const { return shuffle; } + bool Shuffle() const { return optimizer.Shuffle(); } //! Modify whether or not the individual functions are shuffled. - bool& Shuffle() { return shuffle; } + bool& Shuffle() { return optimizer.Shuffle(); } private: - //! The instantiated function. - DecomposableFunctionType& function; - - //! The step size for each example. - double stepSize; - - //! The smoothing parameter. - double alpha; - - //! The value used to initialise the mean squared gradient parameter. - double eps; - - //! The maximum number of allowed iterations. - size_t maxIterations; - - //! The tolerance for termination. - double tolerance; - - //! Controls whether or not the individual functions are shuffled when - //! iterating. - bool shuffle; + //! The Stochastic Gradient Descent object with RMSpropUpdate policy. + SGD optimizer; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp index a06814b08f9..71a457e437f 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp @@ -2,8 +2,9 @@ * @file rmsprop_impl.hpp * @author Ryan Curtin * @author Marcus Edel + * @author Vivek Pal * - * Implementation of the RMSprop optimizer. + * Implementation of the RMSprop constructor. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. 
You should have received a copy of the @@ -23,109 +24,19 @@ template RMSprop::RMSprop(DecomposableFunctionType& function, const double stepSize, const double alpha, - const double eps, + const double epsilon, const size_t maxIterations, const double tolerance, const bool shuffle) : - function(function), - stepSize(stepSize), - alpha(alpha), - eps(eps), - maxIterations(maxIterations), - tolerance(tolerance), - shuffle(shuffle) + optimizer(function, + stepSize, + maxIterations, + tolerance, + shuffle, + RMSpropUpdate(epsilon, + alpha)) { /* Nothing to do. */ } -//! Optimize the function (minimize). -template -double RMSprop::Optimize(arma::mat& iterate) -{ - // Find the number of functions to use. - const size_t numFunctions = function.NumFunctions(); - - // This is used only if shuffle is true. - arma::Col visitationOrder; - if (shuffle) - visitationOrder = arma::shuffle(arma::linspace>(0, - (numFunctions - 1), numFunctions)); - - // To keep track of where we are and how things are going. - size_t currentFunction = 0; - double overallObjective = 0; - double lastObjective = DBL_MAX; - - // Calculate the first objective function. - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - - // Now iterate! - arma::mat gradient(iterate.n_rows, iterate.n_cols); - - // Leaky sum of squares of parameter gradient. - arma::mat meanSquaredGradient = arma::zeros(iterate.n_rows, - iterate.n_cols); - - for (size_t i = 1; i != maxIterations; ++i, ++currentFunction) - { - // Is this iteration the start of a sequence? - if ((currentFunction % numFunctions) == 0) - { - // Output current objective function. - Log::Info << "RMSprop: iteration " << i << ", objective " - << overallObjective << "." << std::endl; - - if (std::isnan(overallObjective) || std::isinf(overallObjective)) - { - Log::Warn << "RMSprop: converged to " << overallObjective - << "; terminating with failure. Try a smaller step size?" - << std::endl; - return overallObjective; - } - - if (std::abs(lastObjective - overallObjective) < tolerance) - { - Log::Info << "RMSprop: minimized within tolerance " << tolerance << "; " - << "terminating optimization." << std::endl; - return overallObjective; - } - - // Reset the counter variables. - lastObjective = overallObjective; - overallObjective = 0; - currentFunction = 0; - - if (shuffle) // Determine order of visitation. - visitationOrder = arma::shuffle(visitationOrder); - } - - // Evaluate the gradient for this iteration. - if (shuffle) - function.Gradient(iterate, visitationOrder[currentFunction], gradient); - else - function.Gradient(iterate, currentFunction, gradient); - - // And update the iterate. - meanSquaredGradient *= alpha; - meanSquaredGradient += (1 - alpha) * (gradient % gradient); - iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + eps); - - // Now add that to the overall objective function. - if (shuffle) - overallObjective += function.Evaluate(iterate, - visitationOrder[currentFunction]); - else - overallObjective += function.Evaluate(iterate, currentFunction); - } - - Log::Info << "RMSprop: maximum iterations (" << maxIterations << ") reached; " - << "terminating optimization." << std::endl; - // Calculate final objective. 
- overallObjective = 0; - for (size_t i = 0; i < numFunctions; ++i) - overallObjective += function.Evaluate(iterate, i); - return overallObjective; -} - } // namespace optimization } // namespace mlpack diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp new file mode 100644 index 00000000000..61546c134c2 --- /dev/null +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -0,0 +1,116 @@ +/** + * @file rmsprop_update.hpp + * @author Ryan Curtin + * @author Marcus Edel + * @author Vivek Pal + * + * RMSprop optimizer. RmsProp is an optimizer that utilizes the magnitude of + * recent gradients to normalize the gradients. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_UPDATE_HPP +#define MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_UPDATE_HPP + +#include + +namespace mlpack { +namespace optimization { + +/** + * RMSprop is an optimizer that utilizes the magnitude of recent gradients to + * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$ + * and a decay term \f$ \alpha \f$ we perform the following updates: + * + * \f{eqnarray*}{ + * r_t &=& (1 - \gamma) f'(\Delta_t)^2 + \gamma r_{t - 1} \\ + * v_{t + 1} &=& \frac{\alpha}{\sqrt{r_t}}f'(\Delta_t) \\ + * \Delta_{t + 1} &=& \Delta_t - v_{t + 1} + * \f} + * + * For more information, see the following. + * + * @code + * @misc{tieleman2012, + * title={Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine + * Learning}, + * year={2012} + * } + * @endcode + */ +class RMSpropUpdate +{ + public: + /** + * Construct the RMSprop update policy with the given epsilon parameter. + * + * @param epsilon The epsilon value used to initialise the squared gradient + * parameter. + * @param beta1 The smoothing parameter. + */ + RMSpropUpdate(const double epsilon = 1e-8, + const double alpha = 0.99) : + epsilon(epsilon), + alpha(alpha) + { + // Nothing to do. + } + + /** + * The Initialize method is called by SGD Optimizer method before the start of + * the iteration update process. + * + * @param rows number of rows in the gradient matrix. + * @param cols number of columns in the gradient matrix. + */ + void Initialize(const size_t rows, + const size_t cols) + { + // Leaky sum of squares of parameter gradient. + meanSquaredGradient = arma::zeros(rows, cols); + } + + /** + * Update step for RMSprop. + * + * @param iterate Parameters that minimize the function. + * @param stepSize Step size to be used for the given iteration. + * @param gradient The gradient matrix. + */ + void Update(arma::mat& iterate, + const double stepSize, + const arma::mat& gradient) + { + meanSquaredGradient *= alpha; + meanSquaredGradient += (1 - alpha) * (gradient % gradient); + iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + epsilon); + } + + //! Get the value used to initialise the squared gradient parameter. + double Epsilon() const { return epsilon; } + //! Modify the value used to initialise the squared gradient parameter. + double& Epsilon() { return epsilon; } + + //! Get the smoothing parameter. + double Alpha() const { return alpha; } + //! Modify the smoothing parameter. 
+ double& Alpha() { return alpha; } + + private: + // The epsilon value used to initialise the squared gradient parameter. + double epsilon; + + // The smoothing parameter. + double alpha; + + // Leaky sum of squares of parameter gradient. + arma::mat meanSquaredGradient; +}; + +} // namespace optimization +} // namespace mlpack + +#endif \ No newline at end of file From 2c65ade104c5a3533ed6f42de52481c267492d6a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 22:34:42 +0530 Subject: [PATCH 54/78] Fix a mistake --- src/mlpack/core/optimizers/rmsprop/rmsprop.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 31301a99b27..f2d9a084399 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -124,9 +124,9 @@ class RMSprop double& Alpha() { return optimizer.UpdatePolicy().Alpha(); } //! Get the value used to initialise the mean squared gradient parameter. - double Epsilon() const { return optimizer.UpdatePolicy().Alpha(); } + double Epsilon() const { return optimizer.UpdatePolicy().Epsilon(); } //! Modify the value used to initialise the mean squared gradient parameter. - double& Epsilon() { return optimizer.UpdatePolicy().Alpha(); } + double& Epsilon() { return optimizer.UpdatePolicy().Epsilon(); } //! Get the maximum number of iterations (0 indicates no limit). size_t MaxIterations() const { return optimizer.MaxIterations(); } From 612c5d98a85c42196bbb00c54f965d3b246eb803 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Sun, 2 Apr 2017 22:43:09 +0530 Subject: [PATCH 55/78] Fix a typo: RmsProp -> RMSprop --- src/mlpack/core/optimizers/rmsprop/rmsprop.hpp | 2 +- src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index f2d9a084399..54136aca424 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * RMSprop optimizer. RmsProp is an optimizer that utilizes the magnitude of + * RMSprop optimizer. RMSprop is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. * * mlpack is free software; you may redistribute it and/or modify it under the diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index 61546c134c2..d18dfc85ea5 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * RMSprop optimizer. RmsProp is an optimizer that utilizes the magnitude of + * RMSprop optimizer. RMSprop is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. 
* * mlpack is free software; you may redistribute it and/or modify it under the From c1dd7308c1dddf72c8216bd3e1898ee53b90146e Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:13:09 +0530 Subject: [PATCH 56/78] Fix alignment issues --- src/mlpack/core/optimizers/rmsprop/rmsprop.hpp | 12 ++++++------ .../core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 54136aca424..782c7e38c6d 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -88,12 +88,12 @@ class RMSprop * function is visited in linear order. */ RMSprop(DecomposableFunctionType& function, - const double stepSize = 0.01, - const double alpha = 0.99, - const double epsilon = 1e-8, - const size_t maxIterations = 100000, - const double tolerance = 1e-5, - const bool shuffle = true); + const double stepSize = 0.01, + const double alpha = 0.99, + const double epsilon = 1e-8, + const size_t maxIterations = 100000, + const double tolerance = 1e-5, + const bool shuffle = true); /** * Optimize the given function using RMSprop. The given starting point will be diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index d18dfc85ea5..e5195f034d4 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -52,7 +52,7 @@ class RMSpropUpdate * @param beta1 The smoothing parameter. */ RMSpropUpdate(const double epsilon = 1e-8, - const double alpha = 0.99) : + const double alpha = 0.99) : epsilon(epsilon), alpha(alpha) { From b8a51f33482d964d08389fe0820e92f8ffb8d772 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:14:15 +0530 Subject: [PATCH 57/78] Correct the smoothing parameter name --- src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index e5195f034d4..d1451246784 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -49,7 +49,7 @@ class RMSpropUpdate * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. - * @param beta1 The smoothing parameter. + * @param alpha The smoothing parameter. */ RMSpropUpdate(const double epsilon = 1e-8, const double alpha = 0.99) : From 1b9cfeafdea1ea0ec0c80bda10ec0eb93722004b Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Tue, 4 Apr 2017 18:26:33 +0530 Subject: [PATCH 58/78] Update the comment about the RMSpropUpdate constructor --- src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index d1451246784..750489b282a 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -45,7 +45,7 @@ class RMSpropUpdate { public: /** - * Construct the RMSprop update policy with the given epsilon parameter. + * Construct the RMSprop update policy with the given parameters. * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. 
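The RMSprop update policy introduced in the patches above (RMSpropUpdate, renamed in the next patch) performs the leaky mean-squared-gradient update documented in rmsprop_update.hpp. Below is a minimal standalone sketch of that rule, not part of the patch series: it assumes only Armadillo is available, and the toy objective f(x) = x^2, the starting point, and the iteration count are illustrative choices rather than mlpack defaults (the stepSize, alpha, and epsilon values mirror the defaults shown in rmsprop.hpp).

@code
#include <armadillo>

int main()
{
  // Values mirroring the defaults in rmsprop.hpp / rmsprop_update.hpp.
  const double stepSize = 0.01, alpha = 0.99, epsilon = 1e-8;

  // A single-parameter "model", started away from the optimum.
  arma::mat iterate(1, 1);
  iterate(0, 0) = 5.0;

  // Leaky sum of squares of the parameter gradient.
  arma::mat meanSquaredGradient(1, 1, arma::fill::zeros);

  for (size_t i = 0; i < 10000; ++i)
  {
    // Gradient of the toy objective f(x) = x^2.
    arma::mat gradient = 2.0 * iterate;

    // The same three steps carried out by the update policy's Update().
    meanSquaredGradient *= alpha;
    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
    iterate -= stepSize * gradient /
        (arma::sqrt(meanSquaredGradient) + epsilon);
  }

  // The iterate should now be close to the minimum at x = 0.
  iterate.print("iterate:");
  return 0;
}
@endcode

In the refactored code this same logic is supplied to SGD as a template parameter (SGD<DecomposableFunctionType, RMSPropUpdate>), so the policy only has to implement Initialize() and Update(); the sketch above simply inlines those two calls for a one-dimensional problem.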
From 73b5c68f0a53a795e36fd234fddc6e8b05ed91e4 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 6 Apr 2017 21:47:47 +0530 Subject: [PATCH 59/78] Rename RMSprop as RMSProp --- src/mlpack/core/optimizers/rmsprop/rmsprop.hpp | 18 +++++++++--------- .../core/optimizers/rmsprop/rmsprop_impl.hpp | 6 +++--- .../core/optimizers/rmsprop/rmsprop_update.hpp | 12 ++++++------ src/mlpack/methods/ann/ffn.hpp | 6 +++--- src/mlpack/tests/feedforward_network_test.cpp | 6 +++--- src/mlpack/tests/ksinit_test.cpp | 2 +- src/mlpack/tests/rmsprop_test.cpp | 12 ++++++------ 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 782c7e38c6d..4d67dd906b2 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * RMSprop optimizer. RMSprop is an optimizer that utilizes the magnitude of + * RMSProp optimizer. RMSProp is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -24,7 +24,7 @@ namespace mlpack { namespace optimization { /** - * RMSprop is an optimizer that utilizes the magnitude of recent gradients to + * RMSProp is an optimizer that utilizes the magnitude of recent gradients to * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$ * and a decay term \f$ \alpha \f$ we perform the following updates: * @@ -44,7 +44,7 @@ namespace optimization { * } * @endcode * - * For RMSprop to work, a DecomposableFunctionType template parameter is + * For RMSProp to work, a DecomposableFunctionType template parameter is * required. This class must implement the following function: * * size_t NumFunctions(); @@ -65,11 +65,11 @@ namespace optimization { * minimized. */ template -class RMSprop +class RMSProp { public: /** - * Construct the RMSprop optimizer with the given function and parameters. The + * Construct the RMSProp optimizer with the given function and parameters. The * defaults here are not necessarily good for the given problem, so it is * suggested that the values used be tailored to the task at hand. The * maximum number of iterations refers to the maximum number of points that @@ -87,7 +87,7 @@ class RMSprop * @param shuffle If true, the function order is shuffled; otherwise, each * function is visited in linear order. */ - RMSprop(DecomposableFunctionType& function, + RMSProp(DecomposableFunctionType& function, const double stepSize = 0.01, const double alpha = 0.99, const double epsilon = 1e-8, @@ -96,7 +96,7 @@ class RMSprop const bool shuffle = true); /** - * Optimize the given function using RMSprop. The given starting point will be + * Optimize the given function using RMSProp. The given starting point will be * modified to store the finishing point of the algorithm, and the final * objective value is returned. * @@ -144,8 +144,8 @@ class RMSprop bool& Shuffle() { return optimizer.Shuffle(); } private: - //! The Stochastic Gradient Descent object with RMSpropUpdate policy. - SGD optimizer; + //! The Stochastic Gradient Descent object with RMSPropUpdate policy. 
+ SGD optimizer; }; } // namespace optimization diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp index 71a457e437f..3dcd8606954 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_impl.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * Implementation of the RMSprop constructor. + * Implementation of the RMSProp constructor. * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. You should have received a copy of the @@ -21,7 +21,7 @@ namespace mlpack { namespace optimization { template -RMSprop::RMSprop(DecomposableFunctionType& function, +RMSProp::RMSProp(DecomposableFunctionType& function, const double stepSize, const double alpha, const double epsilon, @@ -33,7 +33,7 @@ RMSprop::RMSprop(DecomposableFunctionType& function, maxIterations, tolerance, shuffle, - RMSpropUpdate(epsilon, + RMSPropUpdate(epsilon, alpha)) { /* Nothing to do. */ } diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index 750489b282a..5ca0a900348 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -4,7 +4,7 @@ * @author Marcus Edel * @author Vivek Pal * - * RMSprop optimizer. RMSprop is an optimizer that utilizes the magnitude of + * RMSProp optimizer. RMSProp is an optimizer that utilizes the magnitude of * recent gradients to normalize the gradients. * * mlpack is free software; you may redistribute it and/or modify it under the @@ -21,7 +21,7 @@ namespace mlpack { namespace optimization { /** - * RMSprop is an optimizer that utilizes the magnitude of recent gradients to + * RMS{rop is an optimizer that utilizes the magnitude of recent gradients to * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$ * and a decay term \f$ \alpha \f$ we perform the following updates: * @@ -41,17 +41,17 @@ namespace optimization { * } * @endcode */ -class RMSpropUpdate +class RMSPropUpdate { public: /** - * Construct the RMSprop update policy with the given parameters. + * Construct the RMSProp update policy with the given parameters. * * @param epsilon The epsilon value used to initialise the squared gradient * parameter. * @param alpha The smoothing parameter. */ - RMSpropUpdate(const double epsilon = 1e-8, + RMSPropUpdate(const double epsilon = 1e-8, const double alpha = 0.99) : epsilon(epsilon), alpha(alpha) @@ -74,7 +74,7 @@ class RMSpropUpdate } /** - * Update step for RMSprop. + * Update step for RMSProp. * * @param iterate Parameters that minimize the function. * @param stepSize Step size to be used for the given iteration. diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp index 051a5b03a69..466900bbca0 100644 --- a/src/mlpack/methods/ann/ffn.hpp +++ b/src/mlpack/methods/ann/ffn.hpp @@ -93,7 +93,7 @@ class FFN */ template< template class OptimizerType = - mlpack::optimization::RMSprop, + mlpack::optimization::RMSProp, typename... OptimizerTypeArgs > void Train(const arma::mat& predictors, @@ -102,7 +102,7 @@ class FFN /** * Train the feedforward network on the given input data. By default, the - * RMSprop optimization algorithm is used, but others can be specified + * RMSProp optimization algorithm is used, but others can be specified * (such as mlpack::optimization::SGD). 
* * This will use the existing model parameters as a starting point for the @@ -114,7 +114,7 @@ class FFN * @param responses Outputs results from input training variables. */ template< - template class OptimizerType = mlpack::optimization::RMSprop + template class OptimizerType = mlpack::optimization::RMSProp > void Train(const arma::mat& predictors, const arma::mat& responses); diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp index 883fe9e6b00..e76e83bfe21 100644 --- a/src/mlpack/tests/feedforward_network_test.cpp +++ b/src/mlpack/tests/feedforward_network_test.cpp @@ -66,7 +66,7 @@ void BuildVanillaNetwork(MatType& trainData, model.Add >(hiddenLayerSize, outputSize); model.Add >(); - RMSprop opt(model, 0.01, 0.88, 1e-8, + RMSProp opt(model, 0.01, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1); model.Train(std::move(trainData), std::move(trainLabels), opt); @@ -194,7 +194,7 @@ void BuildDropoutNetwork(MatType& trainData, model.Add >(hiddenLayerSize, outputSize); model.Add >(); - RMSprop opt(model, 0.01, 0.88, 1e-8, + RMSProp opt(model, 0.01, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1); model.Train(std::move(trainData), std::move(trainLabels), opt); @@ -324,7 +324,7 @@ void BuildDropConnectNetwork(MatType& trainData, model.Add >(hiddenLayerSize, outputSize); model.Add >(); - RMSprop opt(model, 0.01, 0.88, 1e-8, + RMSProp opt(model, 0.01, 0.88, 1e-8, maxEpochs * trainData.n_cols, -1); model.Train(std::move(trainData), std::move(trainLabels), opt); diff --git a/src/mlpack/tests/ksinit_test.cpp b/src/mlpack/tests/ksinit_test.cpp index 1b42d8bfa0a..16ec7457e42 100644 --- a/src/mlpack/tests/ksinit_test.cpp +++ b/src/mlpack/tests/ksinit_test.cpp @@ -85,7 +85,7 @@ void BuildVanillaNetwork(MatType& trainData, model.Add >(); model.Add >(hiddenLayerSize, outputSize); - RMSprop opt(model, 0.01, 0.88, 1e-8, + RMSProp opt(model, 0.01, 0.88, 1e-8, maxEpochs * trainData.n_cols, 1e-18); model.Train(std::move(trainData), std::move(trainLabels), opt); diff --git a/src/mlpack/tests/rmsprop_test.cpp b/src/mlpack/tests/rmsprop_test.cpp index 831df74302d..19275588dff 100644 --- a/src/mlpack/tests/rmsprop_test.cpp +++ b/src/mlpack/tests/rmsprop_test.cpp @@ -27,15 +27,15 @@ using namespace mlpack::optimization::test; using namespace mlpack::distribution; using namespace mlpack::regression; -BOOST_AUTO_TEST_SUITE(RMSpropTest); +BOOST_AUTO_TEST_SUITE(RMSPropTest); /** - * Tests the RMSprop optimizer using a simple test function. + * Tests the RMSProp optimizer using a simple test function. */ -BOOST_AUTO_TEST_CASE(SimpleRMSpropTestFunction) +BOOST_AUTO_TEST_CASE(SimpleRMSPropTestFunction) { SGDTestFunction f; - RMSprop optimizer(f, 1e-3, 0.99, 1e-8, 5000000, 1e-9, true); + RMSProp optimizer(f, 1e-3, 0.99, 1e-8, 5000000, 1e-9, true); arma::mat coordinates = f.GetInitialPoint(); optimizer.Optimize(coordinates); @@ -46,7 +46,7 @@ BOOST_AUTO_TEST_CASE(SimpleRMSpropTestFunction) } /** - * Run RMSprop on logistic regression and make sure the results are acceptable. + * Run RMSProp on logistic regression and make sure the results are acceptable. */ BOOST_AUTO_TEST_CASE(LogisticRegressionTest) { @@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(LogisticRegressionTest) LogisticRegression<> lr(shuffledData.n_rows, 0.5); LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5); - RMSprop > rmsprop(lrf); + RMSProp > rmsprop(lrf); lr.Train(rmsprop); // Ensure that the error is close to zero. 
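The rename above is purely mechanical: caller code changes only in the class name, while the constructor argument list and the Optimize() call are unchanged. A minimal sketch mirroring the updated SimpleRMSPropTestFunction test; the explicit template argument (the decomposable function type) and the test-function header path are filled in here and should be treated as illustrative:

@code
#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
#include <mlpack/core/optimizers/sgd/test_function.hpp>

using namespace mlpack::optimization;
using namespace mlpack::optimization::test;

SGDTestFunction f;

// Same arguments as before the rename:
// (function, stepSize, alpha, epsilon, maxIterations, tolerance, shuffle).
RMSProp<SGDTestFunction> optimizer(f, 1e-3, 0.99, 1e-8, 5000000, 1e-9, true);

arma::mat coordinates = f.GetInitialPoint();
optimizer.Optimize(coordinates);
@endcode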
From 408f0e8352b0cdeda6c049fa8c79ee252a01db8e Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 6 Apr 2017 21:50:20 +0530 Subject: [PATCH 60/78] fixup! Rename RMSprop as RMSProp --- src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index 5ca0a900348..b9a821a8c48 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -21,7 +21,7 @@ namespace mlpack { namespace optimization { /** - * RMS{rop is an optimizer that utilizes the magnitude of recent gradients to + * RMSProp is an optimizer that utilizes the magnitude of recent gradients to * normalize the gradients. In its basic form, given a step rate \f$ \gamma \f$ * and a decay term \f$ \alpha \f$ we perform the following updates: * From d6e82692738e444c6a95fe14e925e5fec203c795 Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Thu, 6 Apr 2017 22:01:34 +0530 Subject: [PATCH 61/78] Rename RMSprop as RMSProp in convolutional_network_test.cpp --- src/mlpack/tests/convolutional_network_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp index 26b4c28bd18..ea54a4ab95b 100644 --- a/src/mlpack/tests/convolutional_network_test.cpp +++ b/src/mlpack/tests/convolutional_network_test.cpp @@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE(VanillaNetworkTest) model.Add >(10, 2); model.Add >(); - RMSprop opt(model, 0.001, 0.88, 1e-8, 5000, -1); + RMSProp opt(model, 0.001, 0.88, 1e-8, 5000, -1); model.Train(std::move(X), std::move(Y), opt); From 47ab60c6e3b83cce701583d95b66a64ccd1bf390 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 12 Apr 2017 17:02:32 +0200 Subject: [PATCH 62/78] Use the correct size for the random block initialization. --- .../methods/block_krylov_svd/randomized_block_krylov_svd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index 702d706fd28..bef09fd3992 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -57,7 +57,7 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, } // Random block initialization. - arma::mat G = arma::randn(data.n_rows, blockSize); + arma::mat G = arma::randn(data.n_cols, blockSize); // Construct and orthonormalize Krylov subspace. arma::mat K(data.n_rows, blockSize * (maxIterations + 1)); From 4fd1d707156d20d14d0464cb7a62e5d83a731bd5 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Wed, 12 Apr 2017 17:05:56 +0200 Subject: [PATCH 63/78] Fix dimension mismatch, that occurs if rows != cols and icrease the number of iterations (power method) to stabilize the test case. 
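The dimension fix below builds the diagonal matrix of singular values explicitly: arma::eye(n, n) * (lowRank + tail) collapses to a plain column vector rather than the intended n x n diagonal matrix, which breaks (U * s) * V.t() when the generated matrix is not square, while assigning the values to s.diag() keeps s an n x n matrix for any rows/cols combination. A small sketch of the idiom, with hypothetical sizes:

@code
// Hypothetical sizes, illustrating the idiom the test now uses to turn a
// vector of singular values into a diagonal matrix.
arma::vec singularValues = arma::randu<arma::vec>(10);

arma::mat s = arma::zeros(10, 10);
s.diag() = singularValues;  // s is now diag(singularValues), 10 x 10.

// data = (U * s) * V.t() then has consistent dimensions regardless of
// whether the generated matrix is square.
@endcode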
--- src/mlpack/tests/block_krylov_svd_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mlpack/tests/block_krylov_svd_test.cpp b/src/mlpack/tests/block_krylov_svd_test.cpp index 5d7e6e2e283..44b3302b9c8 100644 --- a/src/mlpack/tests/block_krylov_svd_test.cpp +++ b/src/mlpack/tests/block_krylov_svd_test.cpp @@ -39,7 +39,8 @@ void CreateNoisyLowRankMatrix(arma::mat& data, arma::exp(-1.0 * arma::pow((ids / rank), 2))); arma::vec tail = strength * arma::exp(-0.1 * ids / rank); - arma::mat s = arma::eye(n, n) * (lowRank + tail); + arma::mat s = arma::zeros(n, n); + s.diag() = lowRank + tail; data = (U * s) * V.t(); } @@ -102,7 +103,7 @@ BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNoisyLowRankTest) arma::svd_econ(U1, s1, V1, data); - svd::RandomizedBlockKrylovSVD rSVDA(data, U2, s2, V2, 1, rank, 5); + svd::RandomizedBlockKrylovSVD rSVDA(data, U2, s2, V2, 5, rank, 5); double error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); BOOST_REQUIRE_SMALL(error, 0.1); From d5358b7f399206431b99e1b24d0ecf1afb87e99f Mon Sep 17 00:00:00 2001 From: CodeAi Date: Wed, 12 Apr 2017 11:28:41 -0400 Subject: [PATCH 64/78] CodeAi fixed 4 null pointer dereferences by tightening the if condition to check the potentially null pointer variables before use in the constructor. --- .../methods/hoeffding_trees/hoeffding_tree_model.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_model.cpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_model.cpp index 0599a4ce08c..9b35b531529 100644 --- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_model.cpp +++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_model.cpp @@ -68,13 +68,13 @@ HoeffdingTreeModel& HoeffdingTreeModel::operator=( // Create the right tree. type = other.type; - if (type == GINI_HOEFFDING) + if (other.giniHoeffdingTree && (type == GINI_HOEFFDING)) giniHoeffdingTree = new GiniHoeffdingTreeType(*other.giniHoeffdingTree); - else if (type == GINI_BINARY) + else if (other.giniBinaryTree && (type == GINI_BINARY)) giniBinaryTree = new GiniBinaryTreeType(*other.giniBinaryTree); - else if (type == INFO_HOEFFDING) + else if (other.infoHoeffdingTree && (type == INFO_HOEFFDING)) infoHoeffdingTree = new InfoHoeffdingTreeType(*other.infoHoeffdingTree); - else if (type == INFO_BINARY) + else if (other.infoBinaryTree && (type == INFO_BINARY)) infoBinaryTree = new InfoBinaryTreeType(*other.infoBinaryTree); return *this; From 3f7d13803b7119270157aa12f2e6aaabd5d85959 Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Wed, 12 Apr 2017 17:52:01 -0400 Subject: [PATCH 65/78] Use std::forward and universal references to simplify. --- src/mlpack/methods/emst/dtb_impl.hpp | 25 ++++---- .../methods/kmeans/dual_tree_kmeans_impl.hpp | 22 ++++--- .../neighbor_search/neighbor_search_impl.hpp | 57 +++++-------------- .../range_search/range_search_impl.hpp | 51 +++++------------ src/mlpack/methods/rann/ra_search_impl.hpp | 46 ++++----------- src/mlpack/prereqs.hpp | 1 + 6 files changed, 60 insertions(+), 142 deletions(-) diff --git a/src/mlpack/methods/emst/dtb_impl.hpp b/src/mlpack/methods/emst/dtb_impl.hpp index 1d87ea73f19..60320757413 100644 --- a/src/mlpack/methods/emst/dtb_impl.hpp +++ b/src/mlpack/methods/emst/dtb_impl.hpp @@ -18,27 +18,25 @@ namespace mlpack { namespace emst { //! Call the tree constructor that does mapping. 
-template +template TreeType* BuildTree( - MatType& dataset, + MatType&& dataset, std::vector& oldFromNew, - const typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset, oldFromNew); + return new TreeType(std::forward(dataset), oldFromNew); } //! Call the tree constructor that does not do mapping. -template +template TreeType* BuildTree( - const MatType& dataset, + MatType&& dataset, const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset == false, TreeType - >* = 0) + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset); + return new TreeType(std::forward(dataset)); } /** @@ -55,8 +53,7 @@ DualTreeBoruvka::DualTreeBoruvka( const MatType& dataset, const bool naive, const MetricType metric) : - tree(naive ? NULL : BuildTree(const_cast(dataset), - oldFromNew)), + tree(naive ? NULL : BuildTree(dataset, oldFromNew)), data(naive ? dataset : tree->Dataset()), ownTree(!naive), naive(naive), diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp index 27c54d53a27..66b0e6519d1 100644 --- a/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp +++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_impl.hpp @@ -24,29 +24,27 @@ namespace mlpack { namespace kmeans { //! Call the tree constructor that does mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, std::vector& oldFromNew, - const typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) { // This is a hack. I know this will be BinarySpaceTree, so force a leaf size // of two. - return new TreeType(dataset, oldFromNew, 1); + return new TreeType(std::forward(dataset), oldFromNew, 1); } //! Call the tree constructor that does not do mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset); + return new TreeType(std::forward(dataset)); } template -TreeType* BuildTree( - const MatType& dataset, - std::vector& oldFromNew, - typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(dataset, oldFromNew); -} - -//! Call the tree constructor that does not do mapping. -template -TreeType* BuildTree( - const MatType& dataset, - const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(dataset); -} - -//! Call the tree construct that does mapping. -template +template TreeType* BuildTree( MatType&& dataset, std::vector& oldFromNew, @@ -54,19 +30,19 @@ TreeType* BuildTree( tree::TreeTraits::RearrangesDataset, TreeType >* = 0) { - return new TreeType(std::move(dataset), oldFromNew); + return new TreeType(std::forward(dataset), oldFromNew); } //! Call the tree constructor that does not do mapping. 
-template +template TreeType* BuildTree( MatType&& dataset, - std::vector& /* oldFromNew */, - typename std::enable_if_t< + const std::vector& /* oldFromNew */, + const typename std::enable_if_t< !tree::TreeTraits::RearrangesDataset, TreeType >* = 0) { - return new TreeType(std::move(dataset)); + return new TreeType(std::forward(dataset)); } // Construct the object. @@ -84,7 +60,7 @@ SingleTreeTraversalType>::NeighborSearch(const MatType& referenceSetIn, const double epsilon, const MetricType metric) : referenceTree(mode == NAIVE_MODE ? NULL : - BuildTree(referenceSetIn, oldFromNewReferences)), + BuildTree(referenceSetIn, oldFromNewReferences)), referenceSet(mode == NAIVE_MODE ? &referenceSetIn : &referenceTree->Dataset()), treeOwner(mode != NAIVE_MODE), @@ -115,9 +91,8 @@ SingleTreeTraversalType>::NeighborSearch(MatType&& referenceSetIn, const double epsilon, const MetricType metric) : referenceTree(mode == NAIVE_MODE ? NULL : - BuildTree(std::move(referenceSetIn), - oldFromNewReferences)), - referenceSet(mode == NAIVE_MODE ? new MatType(std::move(referenceSetIn)) : + BuildTree(std::move(referenceSetIn), oldFromNewReferences)), + referenceSet(mode == NAIVE_MODE ? new MatType(std::move(referenceSetIn)) : &referenceTree->Dataset()), treeOwner(mode != NAIVE_MODE), setOwner(mode == NAIVE_MODE), @@ -220,8 +195,7 @@ SingleTreeTraversalType>::NeighborSearch(const NeighborSearchMode mode, // Build the tree on the empty dataset, if necessary. if (mode != NAIVE_MODE) { - referenceTree = BuildTree(*referenceSet, - oldFromNewReferences); + referenceTree = BuildTree(*referenceSet, oldFromNewReferences); treeOwner = true; } } @@ -278,7 +252,7 @@ SingleTreeTraversalType>::NeighborSearch(NeighborSearch&& other) : { // Clear the other model. other.referenceSet = new MatType(); - other.referenceTree = BuildTree(*other.referenceSet, + other.referenceTree = BuildTree(*other.referenceSet, other.oldFromNewReferences); other.treeOwner = true; other.setOwner = true; @@ -373,7 +347,7 @@ NeighborSearch(*other.referenceSet, + other.referenceTree = BuildTree(*other.referenceSet, other.oldFromNewReferences); other.treeOwner = true; other.setOwner = true; @@ -424,8 +398,7 @@ DualTreeTraversalType, SingleTreeTraversalType>::Train( // We may need to rebuild the tree. if (searchMode != NAIVE_MODE) { - referenceTree = BuildTree(referenceSet, - oldFromNewReferences); + referenceTree = BuildTree(referenceSet, oldFromNewReferences); treeOwner = true; } else @@ -465,7 +438,7 @@ DualTreeTraversalType, SingleTreeTraversalType>::Train(MatType&& referenceSetIn) // We may need to rebuild the tree. if (searchMode != NAIVE_MODE) { - referenceTree = BuildTree(std::move(referenceSetIn), + referenceTree = BuildTree(std::move(referenceSetIn), oldFromNewReferences); treeOwner = true; } @@ -656,7 +629,7 @@ DualTreeTraversalType, SingleTreeTraversalType>::Search( // Build the query tree. 
Timer::Stop("computing_neighbors"); Timer::Start("tree_building"); - Tree* queryTree = BuildTree(querySet, oldFromNewQueries); + Tree* queryTree = BuildTree(querySet, oldFromNewQueries); Timer::Stop("tree_building"); Timer::Start("computing_neighbors"); diff --git a/src/mlpack/methods/range_search/range_search_impl.hpp b/src/mlpack/methods/range_search/range_search_impl.hpp index 5c6571a7997..f1a813c1668 100644 --- a/src/mlpack/methods/range_search/range_search_impl.hpp +++ b/src/mlpack/methods/range_search/range_search_impl.hpp @@ -21,49 +21,25 @@ namespace mlpack { namespace range { -template +template TreeType* BuildTree( - typename TreeType::Mat& dataset, + MatType&& dataset, std::vector& oldFromNew, - typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset, oldFromNew); + return new TreeType(std::forward(dataset), oldFromNew); } //! Call the tree constructor that does not do mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset); -} - -template -TreeType* BuildTree( - typename TreeType::Mat&& dataset, - std::vector& oldFromNew, - const typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(std::move(dataset), oldFromNew); -} - -template -TreeType* BuildTree( - typename TreeType::Mat&& dataset, - const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(std::move(dataset)); + return new TreeType(std::forward(dataset)); } template::RangeSearch( const bool naive, const bool singleMode, const MetricType metric) : - referenceTree(naive ? NULL : BuildTree( - const_cast(referenceSetIn), oldFromNewReferences)), + referenceTree(naive ? NULL : BuildTree(referenceSetIn, + oldFromNewReferences)), referenceSet(naive ? &referenceSetIn : &referenceTree->Dataset()), treeOwner(!naive), // If in naive mode, we are not building any trees. setOwner(false), @@ -497,8 +473,7 @@ void RangeSearch::Search( // Build the query tree. Timer::Stop("range_search/computing_neighbors"); Timer::Start("range_search/tree_building"); - Tree* queryTree = BuildTree(const_cast(querySet), - oldFromNewQueries); + Tree* queryTree = BuildTree(querySet, oldFromNewQueries); Timer::Stop("range_search/tree_building"); Timer::Start("range_search/computing_neighbors"); diff --git a/src/mlpack/methods/rann/ra_search_impl.hpp b/src/mlpack/methods/rann/ra_search_impl.hpp index fca31657ad3..23984132fd9 100644 --- a/src/mlpack/methods/rann/ra_search_impl.hpp +++ b/src/mlpack/methods/rann/ra_search_impl.hpp @@ -23,51 +23,25 @@ namespace neighbor { namespace aux { //! Call the tree constructor that does mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, std::vector& oldFromNew, - typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset, oldFromNew); + return new TreeType(std::forward(dataset), oldFromNew); } //! 
Call the tree constructor that does not do mapping. -template +template TreeType* BuildTree( - const typename TreeType::Mat& dataset, + MatType&& dataset, const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) { - return new TreeType(dataset); -} - -//! Call the tree constructor that does mapping. -template -TreeType* BuildTree( - typename TreeType::Mat&& dataset, - std::vector& oldFromNew, - typename std::enable_if_t< - tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(std::move(dataset), oldFromNew); -} - -//! Call the tree constructor that does not do mapping. -template -TreeType* BuildTree( - typename TreeType::Mat&& dataset, - const std::vector& /* oldFromNew */, - const typename std::enable_if_t< - !tree::TreeTraits::RearrangesDataset, TreeType - >* = 0) -{ - return new TreeType(std::move(dataset)); + return new TreeType(std::forward(dataset)); } } // namespace aux diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp index e46fa859ab6..42a49d637cb 100644 --- a/src/mlpack/prereqs.hpp +++ b/src/mlpack/prereqs.hpp @@ -34,6 +34,7 @@ #include #include #include +#include // But if it's not defined, we'll do it. #ifndef M_PI From f790cb1f181298c609a7455d4150fec5b2083327 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Thu, 13 Apr 2017 01:39:17 +0200 Subject: [PATCH 66/78] Increase data matrix and number of iterations for the power method. --- src/mlpack/tests/block_krylov_svd_test.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/mlpack/tests/block_krylov_svd_test.cpp b/src/mlpack/tests/block_krylov_svd_test.cpp index 44b3302b9c8..fa608596fff 100644 --- a/src/mlpack/tests/block_krylov_svd_test.cpp +++ b/src/mlpack/tests/block_krylov_svd_test.cpp @@ -94,7 +94,7 @@ BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDReconstructionError) BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNoisyLowRankTest) { arma::mat data; - CreateNoisyLowRankMatrix(data, 100, 1000, 5, 1.0); + CreateNoisyLowRankMatrix(data, 200, 1000, 5, 0.5); const size_t rank = 5; @@ -103,15 +103,10 @@ BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovSVDNoisyLowRankTest) arma::svd_econ(U1, s1, V1, data); - svd::RandomizedBlockKrylovSVD rSVDA(data, U2, s2, V2, 5, rank, 5); - - double error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); - BOOST_REQUIRE_SMALL(error, 0.1); - svd::RandomizedBlockKrylovSVD rSVDB(data, U2, s2, V2, 10, rank, 20); - error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); - BOOST_REQUIRE_SMALL(error, 1e-3); + double error = arma::max(arma::abs(s1.subvec(0, rank) - s2.subvec(0, rank))); + BOOST_REQUIRE_SMALL(error, 1e-2); } BOOST_AUTO_TEST_SUITE_END(); From 03ba9bf6d1bb047bb83e1359842b42ef7481d66b Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Thu, 13 Apr 2017 11:03:11 -0400 Subject: [PATCH 67/78] Add first non-human contributor. --- COPYRIGHT.txt | 1 + src/mlpack/core.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index d39c4a2b5cb..bf9841da5f7 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -78,6 +78,7 @@ Copyright: Copyright 2017, Sagar B Hathwar Copyright 2017, Nishanth Hegde Copyright 2017, Parminder Singh + Copyright 2017, CodeAi License: BSD-3-clause All rights reserved. 
diff --git a/src/mlpack/core.hpp b/src/mlpack/core.hpp index 0880064c5eb..94a2ab84e75 100644 --- a/src/mlpack/core.hpp +++ b/src/mlpack/core.hpp @@ -219,6 +219,7 @@ * - Sagar B Hathwar * - Nishanth Hegde * - Parminder Singh + * - CodeAi (deep learning bug detector) */ // First, include all of the prerequisites. From 27ab7fa20044751fef460bb06ab5c44e2ab369de Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Thu, 13 Apr 2017 15:24:53 -0400 Subject: [PATCH 68/78] Update history. --- HISTORY.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index e5883ec98b5..5541f6326df 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,11 @@ ### mlpack ?.?.? ###### ????-??-?? +### mlpack 2.2.1 +###### 2017-04-13 + * Compilation fix for mlpack_nca and mlpack_test on older Armadillo versions + (#984). + ### mlpack 2.2.0 ###### 2017-03-21 * Bugfix for mlpack_knn program (#816). From dec8a4ffcbe235362d62870d30f05255bd1a13c6 Mon Sep 17 00:00:00 2001 From: Ryan Curtin Date: Thu, 13 Apr 2017 15:36:20 -0400 Subject: [PATCH 69/78] Update version numbers. --- doc/guide/build.hpp | 4 ++-- src/mlpack/core/util/version.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/guide/build.hpp b/doc/guide/build.hpp index b9f9913d03e..6b999913ecd 100644 --- a/doc/guide/build.hpp +++ b/doc/guide/build.hpp @@ -23,14 +23,14 @@ href="https://keon.io/mlpack/mlpack-on-windows/">Keon's excellent tutorial. @section Download latest mlpack build Download latest mlpack build from here: -mlpack-2.2.0 +mlpack-2.2.1 @section builddir Creating Build Directory Once the mlpack source is unpacked, you should create a build directory. @code -$ cd mlpack-2.2.0 +$ cd mlpack-2.2.1 $ mkdir build @endcode diff --git a/src/mlpack/core/util/version.hpp b/src/mlpack/core/util/version.hpp index b3dea0c3a52..a921444257e 100644 --- a/src/mlpack/core/util/version.hpp +++ b/src/mlpack/core/util/version.hpp @@ -17,13 +17,13 @@ // The version of mlpack. If this is a git repository, this will be a version // with higher number than the most recent release. #define MLPACK_VERSION_MAJOR 2 -#define MLPACK_VERSION_MINOR 0 +#define MLPACK_VERSION_MINOR 2 #define MLPACK_VERSION_PATCH "x" // Reverse compatibility; these macros will be removed in future versions of // mlpack (3.0.0 and newer)! #define __MLPACK_VERSION_MAJOR 2 -#define __MLPACK_VERSION_MINOR 0 +#define __MLPACK_VERSION_MINOR 2 #define __MLPACK_VERSION_PATCH "x" // The name of the version (for use by --version). From e7bfbb19dfbc8cecb5ae094fc3cc45ca0ef87e6a Mon Sep 17 00:00:00 2001 From: Vivek Pal Date: Fri, 14 Apr 2017 23:05:35 +0530 Subject: [PATCH 70/78] Add paper url to the citation --- src/mlpack/core/optimizers/adam/adam.hpp | 3 ++- src/mlpack/core/optimizers/adam/adam_update.hpp | 3 ++- src/mlpack/core/optimizers/adam/adamax_update.hpp | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index 37411d9bb0b..d4808c6cd93 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -40,7 +40,8 @@ namespace optimization { * author = {Diederik P. 
Kingma and Jimmy Ba}, * title = {Adam: {A} Method for Stochastic Optimization}, * journal = {CoRR}, - * year = {2014} + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode * diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index 95920c14ee7..540ae1ce01c 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -34,7 +34,8 @@ namespace optimization { * author = {Diederik P. Kingma and Jimmy Ba}, * title = {Adam: {A} Method for Stochastic Optimization}, * journal = {CoRR}, - * year = {2014} + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode */ diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index 97e26fe683d..6337efaae3a 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -36,7 +36,8 @@ namespace optimization { * author = {Diederik P. Kingma and Jimmy Ba}, * title = {Adam: {A} Method for Stochastic Optimization}, * journal = {CoRR}, - * year = {2014} + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode */ From 0d65254c6c8833edc62de9591ea28570ad7b42ee Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 14 Apr 2017 14:28:08 +0200 Subject: [PATCH 71/78] Minor style fix. --- .../pca/decomposition_policies/exact_svd_method.hpp | 2 +- .../pca/decomposition_policies/quic_svd_method.hpp | 13 ++++++------- .../randomized_svd_method.hpp | 13 ++++++------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp index fe0fb0c9ffe..dd5fa1e987e 100644 --- a/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp +++ b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp @@ -26,7 +26,7 @@ namespace pca { */ class ExactSVDPolicy { - public: + public: /** * Apply Principal Component Analysis to the provided data set using the * exact SVD method. diff --git a/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp index df18f0ba882..f3ecc2103d8 100644 --- a/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp +++ b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp @@ -25,8 +25,7 @@ namespace pca { */ class QUICSVDPolicy { - public: - + public: /** * Use QUIC-SVD method to perform the principal components analysis (PCA). * @@ -83,12 +82,12 @@ class QUICSVDPolicy //! Modify the cumulative probability for Monte Carlo error lower bound. double& Delta() { return delta; } - private: - //! Error tolerance fraction for calculated subspace. - double epsilon; + private: + //! Error tolerance fraction for calculated subspace. + double epsilon; - //! Cumulative probability for Monte Carlo error lower bound. - double delta; + //! Cumulative probability for Monte Carlo error lower bound. 
+ double delta; }; } // namespace pca diff --git a/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp index f7f9089b4c6..148dfb9e02a 100644 --- a/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp +++ b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp @@ -16,7 +16,6 @@ #include #include -#include namespace mlpack { namespace pca { @@ -26,7 +25,7 @@ namespace pca { */ class RandomizedSVDPolicy { - public: + public: /** * Use randomized SVD method to perform the principal components analysis * (PCA). @@ -88,12 +87,12 @@ class RandomizedSVDPolicy //! Modify the number of iterations for the power method. size_t& MaxIterations() { return maxIterations; } - private: - //! Locally stored size of the normalized power iterations. - size_t iteratedPower; + private: + //! Locally stored size of the normalized power iterations. + size_t iteratedPower; - //! Locally stored number of iterations for the power method. - size_t maxIterations; + //! Locally stored number of iterations for the power method. + size_t maxIterations; }; } // namespace pca From 12e433d4bdd26d19f964bb6502be2cc73c22d4c7 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 14 Apr 2017 15:39:15 +0200 Subject: [PATCH 72/78] Add randomized block krylov SVD policy to perform the principal components analysis (PCA). --- .../pca/decomposition_policies/CMakeLists.txt | 1 + .../randomized_block_krylov_method.hpp | 101 ++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 src/mlpack/methods/pca/decomposition_policies/randomized_block_krylov_method.hpp diff --git a/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt b/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt index 968c7cc4bb9..85bbb7c305b 100644 --- a/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt +++ b/src/mlpack/methods/pca/decomposition_policies/CMakeLists.txt @@ -2,6 +2,7 @@ # Anything not in this list will not be compiled into mlpack. set(SOURCES exact_svd_method.hpp + randomized_block_krylov_method.hpp randomized_svd_method.hpp quic_svd_method.hpp ) diff --git a/src/mlpack/methods/pca/decomposition_policies/randomized_block_krylov_method.hpp b/src/mlpack/methods/pca/decomposition_policies/randomized_block_krylov_method.hpp new file mode 100644 index 00000000000..957cd3bbd61 --- /dev/null +++ b/src/mlpack/methods/pca/decomposition_policies/randomized_block_krylov_method.hpp @@ -0,0 +1,101 @@ +/** + * @file randomized_block_krylov_method.hpp + * @author Marcus Edel + * + * Implementation of the randomized block krylov SVD method for use in the + * Principal Components Analysis method. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_BLOCK_KRYLOV_HPP +#define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_BLOCK_KRYLOV_HPP + +#include +#include + +namespace mlpack { +namespace pca { + +/** + * Implementation of the randomized block krylov SVD policy. + */ +class RandomizedBlockKrylovSVDPolicy +{ + public: + /** + * Use randomized block krylov SVD method to perform the principal components + * analysis (PCA). 
+ * + * @param maxIterations Number of iterations for the power method + * (Default: 2). + * @param blockSize The block size, must be >= rank (Default: rank + 10). + */ + RandomizedBlockKrylovSVDPolicy(const size_t maxIterations = 2, + const size_t blockSize = 0) : + maxIterations(maxIterations), + blockSize(blockSize) + { + /* Nothing to do here */ + } + + /** + * Apply Principal Component Analysis to the provided data set using the + * randomized block krylov SVD method. + * + * @param data Data matrix. + * @param centeredData Centered data matrix. + * @param transformedData Matrix to put results of PCA into. + * @param eigVal Vector to put eigenvalues into. + * @param eigvec Matrix to put eigenvectors (loadings) into. + * @param rank Rank of the decomposition. + */ + void Apply(const arma::mat& data, + const arma::mat& centeredData, + arma::mat& transformedData, + arma::vec& eigVal, + arma::mat& eigvec, + const size_t rank) + { + // This matrix will store the right singular values; we do not need them. + arma::mat v; + + // Do singular value decomposition using the randomized block krylov SVD + // algorithm. + svd::RandomizedBlockKrylovSVD rsvd(maxIterations, blockSize); + rsvd.Apply(centeredData, eigvec, eigVal, v, rank); + + // Now we must square the singular values to get the eigenvalues. + // In addition we must divide by the number of points, because the + // covariance matrix is X * X' / (N - 1). + eigVal %= eigVal / (data.n_cols - 1); + + // Project the samples to the principals. + transformedData = arma::trans(eigvec) * centeredData; + } + + //! Get the number of iterations for the power method. + size_t MaxIterations() const { return maxIterations; } + //! Modify the number of iterations for the power method. + size_t& MaxIterations() { return maxIterations; } + + //! Get the block size. + size_t BlockSize() const { return blockSize; } + //! Modify the block size. + size_t& BlockSize() { return blockSize; } + + private: + //! Locally stored number of iterations for the power method. + size_t maxIterations; + + //! Locally stored block size value. + size_t blockSize; +}; + +} // namespace pca +} // namespace mlpack + +#endif From dec81074ef22d191ab574b37a70252c80ed6c7f5 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Fri, 14 Apr 2017 15:44:02 +0200 Subject: [PATCH 73/78] Tests for the randomized block krylov pca policy. --- src/mlpack/tests/pca_test.cpp | 36 ++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/mlpack/tests/pca_test.cpp b/src/mlpack/tests/pca_test.cpp index 5ec70b15bb9..926fff93118 100644 --- a/src/mlpack/tests/pca_test.cpp +++ b/src/mlpack/tests/pca_test.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "test_tools.hpp" @@ -31,15 +32,17 @@ using namespace mlpack::distribution; * specified decomposition policy. */ template -void ArmaComparisonPCA() +void ArmaComparisonPCA( + const bool scaleData = false, + const DecompositionPolicy& decomposition = DecompositionPolicy()) { arma::mat coeff, coeff1, score, score1; arma::vec eigVal, eigVal1; arma::mat data = arma::randu(3, 1000); - PCAType exactPCA; - exactPCA.Apply(data, score1, eigVal1, coeff1); + PCAType pcaType(scaleData, decomposition); + pcaType.Apply(data, score1, eigVal1, coeff1); princomp(coeff, score, eigVal, trans(data)); @@ -58,7 +61,9 @@ void ArmaComparisonPCA() * (which should be correct!) using the specified decomposition policy. 
*/ template -void PCADimensionalityReduction() +void PCADimensionalityReduction( + const bool scaleData = false, + const DecompositionPolicy& decomposition = DecompositionPolicy()) { // Fake, simple dataset. The results we will compare against are from MATLAB. mat data("1 0 2 3 9;" @@ -66,7 +71,7 @@ void PCADimensionalityReduction() "6 7 3 1 8"); // Now run PCA to reduce the dimensionality. - PCAType p; + PCAType p(scaleData, decomposition); const double varRetained = p.Apply(data, 2); // Reduce to 2 dimensions. // Compare with correct results. @@ -168,6 +173,16 @@ BOOST_AUTO_TEST_CASE(ArmaComparisonExactPCATest) ArmaComparisonPCA(); } +/** + * Compare the output of our randomized block krylov PCA implementation with + * Armadillo's. + */ +BOOST_AUTO_TEST_CASE(ArmaComparisonRandomizedBlockKrylovPCATest) +{ + RandomizedBlockKrylovSVDPolicy decomposition(5); + ArmaComparisonPCA(false, decomposition); +} + /** * Compare the output of our randomized-SVD PCA implementation with Armadillo's. */ @@ -185,6 +200,17 @@ BOOST_AUTO_TEST_CASE(ExactPCADimensionalityReductionTest) PCADimensionalityReduction(); } +/** + * Test that dimensionality reduction with randomized block krylov PCA works the + * same way MATLAB does (which should be correct!). + */ +BOOST_AUTO_TEST_CASE(RandomizedBlockKrylovPCADimensionalityReductionTest) +{ + RandomizedBlockKrylovSVDPolicy decomposition(5); + PCADimensionalityReduction(false, + decomposition); +} + /** * Test that dimensionality reduction with randomized-svd PCA works the same way * MATLAB does (which should be correct!). From 8218d26b3998140844a6c3c08f2a08fb009424ee Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sat, 22 Apr 2017 22:19:48 +0200 Subject: [PATCH 74/78] Do not use block size for the block initialization if block size isn't specified, since block size approximation might be wrong. --- .../block_krylov_svd/randomized_block_krylov_svd.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp index bef09fd3992..57e9a5d201b 100644 --- a/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp +++ b/src/mlpack/methods/block_krylov_svd/randomized_block_krylov_svd.cpp @@ -71,14 +71,14 @@ void RandomizedBlockKrylovSVD::Apply(const arma::mat& data, blockOffset += block.n_elem) { // Temporary working matrix to store the result in the correct place. - blockIteration = arma::mat(K.memptr() + blockOffset, data.n_rows, - blockSize, false); + blockIteration = arma::mat(K.memptr() + blockOffset, block.n_rows, + block.n_cols, false, false); arma::qr_econ(blockIteration, R, data * (data.t() * block)); // Update working matrix for the next iteration. - block = arma::mat(K.memptr() + blockOffset, data.n_rows, blockSize, false, - false); + block = arma::mat(K.memptr() + blockOffset, block.n_rows, block.n_cols, + false, false); } arma::qr_econ(Q, R, K); From d7dc971b06276a35f08c5f6550df1e5c72382f0e Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 23 Apr 2017 22:16:00 +0200 Subject: [PATCH 75/78] Remove unnecessary typecast (double to double). 
--- src/mlpack/core/optimizers/adam/adam_update.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index 540ae1ce01c..b420b57311a 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -96,8 +96,8 @@ class AdamUpdate v *= beta2; v += (1 - beta2) * (gradient % gradient); - const double biasCorrection1 = 1.0 - std::pow(beta1, (double) iteration); - const double biasCorrection2 = 1.0 - std::pow(beta2, (double) iteration); + const double biasCorrection1 = 1.0 - std::pow(beta1, iteration); + const double biasCorrection2 = 1.0 - std::pow(beta2, iteration); /** * It should be noted that the term, m / (arma::sqrt(v) + eps), in the From 504b02775c2196d59720f8426e9fed6080683039 Mon Sep 17 00:00:00 2001 From: Marcus Edel Date: Sun, 23 Apr 2017 23:33:54 +0200 Subject: [PATCH 76/78] Minor style fixes (80 columns, clamp multiline, remove trailing spaces, comments). --- .../core/optimizers/ada_delta/ada_delta.hpp | 8 ++--- .../optimizers/ada_delta/ada_delta_update.hpp | 8 ++--- .../core/optimizers/ada_grad/ada_grad.hpp | 14 ++++---- .../optimizers/ada_grad/ada_grad_update.hpp | 4 +-- src/mlpack/core/optimizers/adam/adam.hpp | 24 ++++++------- .../core/optimizers/adam/adam_update.hpp | 16 ++++----- .../core/optimizers/adam/adamax_update.hpp | 4 +-- .../gradient_descent/gradient_descent.hpp | 6 ++-- .../core/optimizers/rmsprop/rmsprop.hpp | 6 ++-- .../optimizers/rmsprop/rmsprop_update.hpp | 13 +++---- .../sgd/update_policies/momentum_update.hpp | 36 +++++++++---------- .../sgd/update_policies/vanilla_update.hpp | 6 ++-- .../core/optimizers/smorms3/smorms3.hpp | 34 ++++++++++-------- .../optimizers/smorms3/smorms3_update.hpp | 31 ++++++++-------- 14 files changed, 108 insertions(+), 102 deletions(-) diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta.hpp index 8589374460c..9a209c63e7b 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta.hpp @@ -34,10 +34,10 @@ namespace optimization { * * @code * @article{Zeiler2012, - * author = {Matthew D. Zeiler}, - * title = {{ADADELTA:} An Adaptive Learning Rate Method}, - * journal = {CoRR}, - * year = {2012} + * author = {Matthew D. Zeiler}, + * title = {{ADADELTA:} An Adaptive Learning Rate Method}, + * journal = {CoRR}, + * year = {2012} * } * @endcode * diff --git a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp index 391e2a88bd3..fc3d0bcde02 100644 --- a/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp +++ b/src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp @@ -29,10 +29,10 @@ namespace optimization { * * @code * @article{Zeiler2012, - * author = {Matthew D. Zeiler}, - * title = {{ADADELTA:} An Adaptive Learning Rate Method}, - * journal = {CoRR}, - * year = {2012} + * author = {Matthew D. 
Zeiler}, + * title = {{ADADELTA:} An Adaptive Learning Rate Method}, + * journal = {CoRR}, + * year = {2012} * } * @endcode * diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad.hpp index e96b619e858..6c622c8e3e7 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad.hpp @@ -30,13 +30,13 @@ namespace optimization { * * @code * @article{duchi2011adaptive, - * author = {Duchi, John and Hazan, Elad and Singer, Yoram}, - * title = {Adaptive subgradient methods for online learning and stochastic optimization}, - * journal = {Journal of Machine Learning Research}, - * volume = {12}, - * number = {Jul}, - * pages = {2121--2159}, - * year = {2011} + * author = {Duchi, John and Hazan, Elad and Singer, Yoram}, + * title = {Adaptive subgradient methods for online learning and stochastic optimization}, + * journal = {Journal of Machine Learning Research}, + * volume = {12}, + * number = {Jul}, + * pages = {2121--2159}, + * year = {2011} * } * @endcode * diff --git a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp index 2fb52573b1b..b086926388b 100644 --- a/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp +++ b/src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp @@ -58,8 +58,8 @@ class AdaGradUpdate * gradient matrix is initialized to the zeros matrix with the same size as * gradient matrix (see mlpack::optimization::SGD::Optimizer). * - * @param rows number of rows in the gradient matrix. - * @param cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ void Initialize(const size_t rows, const size_t cols) diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp index d4808c6cd93..23615fdd9e6 100644 --- a/src/mlpack/core/optimizers/adam/adam.hpp +++ b/src/mlpack/core/optimizers/adam/adam.hpp @@ -37,11 +37,11 @@ namespace optimization { * * @code * @article{Kingma2014, - * author = {Diederik P. Kingma and Jimmy Ba}, - * title = {Adam: {A} Method for Stochastic Optimization}, - * journal = {CoRR}, - * year = {2014}, - * url = {http://arxiv.org/abs/1412.6980} + * author = {Diederik P. Kingma and Jimmy Ba}, + * title = {Adam: {A} Method for Stochastic Optimization}, + * journal = {CoRR}, + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode * @@ -95,13 +95,13 @@ class AdamType * function is visited in linear order. */ AdamType(DecomposableFunctionType& function, - const double stepSize = 0.001, - const double beta1 = 0.9, - const double beta2 = 0.999, - const double eps = 1e-8, - const size_t maxIterations = 100000, - const double tolerance = 1e-5, - const bool shuffle = true); + const double stepSize = 0.001, + const double beta1 = 0.9, + const double beta2 = 0.999, + const double eps = 1e-8, + const size_t maxIterations = 100000, + const double tolerance = 1e-5, + const bool shuffle = true); /** * Optimize the given function using Adam. The given starting point will be diff --git a/src/mlpack/core/optimizers/adam/adam_update.hpp b/src/mlpack/core/optimizers/adam/adam_update.hpp index b420b57311a..0f64c82327c 100644 --- a/src/mlpack/core/optimizers/adam/adam_update.hpp +++ b/src/mlpack/core/optimizers/adam/adam_update.hpp @@ -31,11 +31,11 @@ namespace optimization { * * @code * @article{Kingma2014, - * author = {Diederik P. 
Kingma and Jimmy Ba}, - * title = {Adam: {A} Method for Stochastic Optimization}, - * journal = {CoRR}, - * year = {2014}, - * url = {http://arxiv.org/abs/1412.6980} + * author = {Diederik P. Kingma and Jimmy Ba}, + * title = {Adam: {A} Method for Stochastic Optimization}, + * journal = {CoRR}, + * year = {2014}, + * url = {http://arxiv.org/abs/1412.6980} * } * @endcode */ @@ -65,8 +65,8 @@ class AdamUpdate * The Initialize method is called by SGD Optimizer method before the start of * the iteration update process. * - * @param rows number of rows in the gradient matrix. - * @param cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ void Initialize(const size_t rows, const size_t cols) @@ -105,7 +105,7 @@ class AdamUpdate * m / (arma::sqrt(v) + (arma::sqrt(biasCorrection2) * eps). */ iterate -= (stepSize * std::sqrt(biasCorrection2) / biasCorrection1) * - m / (arma::sqrt(v) + epsilon); + m / (arma::sqrt(v) + epsilon); } //! Get the value used to initialise the squared gradient parameter. diff --git a/src/mlpack/core/optimizers/adam/adamax_update.hpp b/src/mlpack/core/optimizers/adam/adamax_update.hpp index 6337efaae3a..9ac281cca8e 100644 --- a/src/mlpack/core/optimizers/adam/adamax_update.hpp +++ b/src/mlpack/core/optimizers/adam/adamax_update.hpp @@ -67,8 +67,8 @@ class AdaMaxUpdate * The Initialize method is called by SGD Optimizer method before the start of * the iteration update process. * - * @param rows number of rows in the gradient matrix. - * @param cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ void Initialize(const size_t rows, const size_t cols) diff --git a/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp b/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp index 78ee1038130..79fb8dded78 100644 --- a/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp +++ b/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp @@ -66,9 +66,9 @@ class GradientDescent * @param tolerance Maximum absolute tolerance to terminate algorithm. */ GradientDescent(FunctionType& function, - const double stepSize = 0.01, - const size_t maxIterations = 100000, - const double tolerance = 1e-5); + const double stepSize = 0.01, + const size_t maxIterations = 100000, + const double tolerance = 1e-5); /** * Optimize the given function using gradient descent. 
The given starting diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp index 4d67dd906b2..c3da802d701 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp @@ -38,9 +38,9 @@ namespace optimization { * * @code * @misc{tieleman2012, - * title={Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine - * Learning}, - * year={2012} + * title = {Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine + * Learning}, + * year = {2012} * } * @endcode * diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp index b9a821a8c48..c86f09f0c3c 100644 --- a/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp +++ b/src/mlpack/core/optimizers/rmsprop/rmsprop_update.hpp @@ -35,9 +35,9 @@ namespace optimization { * * @code * @misc{tieleman2012, - * title={Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine - * Learning}, - * year={2012} + * title = {Lecture 6.5 - rmsprop, COURSERA: Neural Networks for Machine + * Learning}, + * year = {2012} * } * @endcode */ @@ -63,8 +63,8 @@ class RMSPropUpdate * The Initialize method is called by SGD Optimizer method before the start of * the iteration update process. * - * @param rows number of rows in the gradient matrix. - * @param cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ void Initialize(const size_t rows, const size_t cols) @@ -86,7 +86,8 @@ class RMSPropUpdate { meanSquaredGradient *= alpha; meanSquaredGradient += (1 - alpha) * (gradient % gradient); - iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + epsilon); + iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + + epsilon); } //! Get the value used to initialise the squared gradient parameter. diff --git a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp index 2947c355f90..1ea08560349 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/momentum_update.hpp @@ -42,23 +42,23 @@ namespace optimization { * * @code * @article{rumelhart1988learning, - * title={Learning representations by back-propagating errors}, - * author={Rumelhart, David E. and Hinton, Geoffrey E. and - * Williams, Ronald J.}, - * journal={Cognitive Modeling}, - * volume={5}, - * number={3}, - * pages={1}, - * year={1988} + * title = {Learning representations by back-propagating errors}, + * author = {Rumelhart, David E. and Hinton, Geoffrey E. 
and + * Williams, Ronald J.}, + * journal = {Cognitive Modeling}, + * volume = {5}, + * number = {3}, + * pages = {1}, + * year = {1988} * } * * @code * @book{Goodfellow-et-al-2016, - * title={Deep Learning}, - * author={Ian Goodfellow and Yoshua Bengio and Aaron Courville}, - * publisher={MIT Press}, - * note={\url{http://www.deeplearningbook.org}}, - * year={2016} + * title = {Deep Learning}, + * author = {Ian Goodfellow and Yoshua Bengio and Aaron Courville}, + * publisher = {MIT Press}, + * note = {\url{http://www.deeplearningbook.org}}, + * year = {2016} * } */ class MomentumUpdate @@ -78,14 +78,14 @@ class MomentumUpdate * matrix is initialized to the zeros matrix with the same size as the * gradient matrix (see mlpack::optimization::SGD::Optimizer ) * - * @param n_rows number of rows in the gradient matrix. - * @param n_cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ - void Initialize(const size_t n_rows, - const size_t n_cols) + void Initialize(const size_t rows, + const size_t cols) { //Initialize am empty velocity matrix. - velocity = arma::zeros(n_rows, n_cols); + velocity = arma::zeros(rows, cols); } /** diff --git a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp index 1bd85bf1c37..d76264864b7 100644 --- a/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp +++ b/src/mlpack/core/optimizers/sgd/update_policies/vanilla_update.hpp @@ -36,10 +36,10 @@ class VanillaUpdate * the iteration update process. The vanilla update doesn't initialize * anything. * - * @param n_rows number of rows in the gradient matrix. - * @param n_cols number of columns in the gradient matrix. + * @param rows Number of rows in the gradient matrix. + * @param cols Number of columns in the gradient matrix. */ - void Initialize(const size_t /* n_rows */, const size_t /* n_cols */) + void Initialize(const size_t /* rows */, const size_t /* cols */) { /* Do nothing. */ } /** diff --git a/src/mlpack/core/optimizers/smorms3/smorms3.hpp b/src/mlpack/core/optimizers/smorms3/smorms3.hpp index 41643c80236..d8b67497079 100644 --- a/src/mlpack/core/optimizers/smorms3/smorms3.hpp +++ b/src/mlpack/core/optimizers/smorms3/smorms3.hpp @@ -31,16 +31,16 @@ namespace optimization { * * @code * @misc{Funk2015, - * author = {Simon Funk}, - * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, - * year = {2015} - * url = {http://sifter.org/~simon/journal/20150420.html} + * author = {Simon Funk}, + * title = {RMSprop loses to SMORMS3 - Beware the Epsilon!}, + * year = {2015} + * url = {http://sifter.org/~simon/journal/20150420.html} * } * @endcode * * - * For SMORMS3 to work, a DecomposableFunctionType template parameter is required. - * This class must implement the following function: + * For SMORMS3 to work, a DecomposableFunctionType template parameter is + * required. This class must implement the following function: * * size_t NumFunctions(); * double Evaluate(const arma::mat& coordinates, const size_t i); @@ -57,7 +57,7 @@ namespace optimization { * is held internally in the DecomposableFunctionType). * * @tparam DecomposableFunctionType Decomposable objective function type to be - * minimized. + * minimized. */ template class SMORMS3 @@ -73,7 +73,8 @@ class SMORMS3 * * @param function Function to be optimized (minimized). * @param stepSize Step size for each iteration. 
-  * @param epsilon Value used to initialise the mean squared gradient parameter.
+  * @param epsilon Value used to initialise the mean squared gradient
+  *        parameter.
   * @param maxIterations Maximum number of iterations allowed (0 means no
   *        limit).
   * @param tolerance Maximum absolute tolerance to terminate algorithm.
@@ -81,14 +82,14 @@
   * function is visited in linear order.
   */
  SMORMS3(DecomposableFunctionType& function,
-         const double stepSize = 0.001,
-         const double epsilon = 1e-16,
-         const size_t maxIterations = 100000,
-         const double tolerance = 1e-5,
-         const bool shuffle = true);
+          const double stepSize = 0.001,
+          const double epsilon = 1e-16,
+          const size_t maxIterations = 100000,
+          const double tolerance = 1e-5,
+          const bool shuffle = true);

  /**
-  * Optimize the given function using SMORMS3. The given starting point will
+  * Optimize the given function using SMORMS3. The given starting point will
   * be modified to store the finishing point of the algorithm, and the final
   * objective value is returned.
   *
@@ -98,7 +99,10 @@
  double Optimize(arma::mat& iterate) { return optimizer.Optimize(iterate); }

  //! Get the instantiated function to be optimized.
-  const DecomposableFunctionType& Function() const { return optimizer.Function(); }
+  const DecomposableFunctionType& Function() const
+  {
+    return optimizer.Function();
+  }

  //! Modify the instantiated function.
  DecomposableFunctionType& Function() { return optimizer.Function(); }
diff --git a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp
index fbc534926be..3cf35eb7df2 100644
--- a/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp
+++ b/src/mlpack/core/optimizers/smorms3/smorms3_update.hpp
@@ -26,10 +26,10 @@ namespace optimization {
 *
 * @code
 * @misc{Funk2015,
- *   author = {Simon Funk},
- *   title = {RMSprop loses to SMORMS3 - Beware the Epsilon!},
- *   year = {2015}
- *   url = {http://sifter.org/~simon/journal/20150420.html}
+ *   author = {Simon Funk},
+ *   title = {RMSprop loses to SMORMS3 - Beware the Epsilon!},
+ *   year = {2015}
+ *   url = {http://sifter.org/~simon/journal/20150420.html}
 * }
 * @endcode
 */
@@ -40,23 +40,18 @@ class SMORMS3Update
  /**
   * Construct the SMORMS3 update policy with given epsilon parameter.
   *
-  * @param epsilon Value used to initialise the mean squared gradient parameter.
+  * @param epsilon Value used to initialise the mean squared gradient
+  *        parameter.
   */
-  SMORMS3Update(const double epsilon = 1e-16) :
-    epsilon(epsilon)
+  SMORMS3Update(const double epsilon = 1e-16) : epsilon(epsilon)
  { /* Do nothing. */ }

-  //! Get the value used to initialise the mean squared gradient parameter.
-  double Epsilon() const { return epsilon; }
-  //! Modify the value used to initialise the mean squared gradient parameter.
-  double& Epsilon() { return epsilon; }
-
  /**
   * The Initialize method is called by SGD::Optimize method with UpdatePolicy
   * SMORMS3Update before the start of the iteration update process.
   *
-  * @param rows number of rows in the gradient matrix.
-  * @param cols number of columns in the gradient matrix.
+  * @param rows Number of rows in the gradient matrix.
+  * @param cols Number of columns in the gradient matrix.
   */
  void Initialize(const size_t rows,
                  const size_t cols)
@@ -96,10 +91,16 @@ class SMORMS3Update
    mem %= (1 - x);
    mem += 1;
  }
+
+  //! Get the value used to initialise the mean squared gradient parameter.
+  double Epsilon() const { return epsilon; }
+  //! Modify the value used to initialise the mean squared gradient parameter.
+  double& Epsilon() { return epsilon; }
+
 private:
  //! The value used to initialise the mean squared gradient parameter.
  double epsilon;
-
+
  // The parameters mem, g and g2.
  arma::mat mem, g, g2;
};

From 2df61cf6fd9e0aaa5b37c65919e175f797796361 Mon Sep 17 00:00:00 2001
From: Ryan Curtin
Date: Mon, 24 Apr 2017 11:06:38 -0400
Subject: [PATCH 77/78] Mark extra overload deprecated.

---
 src/mlpack/core/optimizers/lbfgs/lbfgs.hpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp b/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
index 422e6c7cc2f..8c6f64168fb 100644
--- a/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
+++ b/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
@@ -94,11 +94,15 @@ class L_BFGS
   * finishing point of the algorithm, and the final objective value is
   * returned.
   *
+  * This overload will be removed in mlpack 3.0.0---you should set
+  * maxIterations in the constructor instead.
+  *
   * @param iterate Starting point (will be modified).
   * @param maxIterations Maximum number of iterations (0 specifies no limit).
   * @return Objective value of the final point.
   */
-  double Optimize(arma::mat& iterate, const size_t maxIterations);
+  mlpack_deprecated double Optimize(arma::mat& iterate,
+                                    const size_t maxIterations);

  //! Return the function that is being optimized.
  const FunctionType& Function() const { return function; }

From 7f7561d9cbda272055868c9d3c3bba08d9f0f2af Mon Sep 17 00:00:00 2001
From: Ryan Curtin
Date: Mon, 24 Apr 2017 16:59:39 -0400
Subject: [PATCH 78/78] Switch implementations.

---
 src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp b/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
index f8e44a383b7..38fa92bd5b1 100644
--- a/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
+++ b/src/mlpack/core/optimizers/lbfgs/lbfgs_impl.hpp
@@ -326,9 +326,11 @@ L_BFGS<FunctionType>::MinPointIterate() const
 }

 template<typename FunctionType>
-inline double L_BFGS<FunctionType>::Optimize(arma::mat& iterate)
+inline double L_BFGS<FunctionType>::Optimize(arma::mat& iterate,
+                                             const size_t maxIterations)
 {
-  return Optimize(iterate, maxIterations);
+  this->maxIterations = maxIterations;
+  return Optimize(iterate);
 }

 /**
  * Use L-BFGS to optimize the given function, starting at the given iterate
  * point and finding the minimum. The maximum number of iterations is set in
  * the constructor.
  *
  * @param iterate Starting point (will be modified)
  */
 template<typename FunctionType>
-double L_BFGS<FunctionType>::Optimize(arma::mat& iterate,
-                                      const size_t maxIterations)
+double L_BFGS<FunctionType>::Optimize(arma::mat& iterate)
 {
  // Ensure that the cubes holding past iterations' information are the right
  // size. Also set the current best point value to the maximum.
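
For readers skimming the rmsprop_update.hpp hunk above: the statement that is reflowed there (with no behaviour change) is the core RMSProp rule, which keeps a running mean of squared gradients and divides the step by its square root. Below is a minimal standalone sketch of that rule using Armadillo directly, outside of mlpack's SGD/UpdatePolicy plumbing; the toy quadratic objective and the alpha, stepSize, and epsilon values are illustrative assumptions, not part of the patch.

// Minimal sketch of the RMSProp step shown in the rmsprop_update.hpp hunk,
// applied to the toy objective f(x) = 0.5 * ||x||^2 (so the gradient is x).
// The objective and the alpha/stepSize/epsilon values are assumptions made
// only for this illustration.
#include <armadillo>

int main()
{
  arma::mat iterate = arma::randu<arma::mat>(10, 1);
  arma::mat meanSquaredGradient = arma::zeros<arma::mat>(10, 1);

  const double alpha = 0.99;     // Smoothing factor for the squared gradient.
  const double stepSize = 0.01;  // Learning rate.
  const double epsilon = 1e-8;   // Avoids division by zero.

  for (size_t i = 0; i < 1000; ++i)
  {
    const arma::mat gradient = iterate;  // Gradient of 0.5 * ||x||^2.

    // The same three statements that appear in the hunk above.
    meanSquaredGradient *= alpha;
    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
    iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) +
        epsilon);
  }

  iterate.print("final iterate (should be close to zero):");
  return 0;
}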
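
The momentum and vanilla hunks only touch naming and comments, but they expose the two-part contract an SGD update policy satisfies: Initialize(rows, cols) to size any state, plus a per-step update. The sketch below spells that contract out with a self-contained policy; the Update() body follows the classical momentum rule from the Rumelhart et al. reference cited in the comment, and is an assumption here, since the patch itself does not show MomentumUpdate's Update() method.

// Sketch of the update-policy shape used by the SGD code touched above: the
// policy exposes Initialize(rows, cols) and an Update() step.  The momentum
// rule below is the classical velocity-accumulation form and is assumed for
// illustration; it is not copied from mlpack's MomentumUpdate.
#include <armadillo>

class SimpleMomentumUpdate
{
 public:
  SimpleMomentumUpdate(const double momentum = 0.9) : momentum(momentum) { }

  // Called once before optimization starts; sizes the velocity matrix.
  void Initialize(const size_t rows, const size_t cols)
  {
    velocity = arma::zeros<arma::mat>(rows, cols);
  }

  // One step: accumulate velocity, then move the iterate along it.
  void Update(arma::mat& iterate,
              const double stepSize,
              const arma::mat& gradient)
  {
    velocity = momentum * velocity - stepSize * gradient;
    iterate += velocity;
  }

 private:
  double momentum;
  arma::mat velocity;
};

int main()
{
  // Minimize 0.5 * ||x||^2, whose gradient is simply x.
  arma::mat iterate = arma::ones<arma::mat>(5, 1);
  SimpleMomentumUpdate update(0.9);
  update.Initialize(iterate.n_rows, iterate.n_cols);

  for (size_t i = 0; i < 500; ++i)
  {
    const arma::mat gradient = iterate;
    update.Update(iterate, 0.05, gradient);
  }

  iterate.print("final iterate:");
  return 0;
}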
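
The SMORMS3 hunks are comment and whitespace cleanups, but together they spell out the optimizer's public surface: a DecomposableFunctionType providing NumFunctions(), Evaluate(coordinates, i), and Gradient(coordinates, i, gradient), the constructor defaults listed in the @param block, and Optimize(iterate). A short usage sketch against a made-up decomposable objective follows; SumOfSquaresFunction is an illustrative stand-in rather than an mlpack type, and whether this compiles against a particular mlpack snapshot is an assumption.

// Usage sketch for the SMORMS3 interface documented in the hunks above.  The
// SumOfSquaresFunction objective is a made-up stand-in satisfying the listed
// DecomposableFunctionType requirements.
#include <iostream>
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/smorms3/smorms3.hpp>

using namespace mlpack::optimization;

// f(x) = sum_i 0.5 * x_i^2, decomposed so that function i only sees x_i.
class SumOfSquaresFunction
{
 public:
  SumOfSquaresFunction(const size_t dimension) : dimension(dimension) { }

  size_t NumFunctions() const { return dimension; }

  double Evaluate(const arma::mat& coordinates, const size_t i) const
  {
    return 0.5 * coordinates(i) * coordinates(i);
  }

  void Gradient(const arma::mat& coordinates,
                const size_t i,
                arma::mat& gradient) const
  {
    gradient = arma::zeros<arma::mat>(coordinates.n_rows, coordinates.n_cols);
    gradient(i) = coordinates(i);
  }

 private:
  size_t dimension;
};

int main()
{
  SumOfSquaresFunction f(10);

  // Constructor arguments follow the @param list in smorms3.hpp:
  // stepSize, epsilon, maxIterations, tolerance, shuffle.
  SMORMS3<SumOfSquaresFunction> optimizer(f, 0.001, 1e-16, 100000, 1e-5, true);

  arma::mat coordinates = arma::randu<arma::mat>(10, 1);
  const double objective = optimizer.Optimize(coordinates);

  std::cout << "final objective: " << objective << std::endl;
  return 0;
}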
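
Patches 77 and 78 deprecate L_BFGS::Optimize(iterate, maxIterations) and reroute it through the single-argument overload, so callers should fix the iteration budget on the optimizer before calling Optimize(iterate). A hypothetical migration sketch is below; QuadraticFunction is a stand-in objective, and the MaxIterations() accessor and the minimal Evaluate()/Gradient() requirements are assumptions based on the surrounding L_BFGS interface rather than anything these two patches introduce.

// Hypothetical migration sketch for the L-BFGS change in patches 77/78.
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>

using namespace mlpack::optimization;

// Minimal objective: f(x) = 0.5 * ||x||^2.
class QuadraticFunction
{
 public:
  double Evaluate(const arma::mat& coordinates)
  {
    return 0.5 * arma::accu(coordinates % coordinates);
  }

  void Gradient(const arma::mat& coordinates, arma::mat& gradient)
  {
    gradient = coordinates;
  }
};

int main()
{
  QuadraticFunction f;
  arma::mat coordinates = arma::randu<arma::mat>(10, 1);

  L_BFGS<QuadraticFunction> lbfgs(f);

  // Deprecated after patch 77; patch 78 makes it simply store maxIterations
  // and delegate to the one-argument overload:
  //   lbfgs.Optimize(coordinates, 500);

  // Preferred: fix the iteration budget up front, then optimize.  The
  // MaxIterations() accessor is assumed here; alternatively pass the limit
  // through the constructor, as the deprecation note suggests.
  lbfgs.MaxIterations() = 500;
  lbfgs.Optimize(coordinates);

  return 0;
}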