Commit

Merge remote-tracking branch 'mlpack/master'
ShangtongZhang committed Apr 25, 2017
2 parents 7e9fdea + 7f7561d commit a41cda1
Showing 58 changed files with 1,704 additions and 709 deletions.
1 change: 1 addition & 0 deletions COPYRIGHT.txt
@@ -78,6 +78,7 @@ Copyright:
Copyright 2017, Sagar B Hathwar <[email protected]>
Copyright 2017, Nishanth Hegde <[email protected]>
Copyright 2017, Parminder Singh <[email protected]>
Copyright 2017, CodeAi <[email protected]>

License: BSD-3-clause
All rights reserved.
5 changes: 5 additions & 0 deletions HISTORY.md
@@ -1,6 +1,11 @@
### mlpack ?.?.?
###### ????-??-??

### mlpack 2.2.1
###### 2017-04-13
* Compilation fix for mlpack_nca and mlpack_test on older Armadillo versions
(#984).

### mlpack 2.2.0
###### 2017-03-21
* Bugfix for mlpack_knn program (#816).
4 changes: 2 additions & 2 deletions doc/guide/build.hpp
@@ -23,14 +23,14 @@ href="https://keon.io/mlpack/mlpack-on-windows/">Keon's excellent tutorial</a>.
@section Download latest mlpack build
Download latest mlpack build from here:
<a href="http://www.mlpack.org/files/mlpack-2.2.0.tar.gz">mlpack-2.2.0</a>
<a href="http://www.mlpack.org/files/mlpack-2.2.1.tar.gz">mlpack-2.2.1</a>
@section builddir Creating Build Directory
Once the mlpack source is unpacked, you should create a build directory.
@code
$ cd mlpack-2.2.0
$ cd mlpack-2.2.1
$ mkdir build
@endcode
1 change: 1 addition & 0 deletions src/mlpack/core.hpp
@@ -219,6 +219,7 @@
* - Sagar B Hathwar <[email protected]>
* - Nishanth Hegde <[email protected]>
* - Parminder Singh <[email protected]>
* - CodeAi (deep learning bug detector) <[email protected]>
*/

// First, include all of the prerequisites.
1 change: 1 addition & 0 deletions src/mlpack/core/optimizers/CMakeLists.txt
@@ -10,6 +10,7 @@ set(DIRS
sa
sdp
sgd
smorms3
)

foreach(dir ${DIRS})
8 changes: 4 additions & 4 deletions src/mlpack/core/optimizers/ada_delta/ada_delta.hpp
@@ -34,10 +34,10 @@ namespace optimization {
*
* @code
* @article{Zeiler2012,
* author = {Matthew D. Zeiler},
* title = {{ADADELTA:} An Adaptive Learning Rate Method},
* journal = {CoRR},
* year = {2012}
* author = {Matthew D. Zeiler},
* title = {{ADADELTA:} An Adaptive Learning Rate Method},
* journal = {CoRR},
* year = {2012}
* }
* @endcode
*
8 changes: 4 additions & 4 deletions src/mlpack/core/optimizers/ada_delta/ada_delta_update.hpp
@@ -29,10 +29,10 @@ namespace optimization {
*
* @code
* @article{Zeiler2012,
* author = {Matthew D. Zeiler},
* title = {{ADADELTA:} An Adaptive Learning Rate Method},
* journal = {CoRR},
* year = {2012}
* author = {Matthew D. Zeiler},
* title = {{ADADELTA:} An Adaptive Learning Rate Method},
* journal = {CoRR},
* year = {2012}
* }
* @endcode
*
14 changes: 7 additions & 7 deletions src/mlpack/core/optimizers/ada_grad/ada_grad.hpp
@@ -30,13 +30,13 @@ namespace optimization {
*
* @code
* @article{duchi2011adaptive,
* author = {Duchi, John and Hazan, Elad and Singer, Yoram},
* title = {Adaptive subgradient methods for online learning and stochastic optimization},
* journal = {Journal of Machine Learning Research},
* volume = {12},
* number = {Jul},
* pages = {2121--2159},
* year = {2011}
* author = {Duchi, John and Hazan, Elad and Singer, Yoram},
* title = {Adaptive subgradient methods for online learning and stochastic optimization},
* journal = {Journal of Machine Learning Research},
* volume = {12},
* number = {Jul},
* pages = {2121--2159},
* year = {2011}
* }
* @endcode
*
4 changes: 2 additions & 2 deletions src/mlpack/core/optimizers/ada_grad/ada_grad_update.hpp
@@ -58,8 +58,8 @@ class AdaGradUpdate
* gradient matrix is initialized to the zeros matrix with the same size as
* gradient matrix (see mlpack::optimization::SGD::Optimizer).
*
* @param rows number of rows in the gradient matrix.
* @param cols number of columns in the gradient matrix.
* @param rows Number of rows in the gradient matrix.
* @param cols Number of columns in the gradient matrix.
*/
void Initialize(const size_t rows,
const size_t cols)
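For context on the AdaGradUpdate policy documented above, the short sketch below shows how such an SGD update policy is typically exercised: Initialize() sizes the squared-gradient accumulator to match the gradient matrix, and Update() then applies a single step. This is an illustrative sketch only, not part of this commit; the default constructor and the Update() signature used here are assumed from the conventional mlpack policy interface, and in normal use SGD calls both methods internally.

// Illustrative sketch only -- not part of this diff.  It drives AdaGradUpdate
// by hand, assuming the conventional mlpack policy interface; normally the
// SGD optimizer calls Initialize() and Update() itself.
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/ada_grad/ada_grad_update.hpp>

using namespace mlpack::optimization;

int main()
{
  arma::mat iterate(10, 1, arma::fill::randu);   // current parameters
  arma::mat gradient(10, 1, arma::fill::randu);  // gradient at those parameters

  AdaGradUpdate update;  // default epsilon (assumed)

  // Zero-initialize the squared-gradient accumulator with the same size as
  // the gradient matrix, as documented above.
  update.Initialize(iterate.n_rows, iterate.n_cols);

  // One AdaGrad step with step size 0.01 (Update() signature is assumed).
  update.Update(iterate, 0.01, gradient);

  return 0;
}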
2 changes: 2 additions & 0 deletions src/mlpack/core/optimizers/adam/CMakeLists.txt
@@ -1,6 +1,8 @@
set(SOURCES
adam.hpp
adam_impl.hpp
adam_update.hpp
adamax_update.hpp
)

set(DIR_SRCS)
115 changes: 50 additions & 65 deletions src/mlpack/core/optimizers/adam/adam.hpp
@@ -20,6 +20,10 @@

#include <mlpack/prereqs.hpp>

#include <mlpack/core/optimizers/sgd/sgd.hpp>
#include "adam_update.hpp"
#include "adamax_update.hpp"

namespace mlpack {
namespace optimization {

@@ -33,10 +37,11 @@ namespace optimization {
*
* @code
* @article{Kingma2014,
* author = {Diederik P. Kingma and Jimmy Ba},
* title = {Adam: {A} Method for Stochastic Optimization},
* journal = {CoRR},
* year = {2014}
* author = {Diederik P. Kingma and Jimmy Ba},
* title = {Adam: {A} Method for Stochastic Optimization},
* journal = {CoRR},
* year = {2014},
* url = {http://arxiv.org/abs/1412.6980}
* }
* @endcode
*
@@ -60,9 +65,13 @@ namespace optimization {
*
* @tparam DecomposableFunctionType Decomposable objective function type to be
* minimized.
* @tparam UpdateRule Adam optimizer update rule to be used.
*/
template<typename DecomposableFunctionType>
class Adam
template<
typename DecomposableFunctionType,
typename UpdateRule = AdamUpdate
>
class AdamType
{
public:
/**
@@ -84,18 +93,15 @@
* @param tolerance Maximum absolute tolerance to terminate algorithm.
* @param shuffle If true, the function order is shuffled; otherwise, each
* function is visited in linear order.
* @param adaMax If true, then the AdaMax optimizer is used; otherwise, by
* default the Adam optimizer is used.
*/
Adam(DecomposableFunctionType& function,
const double stepSize = 0.001,
const double beta1 = 0.9,
const double beta2 = 0.999,
const double eps = 1e-8,
const size_t maxIterations = 100000,
const double tolerance = 1e-5,
const bool shuffle = true,
const bool adaMax = false);
AdamType(DecomposableFunctionType& function,
const double stepSize = 0.001,
const double beta1 = 0.9,
const double beta2 = 0.999,
const double eps = 1e-8,
const size_t maxIterations = 100000,
const double tolerance = 1e-5,
const bool shuffle = true);

/**
* Optimize the given function using Adam. The given starting point will be
@@ -105,82 +111,61 @@
* @param iterate Starting point (will be modified).
* @return Objective value of the final point.
*/
double Optimize(arma::mat& iterate);
double Optimize(arma::mat& iterate){ return optimizer.Optimize(iterate); }

//! Get the instantiated function to be optimized.
const DecomposableFunctionType& Function() const { return function; }
const DecomposableFunctionType& Function() const
{
return optimizer.Function();
}
//! Modify the instantiated function.
DecomposableFunctionType& Function() { return function; }
DecomposableFunctionType& Function() { return optimizer.Function(); }

//! Get the step size.
double StepSize() const { return stepSize; }
double StepSize() const { return optimizer.StepSize(); }
//! Modify the step size.
double& StepSize() { return stepSize; }
double& StepSize() { return optimizer.StepSize(); }

//! Get the smoothing parameter.
double Beta1() const { return beta1; }
double Beta1() const { return optimizer.UpdatePolicy().Beta1(); }
//! Modify the smoothing parameter.
double& Beta1() { return beta1; }
double& Beta1() { return optimizer.UpdatePolicy().Beta1(); }

//! Get the second moment coefficient.
double Beta2() const { return beta2; }
double Beta2() const { return optimizer.UpdatePolicy().Beta2(); }
//! Modify the second moment coefficient.
double& Beta2() { return beta2; }
double& Beta2() { return optimizer.UpdatePolicy().Beta2(); }

//! Get the value used to initialise the mean squared gradient parameter.
double Epsilon() const { return eps; }
double Epsilon() const { return optimizer.UpdatePolicy().Epsilon(); }
//! Modify the value used to initialise the mean squared gradient parameter.
double& Epsilon() { return eps; }
double& Epsilon() { return optimizer.UpdatePolicy().Epsilon(); }

//! Get the maximum number of iterations (0 indicates no limit).
size_t MaxIterations() const { return maxIterations; }
size_t MaxIterations() const { return optimizer.MaxIterations(); }
//! Modify the maximum number of iterations (0 indicates no limit).
size_t& MaxIterations() { return maxIterations; }
size_t& MaxIterations() { return optimizer.MaxIterations(); }

//! Get the tolerance for termination.
double Tolerance() const { return tolerance; }
double Tolerance() const { return optimizer.Tolerance(); }
//! Modify the tolerance for termination.
double& Tolerance() { return tolerance; }
double& Tolerance() { return optimizer.Tolerance(); }

//! Get whether or not the individual functions are shuffled.
bool Shuffle() const { return shuffle; }
bool Shuffle() const { return optimizer.Shuffle(); }
//! Modify whether or not the individual functions are shuffled.
bool& Shuffle() { return shuffle; }

//! Get whether or not the AdaMax optimizer is specified.
bool AdaMax() const { return adaMax; }
  //! Modify whether or not the AdaMax optimizer is to be used.
bool& AdaMax() { return adaMax; }
bool& Shuffle() { return optimizer.Shuffle(); }

private:
//! The instantiated function.
DecomposableFunctionType& function;

//! The step size for each example.
double stepSize;

//! Exponential decay rate for the first moment estimates.
double beta1;

//! Exponential decay rate for the weighted infinity norm estimates.
double beta2;

//! The value used to initialise the mean squared gradient parameter.
double eps;

//! The maximum number of allowed iterations.
size_t maxIterations;

//! The tolerance for termination.
double tolerance;
//! The Stochastic Gradient Descent object with Adam policy.
SGD<DecomposableFunctionType, UpdateRule> optimizer;
};

//! Controls whether or not the individual functions are shuffled when
//! iterating.
bool shuffle;
template<typename DecomposableFunctionType>
using Adam = AdamType<DecomposableFunctionType, AdamUpdate>;

//! Specifies whether or not the AdaMax optimizer is to be used.
bool adaMax;
};
template<typename DecomposableFunctionType>
using AdaMax = AdamType<DecomposableFunctionType, AdaMaxUpdate>;

} // namespace optimization
} // namespace mlpack
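The adam.hpp hunk above turns Adam into a thin wrapper (AdamType) around SGD plus an update policy, with Adam and AdaMax provided as aliases; the AdaMax variant is now selected through the AdaMaxUpdate policy rather than the removed adaMax constructor flag. The sketch below, which is not part of the commit, shows how the new interface might be used; ToyFunction is a made-up decomposable objective supplying the NumFunctions()/Evaluate()/Gradient() members that SGD-style optimizers expect.

// Illustrative sketch only -- not part of this diff.  ToyFunction is a toy
// decomposable objective: f(x) = sum_i ||x - data_i||^2.
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/adam/adam.hpp>

using namespace mlpack::optimization;

class ToyFunction
{
 public:
  ToyFunction(const arma::mat& data) : data(data) { }

  // Number of separable terms in the objective.
  size_t NumFunctions() const { return data.n_cols; }

  // Value of the i-th term at the given coordinates.
  double Evaluate(const arma::mat& coordinates, const size_t i) const
  {
    return arma::accu(arma::square(coordinates - data.col(i)));
  }

  // Gradient of the i-th term at the given coordinates.
  void Gradient(const arma::mat& coordinates,
                const size_t i,
                arma::mat& gradient) const
  {
    gradient = 2 * (coordinates - data.col(i));
  }

 private:
  arma::mat data;
};

int main()
{
  arma::mat data(5, 100, arma::fill::randn);
  ToyFunction f(data);
  arma::mat coordinates(5, 1, arma::fill::zeros);

  // Plain Adam: AdamType with the default AdamUpdate policy, using the
  // constructor parameters shown in the diff above.
  Adam<ToyFunction> adam(f, 0.001, 0.9, 0.999, 1e-8, 100000, 1e-5, true);
  adam.Optimize(coordinates);

  // AdaMax: the same wrapper parameterized with AdaMaxUpdate; this replaces
  // the removed adaMax constructor flag.
  AdaMax<ToyFunction> adamax(f);
  adamax.Optimize(coordinates);

  return 0;
}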