Skip to content

Commit

Permalink
Adding dual averaging scale adapter implementation (#46)
Browse files Browse the repository at this point in the history
* Adding dual averaging scale adapter implementation

* Fix indentation

* Rename variance_adapter to variance_shape_adapter
  • Loading branch information
matt-graham authored Oct 24, 2024
1 parent 517bde4 commit 5d29b85
Show file tree
Hide file tree
Showing 11 changed files with 322 additions and 73 deletions.
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

export(barker_proposal)
export(chain_state)
export(dual_averaging_scale_adapter)
export(example_gaussian_stan_model)
export(hamiltonian_proposal)
export(langevin_proposal)
export(random_walk_proposal)
export(robust_shape_adapter)
export(sample_chain)
export(scale_adapter)
export(simple_scale_adapter)
export(target_distribution_from_stan_model)
export(trace_function_from_stan_model)
export(variance_adapter)
export(variance_shape_adapter)
114 changes: 100 additions & 14 deletions R/adaptation.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#' Create object to adapt proposal scale to coerce average acceptance rate.
#' Create object to adapt proposal scale to coerce average acceptance rate using
#' a Robbins and Monro (1951) scheme.
#'
#' @param initial_scale Initial value to use for scale parameter. If not set
#' explicitly a proposal and dimension dependent default will be used.
Expand All @@ -25,9 +26,9 @@
#' grad_log_density = function(x) -x
#' )
#' proposal <- barker_proposal(target_distribution)
#' adapter <- scale_adapter(initial_scale = 1., target_accept_prob = 0.4)
#' adapter <- simple_scale_adapter(initial_scale = 1., target_accept_prob = 0.4)
#' adapter$initialize(proposal, chain_state(c(0, 0)))
scale_adapter <- function(
simple_scale_adapter <- function(
initial_scale = NULL, target_accept_prob = NULL, kappa = 0.6) {
log_scale <- NULL
initialize <- function(proposal, initial_state) {
Expand All @@ -41,9 +42,9 @@ scale_adapter <- function(
if (is.null(target_accept_prob)) {
target_accept_prob <- proposal$default_target_accept_prob()
}
gamma <- sample_index^(-kappa)
beta <- sample_index^(-kappa)
accept_prob <- state_and_statistics$statistics$accept_prob
log_scale <<- log_scale + gamma * (accept_prob - target_accept_prob)
log_scale <<- log_scale + beta * (accept_prob - target_accept_prob)
proposal$update(scale = exp(log_scale))
}
list(
Expand All @@ -54,12 +55,95 @@ scale_adapter <- function(
)
}

#' Create object to adapt proposal scale to coerce average acceptance rate
#' using dual averaging scheme of Nesterov (2009) and Hoffman and Gelman (2014).
#'
#' @inherit simple_scale_adapter params return
#'
#' @param gamma Regularization coefficient for (log) scale in dual averaging
#' algorithm. Controls amount of regularization of (log) scale towards `mu`.
#' Should be set to a non-negative value. Defaults to value recommended in
#' Hoffman and Gelman (2014).
#' @param iteration_offset Offset to chain iteration used for the iteration
#' based weighting of the adaptation statistic error estimate. Should be set
#' to a non-negative value. A value greater than zero has the effect of
#' stabilizing early iterations. Defaults to value recommended in
#' Hoffman and Gelman (2014).
#' @param mu Value to regularize (log) scale towards. If `NULL` (the default),
#' `mu` will be set to `log(10 * initial_scale)`, as recommended in Hoffman
#' and Gelman (2014).
#'
#' @export
#'
#' @examples
#' target_distribution <- list(
#' log_density = function(x) -sum(x^2) / 2,
#' grad_log_density = function(x) -x
#' )
#' proposal <- barker_proposal(target_distribution)
#' adapter <- dual_averaging_scale_adapter(
#' initial_scale = 1., target_accept_prob = 0.4
#' )
#' adapter$initialize(proposal, chain_state(c(0, 0)))
dual_averaging_scale_adapter <- function(
initial_scale = NULL,
target_accept_prob = NULL,
kappa = 0.75,
gamma = 0.05,
iteration_offset = 10,
mu = NULL) {
log_scale <- NULL
smoothed_log_scale <- 0
accept_prob_error <- 0
initialize <- function(proposal, initial_state) {
if (is.null(initial_scale)) {
initial_scale <- proposal$default_initial_scale(initial_state$dimension())
}
if (is.null(mu)) {
mu <<- log(10 * initial_scale)
}
log_scale <<- log(initial_scale)
proposal$update(scale = initial_scale)
}
update <- function(proposal, sample_index, state_and_statistics) {
if (is.null(target_accept_prob)) {
target_accept_prob <- proposal$default_target_accept_prob()
}
accept_prob <- state_and_statistics$statistics$accept_prob
offset_sample_index <- sample_index + iteration_offset
accept_prob_error <<- (
(1 - 1 / offset_sample_index) * accept_prob_error + (
target_accept_prob - accept_prob
) / offset_sample_index
)
log_scale <<- mu - sqrt(sample_index) * accept_prob_error / gamma
beta <- sample_index^(-kappa)
smoothed_log_scale <<- beta * log_scale + (1 - beta) * smoothed_log_scale
proposal$update(scale = exp(log_scale))
}
finalize <- function(proposal) {
proposal$update(scale = exp(smoothed_log_scale))
}
list(
initialize = initialize,
update = update,
finalize = finalize,
state = function() {
list(
log_scale = log_scale,
smoothed_log_scale = smoothed_log_scale,
accept_prob_error = accept_prob_error
)
}
)
}

#' Create object to adapt proposal with per dimension scales based on estimates
#' of target distribution variances.
#'
#' @param kappa Decay rate exponent in `[0.5, 1]` for adaptation learning rate.
#'
#' @inherit scale_adapter return
#' @inherit simple_scale_adapter return
#'
#' @export
#' @examples
Expand All @@ -68,20 +152,22 @@ scale_adapter <- function(
#' grad_log_density = function(x) -x
#' )
#' proposal <- barker_proposal(target_distribution)
#' adapter <- variance_adapter()
#' adapter <- variance_shape_adapter()
#' adapter$initialize(proposal, chain_state(c(0, 0)))
variance_adapter <- function(kappa = 0.6) {
variance_shape_adapter <- function(kappa = 0.6) {
mean_estimate <- NULL
variance_estimate <- NULL
initialize <- function(proposal, initial_state) {
mean_estimate <<- initial_state$position()
variance_estimate <<- rep(1., initial_state$dimension())
}
update <- function(proposal, sample_index, state_and_statistics) {
gamma <- sample_index^(-kappa)
# Offset sample_index by 1 so that initial unity variance_estimate acts as
# regularizer
beta <- (sample_index + 1)^(-kappa)
position <- state_and_statistics$state$position()
mean_estimate <<- mean_estimate + gamma * (position - mean_estimate)
variance_estimate <<- variance_estimate + gamma * (
mean_estimate <<- mean_estimate + beta * (position - mean_estimate)
variance_estimate <<- variance_estimate + beta * (
(position - mean_estimate)^2 - variance_estimate
)
proposal$update(shape = sqrt(variance_estimate))
Expand All @@ -107,9 +193,9 @@ variance_adapter <- function(kappa = 0.6) {
#' coerced acceptance rate. _Statistics and Computing_, 22, 997-1008.
#' <https://doi.iorg/10.1007/s11222-011-9269-5>
#'
#' @inheritParams scale_adapter
#' @inheritParams simple_scale_adapter
#'
#' @inherit scale_adapter return
#' @inherit simple_scale_adapter return
#'
#' @export
#'
Expand Down Expand Up @@ -139,7 +225,7 @@ robust_shape_adapter <- function(
momentum <- state_and_statistics$proposed_state$momentum()
accept_prob <- state_and_statistics$statistics$accept_prob
shape <<- ramcmc::adapt_S(
shape, momentum, accept_prob, sample_index - 1, target_accept_prob, kappa
shape, momentum, accept_prob, sample_index, target_accept_prob, kappa
)
proposal$update(shape = shape)
}
Expand Down
2 changes: 1 addition & 1 deletion R/chains.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ chain_loop <- function(
state, target_distribution, proposal
)
for (adapter in adapters) {
adapter$update(proposal, chain_iteration + 1, state_and_statistics)
adapter$update(proposal, chain_iteration, state_and_statistics)
}
state <- state_and_statistics$state
if (record_traces_and_statistics) {
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ results <- sample_chain(
initial_state = rnorm(dimension),
n_warm_up_iteration = 1000,
n_main_iteration = 1000,
adapters = list(scale_adapter(), variance_adapter())
adapters = list(simple_scale_adapter(), variance_shape_adapter())
)
mean_accept_prob <- mean(results$statistics[, "accept_prob"])
adapted_shape <- proposal$parameters()$shape
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ results <- sample_chain(
initial_state = rnorm(dimension),
n_warm_up_iteration = 1000,
n_main_iteration = 1000,
adapters = list(scale_adapter(), variance_adapter())
adapters = list(simple_scale_adapter(), variance_shape_adapter())
)
mean_accept_prob <- mean(results$statistics[, "accept_prob"])
adapted_shape <- proposal$parameters()$shape
Expand All @@ -66,7 +66,7 @@ cat(
sprintf("Adapter scale est.: %s", toString(adapted_shape)),
sep = "\n"
)
#> Average acceptance probability is 0.40
#> Average acceptance probability is 0.41
#> True target scales: 1.50538046096953, 1.37774732725824, 0.277038897322645
#> Adapter scale est.: 1.35010920408606, 1.5140138215658, 0.248974800274054
#> Adapter scale est.: 1.2489768457131, 1.23111560302158, 0.215024121396933
```
69 changes: 69 additions & 0 deletions man/dual_averaging_scale_adapter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 12 additions & 6 deletions man/scale_adapter.Rd → man/simple_scale_adapter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/variance_adapter.Rd → man/variance_shape_adapter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5d29b85

Please sign in to comment.