Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding dual averaging scale adapter implementation #46

Merged
merged 3 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

export(barker_proposal)
export(chain_state)
export(dual_averaging_scale_adapter)
export(example_gaussian_stan_model)
export(hamiltonian_proposal)
export(langevin_proposal)
export(random_walk_proposal)
export(robust_shape_adapter)
export(sample_chain)
export(scale_adapter)
export(simple_scale_adapter)
export(target_distribution_from_stan_model)
export(trace_function_from_stan_model)
export(variance_adapter)
export(variance_shape_adapter)
114 changes: 100 additions & 14 deletions R/adaptation.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#' Create object to adapt proposal scale to coerce average acceptance rate.
#' Create object to adapt proposal scale to coerce average acceptance rate using
#' a Robbins and Monro (1951) scheme.
#'
#' @param initial_scale Initial value to use for scale parameter. If not set
#' explicitly a proposal and dimension dependent default will be used.
Expand All @@ -25,9 +26,9 @@
#' grad_log_density = function(x) -x
#' )
#' proposal <- barker_proposal(target_distribution)
#' adapter <- scale_adapter(initial_scale = 1., target_accept_prob = 0.4)
#' adapter <- simple_scale_adapter(initial_scale = 1., target_accept_prob = 0.4)
#' adapter$initialize(proposal, chain_state(c(0, 0)))
scale_adapter <- function(
simple_scale_adapter <- function(
initial_scale = NULL, target_accept_prob = NULL, kappa = 0.6) {
log_scale <- NULL
initialize <- function(proposal, initial_state) {
Expand All @@ -41,9 +42,9 @@ scale_adapter <- function(
if (is.null(target_accept_prob)) {
target_accept_prob <- proposal$default_target_accept_prob()
}
gamma <- sample_index^(-kappa)
beta <- sample_index^(-kappa)
accept_prob <- state_and_statistics$statistics$accept_prob
log_scale <<- log_scale + gamma * (accept_prob - target_accept_prob)
log_scale <<- log_scale + beta * (accept_prob - target_accept_prob)
proposal$update(scale = exp(log_scale))
}
list(
Expand All @@ -54,12 +55,95 @@ scale_adapter <- function(
)
}

#' Create object to adapt proposal scale to coerce average acceptance rate
#' using dual averaging scheme of Nesterov (2009) and Hoffman and Gelman (2014).
#'
#' @inherit simple_scale_adapter params return
#'
#' @param gamma Regularization coefficient for (log) scale in dual averaging
#' algorithm. Controls amount of regularization of (log) scale towards `mu`.
#' Should be set to a non-negative value. Defaults to value recommended in
#' Hoffman and Gelman (2014).
#' @param iteration_offset Offset to chain iteration used for the iteration
#' based weighting of the adaptation statistic error estimate. Should be set
#' to a non-negative value. A value greater than zero has the effect of
#' stabilizing early iterations. Defaults to value recommended in
#' Hoffman and Gelman (2014).
#' @param mu Value to regularize (log) scale towards. If `NULL` (the default),
#' `mu` will be set to `log(10 * initial_scale)`, as recommended in Hoffman
#' and Gelman (2014).
#'
#' @export
#'
#' @examples
#' target_distribution <- list(
#' log_density = function(x) -sum(x^2) / 2,
#' grad_log_density = function(x) -x
#' )
#' proposal <- barker_proposal(target_distribution)
#' adapter <- dual_averaging_scale_adapter(
#' initial_scale = 1., target_accept_prob = 0.4
#' )
#' adapter$initialize(proposal, chain_state(c(0, 0)))
dual_averaging_scale_adapter <- function(
    initial_scale = NULL,
    target_accept_prob = NULL,
    kappa = 0.75,
    gamma = 0.05,
    iteration_offset = 10,
    mu = NULL) {
  # Closure state: current log scale, its iterate average (used as the final
  # adapted value) and the running weighted acceptance probability error
  # (H-bar in Hoffman and Gelman, 2014).
  log_scale <- NULL
  smoothed_log_scale <- 0
  accept_prob_error <- 0
  initialize <- function(proposal, initial_state) {
    if (is.null(initial_scale)) {
      initial_scale <- proposal$default_initial_scale(initial_state$dimension())
    }
    if (is.null(mu)) {
      # Default regularization point recommended in Hoffman and Gelman (2014).
      mu <<- log(10 * initial_scale)
    }
    log_scale <<- log(initial_scale)
    proposal$update(scale = initial_scale)
  }
  update <- function(proposal, sample_index, state_and_statistics) {
    if (is.null(target_accept_prob)) {
      target_accept_prob <- proposal$default_target_accept_prob()
    }
    accept_prob <- state_and_statistics$statistics$accept_prob
    # Offset the iteration count to stabilize the early error estimates.
    weight <- 1 / (sample_index + iteration_offset)
    accept_prob_error <<- (
      (1 - weight) * accept_prob_error +
        weight * (target_accept_prob - accept_prob)
    )
    # Dual averaging primal update regularized towards mu.
    log_scale <<- mu - sqrt(sample_index) * accept_prob_error / gamma
    forgetting_factor <- sample_index^(-kappa)
    # Polynomially weighted iterate averaging of the log scale.
    smoothed_log_scale <<- (
      forgetting_factor * log_scale +
        (1 - forgetting_factor) * smoothed_log_scale
    )
    proposal$update(scale = exp(log_scale))
  }
  finalize <- function(proposal) {
    # After warm-up, fix the proposal scale to the smoothed (averaged) value.
    proposal$update(scale = exp(smoothed_log_scale))
  }
  list(
    initialize = initialize,
    update = update,
    finalize = finalize,
    state = function() {
      list(
        log_scale = log_scale,
        smoothed_log_scale = smoothed_log_scale,
        accept_prob_error = accept_prob_error
      )
    }
  )
}

#' Create object to adapt proposal with per dimension scales based on estimates
#' of target distribution variances.
#'
#' @param kappa Decay rate exponent in `[0.5, 1]` for adaptation learning rate.
#'
#' @inherit scale_adapter return
#' @inherit simple_scale_adapter return
#'
#' @export
#' @examples
Expand All @@ -68,20 +152,22 @@ scale_adapter <- function(
#' grad_log_density = function(x) -x
#' )
#' proposal <- barker_proposal(target_distribution)
#' adapter <- variance_adapter()
#' adapter <- variance_shape_adapter()
#' adapter$initialize(proposal, chain_state(c(0, 0)))
variance_adapter <- function(kappa = 0.6) {
variance_shape_adapter <- function(kappa = 0.6) {
mean_estimate <- NULL
variance_estimate <- NULL
initialize <- function(proposal, initial_state) {
mean_estimate <<- initial_state$position()
variance_estimate <<- rep(1., initial_state$dimension())
}
update <- function(proposal, sample_index, state_and_statistics) {
gamma <- sample_index^(-kappa)
# Offset sample_index by 1 so that initial unity variance_estimate acts as
# regularizer
beta <- (sample_index + 1)^(-kappa)
position <- state_and_statistics$state$position()
mean_estimate <<- mean_estimate + gamma * (position - mean_estimate)
variance_estimate <<- variance_estimate + gamma * (
mean_estimate <<- mean_estimate + beta * (position - mean_estimate)
variance_estimate <<- variance_estimate + beta * (
(position - mean_estimate)^2 - variance_estimate
)
proposal$update(shape = sqrt(variance_estimate))
Expand All @@ -107,9 +193,9 @@ variance_adapter <- function(kappa = 0.6) {
#' coerced acceptance rate. _Statistics and Computing_, 22, 997-1008.
#' <https://doi.org/10.1007/s11222-011-9269-5>
#'
#' @inheritParams scale_adapter
#' @inheritParams simple_scale_adapter
#'
#' @inherit scale_adapter return
#' @inherit simple_scale_adapter return
#'
#' @export
#'
Expand Down Expand Up @@ -139,7 +225,7 @@ robust_shape_adapter <- function(
momentum <- state_and_statistics$proposed_state$momentum()
accept_prob <- state_and_statistics$statistics$accept_prob
shape <<- ramcmc::adapt_S(
shape, momentum, accept_prob, sample_index - 1, target_accept_prob, kappa
shape, momentum, accept_prob, sample_index, target_accept_prob, kappa
)
proposal$update(shape = shape)
}
Expand Down
2 changes: 1 addition & 1 deletion R/chains.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ chain_loop <- function(
state, target_distribution, proposal
)
for (adapter in adapters) {
adapter$update(proposal, chain_iteration + 1, state_and_statistics)
adapter$update(proposal, chain_iteration, state_and_statistics)
}
state <- state_and_statistics$state
if (record_traces_and_statistics) {
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ results <- sample_chain(
initial_state = rnorm(dimension),
n_warm_up_iteration = 1000,
n_main_iteration = 1000,
adapters = list(scale_adapter(), variance_adapter())
adapters = list(simple_scale_adapter(), variance_shape_adapter())
)
mean_accept_prob <- mean(results$statistics[, "accept_prob"])
adapted_shape <- proposal$parameters()$shape
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ results <- sample_chain(
initial_state = rnorm(dimension),
n_warm_up_iteration = 1000,
n_main_iteration = 1000,
adapters = list(scale_adapter(), variance_adapter())
adapters = list(simple_scale_adapter(), variance_shape_adapter())
)
mean_accept_prob <- mean(results$statistics[, "accept_prob"])
adapted_shape <- proposal$parameters()$shape
Expand All @@ -66,7 +66,7 @@ cat(
sprintf("Adapter scale est.: %s", toString(adapted_shape)),
sep = "\n"
)
#> Average acceptance probability is 0.40
#> Average acceptance probability is 0.41
#> True target scales: 1.50538046096953, 1.37774732725824, 0.277038897322645
#> Adapter scale est.: 1.35010920408606, 1.5140138215658, 0.248974800274054
#> Adapter scale est.: 1.2489768457131, 1.23111560302158, 0.215024121396933
```
69 changes: 69 additions & 0 deletions man/dual_averaging_scale_adapter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 12 additions & 6 deletions man/scale_adapter.Rd → man/simple_scale_adapter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/variance_adapter.Rd → man/variance_shape_adapter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading