Skip to content

Commit 01255d6

Browse files
committed
Export init and sample lm function for genMCMC example
1 parent de5a965 commit 01255d6

File tree

5 files changed

+189
-16
lines changed

5 files changed

+189
-16
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@ export(gbm_star)
1717
export(genEM_star)
1818
export(genMCMC_star)
1919
export(getEffSize)
20+
export(init_lm_gprior)
2021
export(lm_star)
2122
export(plot_coef)
2223
export(plot_fitted)
2324
export(plot_pmf)
2425
export(pvals)
2526
export(randomForest_star)
2627
export(round_floor)
28+
export(sample_lm_gprior)
2729
export(simBaS)
2830
export(simulate_nb_friedman)
2931
export(simulate_nb_lm)

R/STAR_Bayesian.R

Lines changed: 160 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -280,15 +280,14 @@ blm_star <- function(y, X, X_test = NULL,
280280
#' are estimated using moments (means and variances) of \code{y}.
281281
#'
282282
#' @examples
283-
#' \donttest{
284283
#' # Simulate data with count-valued response y:
285284
#' sim_dat = simulate_nb_lm(n = 100, p = 5)
286285
#' y = sim_dat$y; X = sim_dat$X
287286
#'
288287
#' # STAR: log-transformation:
289288
#' fit_log = genMCMC_star(y = y,
290-
#' sample_params = function(y, params) rSTAR:::sample_lm_gprior(y, X, params),
291-
#' init_params = function(y) rSTAR:::init_lm_gprior(y, X),
289+
#' sample_params = function(y, params) sample_lm_gprior(y, X, params),
290+
#' init_params = function(y) init_lm_gprior(y, X),
292291
#' transformation = 'log')
293292
#' # Posterior mean of each coefficient:
294293
#' coef(fit_log)
@@ -297,14 +296,13 @@ blm_star <- function(y, X, X_test = NULL,
297296
#' fit_log$WAIC
298297
#'
299298
#' # MCMC diagnostics:
300-
#' plot(as.ts(fit_log$post.coefficients[,1:3]))
299+
#' plot(as.ts(fit_log$post.beta[,1:3]))
301300
#'
302301
#' # Posterior predictive check:
303302
#' hist(apply(fit_log$post.pred, 1,
304303
#' function(x) mean(x==0)), main = 'Proportion of Zeros', xlab='');
305304
#' abline(v = mean(y==0), lwd=4, col ='blue')
306305
#'
307-
#'}
308306
#' @export
309307
genMCMC_star = function(y,
310308
sample_params,
@@ -387,7 +385,7 @@ genMCMC_star = function(y,
387385
stop("The sample_params() function must return 'mu', 'sigma', and 'coefficients'")
388386

389387
# Does the sampler return beta? If so, we want to store separately
390-
beta_sampled = !is.null(params$coefficients$beta)
388+
beta_sampled = !is.null(params$coefficients[["beta"]])
391389

392390
#Does the sampler return mu_test
393391
testpoints = !is.null(params$mu_test)
@@ -533,7 +531,7 @@ genMCMC_star = function(y,
533531
if(nsi==1){
534532
print("Burn-In Period")
535533
} else if (nsi < nburn){
536-
computeTimeRemaining(nsi, timer0, nstot, nrep = 4000)
534+
computeTimeRemaining(nsi, timer0, nburn, nrep = 4000)
537535
} else if (nsi==nburn){
538536
print("Starting sampling")
539537
timer1 = proc.time()[3]
@@ -669,7 +667,7 @@ genMCMC_star = function(y,
669667
#' X_nonlin = as.matrix(X[,(1:3)])
670668
#'
671669
#' # STAR: nonparametric transformation
672-
#' fit <- bam_star(y,X_lin, X_nonlin)
670+
#' fit <- bam_star(y,X_lin, X_nonlin, nburn=1000, nskip=0)
673671
#'
674672
#' # Posterior mean of each coefficient:
675673
#' coef(fit)
@@ -819,8 +817,8 @@ bam_star = function(y, X_lin, X_nonlin, splinetype="orthogonal",
819817
#' y = sim_dat$y; X = sim_dat$X
820818
#'
821819
#' # BART-STAR with log-transformation:
822-
#' fit_log = bart_star(y = y, X = X,
823-
#' transformation = 'log', save_y_hat = TRUE)
820+
#' fit_log = bart_star(y = y, X = X, transformation = 'log',
821+
#' save_y_hat = TRUE, nburn=1000, nskip=0)
824822
#'
825823
#' # Fitted values
826824
#' plot_fitted(y = sim_dat$Ey,
@@ -1116,7 +1114,7 @@ bart_star = function(y,
11161114
if(nsi==1){
11171115
print("Burn-In Period")
11181116
} else if (nsi < nburn){
1119-
computeTimeRemaining(nsi, timer0, nstot, nrep = 4000)
1117+
computeTimeRemaining(nsi, timer0, nburn, nrep = 4000)
11201118
} else if (nsi==nburn){
11211119
print("Starting sampling")
11221120
timer1 = proc.time()[3]
@@ -1454,7 +1452,7 @@ spline_star = function(y,
14541452
if(nsi==1){
14551453
print("Burn-In Period")
14561454
} else if (nsi < nburn){
1457-
computeTimeRemaining(nsi, timer0, nstot, nrep = 4000)
1455+
computeTimeRemaining(nsi, timer0, nburn, nrep = 4000)
14581456
} else if (nsi==nburn){
14591457
print("Starting sampling")
14601458
timer1 = proc.time()[3]
@@ -1467,3 +1465,153 @@ spline_star = function(y,
14671465

14681466
return(list(post_ytilde=post_ytilde))
14691467
}
1468+
1469+
#' Initialize linear regression parameters assuming a g-prior
1470+
#'
1471+
#' Initialize the parameters for a linear regression model assuming a
1472+
#' g-prior for the coefficients.
1473+
#'
1474+
#' @param y \code{n x 1} vector of data
1475+
#' @param X \code{n x p} matrix of predictors
1476+
#' @param X_test \code{n0 x p} matrix of predictors at test points (default is NULL)
1477+
#'
1478+
#' @return a named list \code{params} containing at least
1479+
#' \enumerate{
1480+
#' \item \code{mu}: vector of conditional means (fitted values)
1481+
#' \item \code{sigma}: the conditional standard deviation
1482+
#' \item \code{coefficients}: a named list of parameters that determine \code{mu}
1483+
#' }
1484+
#' Additionally, if X_test is not NULL, then the list includes an element
1485+
#' \code{mu_test}, the vector of conditional means at the test points
1486+
#'
1487+
#' @note The parameters in \code{coefficients} are:
1488+
#' \itemize{
1489+
#' \item \code{beta}: the \code{p x 1} vector of regression coefficients
1490+
#' components of \code{beta}
1491+
#' }
1492+
#'
1493+
#' @examples
1494+
#' # Simulate data for illustration:
1495+
#' sim_dat = simulate_nb_lm(n = 100, p = 5)
1496+
#' y = sim_dat$y; X = sim_dat$X
1497+
#'
1498+
#' # Initialize:
1499+
#' params = init_lm_gprior(y = y, X = X)
1500+
#' names(params)
1501+
#' names(params$coefficients)
1502+
#'
1503+
#' @export
1504+
init_lm_gprior = function(y, X, X_test=NULL){
1505+
1506+
# Initialize the linear model:
1507+
n = nrow(X); p = ncol(X)
1508+
1509+
# Regression coefficients: depending on p >= n or p < n
1510+
if(p >= n){
1511+
beta = sampleFastGaussian(Phi = X, Ddiag = rep(1, p), alpha = y)
1512+
} else beta = lm(y ~ X - 1)$coef
1513+
1514+
# Fitted values:
1515+
mu = X%*%beta
1516+
1517+
#Mean at the test points (if passed in)
1518+
if(!is.null(X_test)) mu_test = X_test%*%beta
1519+
1520+
# Observation SD:
1521+
sigma = sd(y - mu)
1522+
1523+
# Named list of coefficients:
1524+
coefficients = list(beta = beta)
1525+
1526+
result = list(mu = mu, sigma = sigma, coefficients = coefficients)
1527+
if(!is.null(X_test)){
1528+
result = c(result, list(mu_test = mu_test))
1529+
}
1530+
return(result)
1531+
}
1532+
#' Sample the linear regression parameters assuming a g-prior
1533+
#'
1534+
#' Sample the parameters for a linear regression model assuming a
1535+
#' g-prior for the coefficients.
1536+
#'
1537+
#' @param y \code{n x 1} vector of data
1538+
#' @param X \code{n x p} matrix of predictors
1539+
#' @param params the named list of parameters containing
1540+
#' \enumerate{
1541+
#' \item \code{mu}: vector of conditional means (fitted values)
1542+
#' \item \code{sigma}: the conditional standard deviation
1543+
#' \item \code{coefficients}: a named list of parameters that determine \code{mu}
1544+
#' }
1545+
#' @param psi the prior variance for the g-prior
1546+
#' @param XtX the \code{p x p} matrix of \code{crossprod(X)} (one-time cost);
1547+
#' if NULL, compute within the function
1548+
#' @param X_test matrix of predictors at test points (default is NULL)
1549+
#'
1550+
#' @return The updated named list \code{params} with draws from the full conditional distributions
1551+
#' of \code{sigma} and \code{coefficients} (along with updated \code{mu} and \code{mu_test} if applicable).
1552+
#'
1553+
#' @note The parameters in \code{coefficients} are:
1554+
#' \itemize{
1555+
#' \item \code{beta}: the \code{p x 1} vector of regression coefficients
1556+
#' components of \code{beta}
1557+
#' }
1558+
#'
1559+
#' @examples
1560+
#' # Simulate data for illustration:
1561+
#' sim_dat = simulate_nb_lm(n = 100, p = 5)
1562+
#' y = sim_dat$y; X = sim_dat$X
1563+
#' # Initialize:
1564+
#' params = init_lm_gprior(y = y, X = X)
1565+
#' # Sample:
1566+
#' params = sample_lm_gprior(y = y, X = X, params = params)
1567+
#' names(params)
1568+
#' names(params$coefficients)
1569+
#'
1570+
#' @import truncdist
1571+
#' @export
1572+
sample_lm_gprior = function(y, X, params, psi = NULL, XtX = NULL, X_test=NULL){
1573+
1574+
# Dimensions:
1575+
n = nrow(X); p = ncol(X)
1576+
1577+
if(is.null(psi)) psi = n # default
1578+
1579+
# For faster computations:
1580+
if(is.null(XtX)) XtX = crossprod(X)
1581+
1582+
# Access elements of the named list:
1583+
sigma = params$sigma # Observation SD
1584+
coefficients = params$coefficients # Coefficients to access below:
1585+
1586+
beta = coefficients$beta; # Regression coefficients (including intercept)
1587+
1588+
# Sample the regression coefficients:
1589+
Q_beta = 1/sigma^2*(1+psi)/(psi)*XtX
1590+
ell_beta = 1/sigma^2*crossprod(X, y)
1591+
ch_Q = chol(Q_beta)
1592+
beta = backsolve(ch_Q,
1593+
forwardsolve(t(ch_Q), ell_beta) +
1594+
rnorm(p))
1595+
1596+
# Conditional mean:
1597+
mu = X%*%beta
1598+
1599+
#Mean at the test points (if passed in)
1600+
if(!is.null(X_test)) mu_test = X_test%*%beta
1601+
1602+
# Observation SD:
1603+
sigma = 1/sqrt(rgamma(n = 1,
1604+
shape = .001 + n/2,
1605+
rate = .001 + sum((y - mu)^2)/2))
1606+
1607+
# Update the coefficients:
1608+
coefficients$beta = beta
1609+
1610+
result = list(mu = mu, sigma = sigma, coefficients = coefficients)
1611+
if(!is.null(X_test)){
1612+
result = c(result, list(mu_test = mu_test))
1613+
}
1614+
return(result)
1615+
}
1616+
1617+

_pkgdown.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ reference:
2626
- bart_star
2727
- spline_star
2828
- genMCMC_star
29+
- init_lm_gprior
30+
- sample_lm_gprior
2931
- title: WarpDLMs (Count Time Series Modeling)
3032
desc: Methods for warped Dynamic Linear Models to perform Bayesian inference and forecasting for count-valued time series
3133
- contents:

man/init_lm_gprior.Rd

Lines changed: 12 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/sample_lm_gprior.Rd

Lines changed: 13 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)