Skip to content

Commit

Permalink
Merge pull request #320 from USGS-R/develop
Browse files Browse the repository at this point in the history
as for full 2016 data pull
  • Loading branch information
aappling-usgs authored Feb 5, 2017
2 parents dd5110a + d87ca83 commit 0d1c963
Show file tree
Hide file tree
Showing 146 changed files with 10,041 additions and 286 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: streamMetabolizer
Type: Package
Title: Models for Estimating Aquatic Photosynthesis and Respiration
Version: 0.9.33
Date: 2017-01-17
Version: 0.9.38
Date: 2017-02-03
Author: Alison P. Appling, Robert O. Hall, Maite Arroita, and Charles B.
Yackulic
Authors@R: c(
Expand Down
21 changes: 19 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
# 0.9.36

* Models should now be able to accept `tbl_df`s (dplyr/tibble format) for the
`data` and `data_daily` arguments to `metab()`

# 0.9.35

* Bayesian models now distinguish between compilation time and fitting time

* Updates to `plot_distribs` for recent changes to Bayesian models

# 0.9.34

* Bayesian models with `pool_K600 != 'none'` can how have their
`K600_daily_sigma` (or `K600_daily_sdlog`) be a fitted value, a fixed value, or
a value fixed at 0

# 0.9.33

* new function: `calc_light_merged`, which merges modeled and observed light
* new function: `calc_light_merged`, which merges modeled and observed light
into a smooth curve

# 0.9.29
Expand All @@ -10,7 +27,7 @@ model (and therefore also its `info` or `data_daily` slots)

# 0.9.28

* switched from `rlnorm` to `rnorm` for distribution of `K600_daily` around
* switched from `rlnorm` to `rnorm` for distribution of `K600_daily` around
`K600_daily_pred` in linear and binned models

# 0.9.27
Expand Down
69 changes: 45 additions & 24 deletions R/calc_light_merged.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,19 @@
#' @export
#' @examples
#' \dontrun{
#' library(mda.streams)
#' library(dplyr)
#' library(lubridate)
#' library(ggplot2)
#' library(unitted)
#' coords <- get_site_coords('nwis_08062500')
#' PAR.obs <- get_ts('par_calcSw', 'nwis_08062500') %>%
#' mutate(solar.time = convert_UTC_to_solartime(DateTime, coords$lon)) %>%
#' select(solar.time, light=par) %>%
#' subset(solar.time %within% interval(ymd("2008-03-10"), ymd("2008-03-14"), tz=tz(solar.time)) &
#' !(solar.time %within% interval(as.POSIXct("2008-03-12 10:00", tz=tz(solar.time)),
#' as.POSIXct("2008-03-12 14:00", tz=tz(solar.time)))))
#' PAR.mod <- u(data.frame(solar.time=seq(as.POSIXct('2008-03-11', tz=tz(PAR.obs$solar.time)),
#' as.POSIXct('2008-03-15', tz=tz(PAR.obs$solar.time)), by=as.difftime(10, units='mins'))) %>%
#' mutate(light=calc_light(u(solar.time), latitude=coords$lat, longitude=coords$lon)))
#' timebounds <- as.POSIXct(c('2008-03-12 00:00', '2008-03-12 23:59'), tz='UTC')
#' coords <- list(lat=32.4, lon=-96.5)
#' PAR.obs <- data_frame(
#' solar.time=seq(timebounds[1], timebounds[2], by=as.difftime(3, units='hours')),
#' light=c(0, 0, 85.9, 1160.5, 1539.0, 933.9, 0, 0)
#' ) %>% as.data.frame()
#' PAR.mod <- data_frame(
#' solar.time=seq(timebounds[1], timebounds[2], by=as.difftime(0.25, units='hours')),
#' light=calc_light(solar.time, latitude=coords$lat, longitude=coords$lon)
#' ) %>% as.data.frame()
#' PAR.merged <- calc_light_merged(PAR.obs, PAR.mod$solar.time,
#' latitude=coords$lat, longitude=coords$lon, max.gap=as.difftime(20, units='hours'))
#' ggplot(bind_rows(mutate(v(PAR.obs), type='obs'), mutate(v(PAR.mod), type='mod'),
Expand All @@ -51,6 +49,22 @@ calc_light_merged <- function(
solar.time, latitude, longitude, max.PAR=NA,
max.gap=as.difftime(3, units="hours"), attach.units=is.unitted(PAR.obs)) {

# ensure units are correct and present within this function
arg_units <- list(
PAR.obs=get_units(mm_data(solar.time, light)),
solar.time='',
latitude='degN',
longitude='degE')
for(argname in names(arg_units)) {
arg <- get(argname)
unit <- arg_units[[argname]]
if(is.unitted(arg)) {
stopifnot(all(get_units(arg) == unit))
} else {
assign(argname, u(arg, unit))
}
}

. <- is.mod <- obs <- mod <- resid.abs.int <- resid.prop.int <- merged <- '.dplyr.var'

# set smart default for max.PAR to make the ts pretty
Expand Down Expand Up @@ -94,27 +108,34 @@ calc_light_merged <- function(
}

# create a 'merged' time series where modeled values are adjusted to flow
# through observed values
# through observed values.

# compute the modeled values and then the residuals as both differences and
# proportions, dealing with 0 specially
PAR.merged <- PAR.merged %>%
mutate(
# model light
mod = calc_light(solar.time, latitude, longitude, max.PAR),
# compute the residuals as both differences and proportions, dealing with NA and 0 specially
resid.abs = ifelse(is.na(obs), 0, obs-mod),
resid.prop = ifelse(mod==u(0, 'umol m^-2 s^-1') | (is.na(obs) & mod==u(0, 'umol m^-2 s^-1')), 1, obs/mod))
# interpolate the residuals to match up with every modeled light value. pipes fail with this approx call, so use boring notation
PAR.merged$resid.abs.int <- approx(x=PAR.merged$solar.time, y=PAR.merged$resid.abs, xout=PAR.merged$solar.time, rule=2)$y
if(is.unitted(PAR.merged)) PAR.merged$resid.abs.int <- u(PAR.merged$resid.abs.int, get_units(PAR.merged$obs))
PAR.merged$resid.prop.int <- approx(x=PAR.merged$solar.time, y=PAR.merged$resid.prop, xout=PAR.merged$solar.time, rule=2)$y
resid.abs = obs - mod,
resid.prop = ifelse(mod==u(0, 'umol m^-2 s^-1'), NA, obs / mod))

# interpolate the residuals to match up with every modeled light value. pipes
# fail with this approx call, so use boring notation
PAR.obsonly <- PAR.merged[!is.na(PAR.merged$obs), ]
PAR.merged$resid.abs.int <- approx(x=PAR.obsonly$solar.time, y=v(PAR.obsonly$resid.abs), xout=v(PAR.merged$solar.time), rule=2)$y
PAR.merged$resid.prop.int <- approx(x=PAR.obsonly$solar.time, y=PAR.obsonly$resid.prop, xout=PAR.merged$solar.time, rule=2)$y

# do the correction from mod scale to obs scale
PAR.merged <- PAR.merged %>%
# do the correction from mod scale to obs scale. purely absolute or purely proportional can give some funky values, so use the
mutate(merged = u(ifelse(resid.prop.int <= 1, mod * resid.prop.int, mod + resid.abs.int), get_units(mod)))
mutate(
merged = u(ifelse(resid.prop.int <= 1, mod * resid.prop.int, pmax(0, v(mod) + resid.abs.int)), get_units(mod)))

# collect just the rows and cols we want
PAR.merged <- PAR.merged %>%
# collect just the rows and cols we want
subset(is.mod) %>%
select(solar.time, light=merged)

if(!attach.units) PAR.merged <- v(PAR.merged)

# return
PAR.merged
}
Expand Down
6 changes: 0 additions & 6 deletions R/calc_velocity.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,12 @@
#' @param m exponent in velocity-discharge relation (unitless; f in Raymond et al.)
#' @return v (= V = U), stream flow velcoity, in the same units as k
#' @examples
#' \dontrun{
#' # Warning: this function is deprecated.
#' Qs <- seq(1,9,2)
#' calc_velocity(Q=Qs)
#' calc_velocity(Q=Qs, k=0.4)
#' library(unitted)
#' calc_velocity(Q=u(Qs, "m^3 s^-1"), m=u(40))
#' calc_velocity(Q=u(Qs, "m^3 s^-1"), k=u(0.36, "m s^-1"))
#' }
#' @references Raymond, Peter A., Christopher J. Zappa, David Butman, Thomas L.
#' Bott, Jody Potter, Patrick Mulholland, Andrew E. Laursen, William H.
#' McDowell, and Denis Newbold. \emph{Scaling the gas transfer velocity and
Expand All @@ -33,9 +30,6 @@
#' @export
calc_velocity <- function(Q, k=u(0.194,"m s^-1"), m=u(0.285,"")) {

.Deprecated()
warning("submit a GitHub issue if you want calc_velocity() to stick around")

with.units <- is.unitted(Q) || (if(!missing(k)) is.unitted(k) else FALSE) || (if(!missing(m)) is.unitted(m) else FALSE)

if(with.units) {
Expand Down
11 changes: 6 additions & 5 deletions R/metab_Kmodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -398,13 +398,14 @@ get_params.metab_Kmodel <- function(
K600.daily.sd = fit[['se']])
},
lm = {
preds <- predict(fit, newdata=data_daily, interval='confidence', level=0.95)
preds <- predict(fit, newdata=data_daily, interval='confidence', level=0.95) %>%
as.data.frame()
params %<>% mutate(
K600.daily = preds[,'fit'], # only the approx mean if ktrans=='log'
K600.daily = preds[['fit']], # only the approx mean if ktrans=='log'
K600.daily.sd = NA, # this shouldn't be necessary after resolving #238
K600_daily_50pct = preds[,'fit'],
K600_daily_2.5pct = preds[,'lwr'],
K600_daily_97.5pct = preds[,'upr'])
K600_daily_50pct = preds[['fit']],
K600_daily_2.5pct = preds[['lwr']],
K600_daily_97.5pct = preds[['upr']])
},
loess = {
preds <- predict(fit, newdata=data_daily, se=TRUE)
Expand Down
42 changes: 26 additions & 16 deletions R/metab_bayes.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,16 @@ metab_bayes <- function(
# Check data for correct column names & units
dat_list <- mm_validate_data(if(missing(data)) NULL else data, if(missing(data_daily)) NULL else data_daily, "metab_bayes")
num_discharge_cols <- length(grep('discharge', c(names(dat_list$data), names(dat_list$data_daily))))
pool_K600 <- mm_parse_name(specs$model_name)$pool_K600
if(xor(num_discharge_cols > 0, pool_K600 %in% c('linear','binned')))
stop('discharge data should be included if & only if pool_K600 indicates hierarchy')
pool_K600_type <- mm_parse_name(specs$model_name, expand = TRUE)$pool_K600_type
if(xor(num_discharge_cols > 0, pool_K600_type %in% c('linear','binned')))
stop('discharge data should be included if & only if pool_K600_type indicates hierarchy')
if(num_discharge_cols > 1)
stop('either discharge or discharge.daily may be specified, but not both')

# Handle discharge. If K600 is a hierarchical function of discharge and
# data$discharge was given, compute daily discharge and store in data.daily
# where it'll be accessible for the user to inspect it after model fitting
if((pool_K600 %in% c('linear','binned')) && ('discharge' %in% names(dat_list$data))) {
if((pool_K600_type %in% c('linear','binned')) && ('discharge' %in% names(dat_list$data))) {
# calculate daily discharge
dailymean <- function(data_ply, data_daily_ply, day_start, day_end, ply_date, ply_validity, timestep_days, ...) {
data.frame(discharge.daily = if(isTRUE(ply_validity[1])) mean(data_ply$discharge) else NA)
Expand Down Expand Up @@ -111,7 +111,7 @@ metab_bayes <- function(
dat_list$data_daily$lnQ.daily <- log(v(dat_list$data_daily$discharge.daily))
}
# If we need discharge bins, compute & store those now, as well
if(pool_K600 %in% c('binned')) {
if(pool_K600_type %in% c('binned')) {
# linear interpolation from node to node, horizontal at the edges
bounds <- c(-Inf, specs$K600_lnQ_nodes_centers, Inf)
cuts <- cut(dat_list$data_daily$lnQ.daily, breaks=bounds, ordered_result=TRUE)
Expand Down Expand Up @@ -176,6 +176,7 @@ metab_bayes <- function(
{if(!is.null(.)) setNames(., 'Compilation') else .}
log <- extract_object_list('log') %>% { setNames(., paste0('MCMC_', names(.))) }
bayes_log <- c(compile_log, log)
bayes_compile_time <- bayes_all_list[['compile_time']]
bayes_mcmc <- extract_object_list('mcmcfit')
bayes_mcmc_data <- extract_object_list('mcmc_data')
bayes_all <- list(daily=bayes_daily)
Expand All @@ -195,10 +196,11 @@ metab_bayes <- function(
. <- '.dplyr.var'
bayes_log <- bayes_all_list[c('compile_log', 'log')] %>%
setNames(c('Compilation','MCMC_All_Days')) %>% { .[!sapply(., is.null)] }
bayes_compile_time <- bayes_all_list[['compile_time']]
bayes_mcmc <- bayes_all_list$mcmcfit
bayes_mcmc_data <- bayes_all_list$mcmc_data
# now a list of dfs, log, warnings, and errors
bayes_all <- bayes_all_list[!(names(bayes_all_list) %in% c('compile_log','log','mcmcfit','mcmc_data'))]
bayes_all <- bayes_all_list[!(names(bayes_all_list) %in% c('compile_log','compile_time','log','mcmcfit','mcmc_data'))]
}
})

Expand All @@ -210,7 +212,8 @@ metab_bayes <- function(
log=bayes_log,
mcmc=bayes_mcmc,
mcmc_data=bayes_mcmc_data,
fitting_time=fitting_time,
fitting_time=fitting_time - bayes_compile_time,
compile_time=bayes_compile_time,
specs=specs,
data=dat_list$data, # keep the units if given
data_daily=dat_list$data_daily)
Expand Down Expand Up @@ -483,7 +486,7 @@ prepdata_bayes <- function(
if(n24 > num_daily_obs) stop("day_end - day_start < 24 hours; aborting because daily metabolism could be wrong")

# parse model name into features for deciding what data to include
features <- mm_parse_name(model_name)
features <- mm_parse_name(model_name, expand=TRUE)

# Format the data for Stan. Stan disallows period-separated names, so
# change all the input data to underscore-separated. parameters given in
Expand All @@ -501,7 +504,7 @@ prepdata_bayes <- function(
),

switch(
features$pool_K600,
features$pool_K600_type,
linear = list(lnQ_daily = data_daily$lnQ.daily),
binned = list(
b = length(specs$K600_lnQ_nodes_centers),
Expand Down Expand Up @@ -533,7 +536,7 @@ prepdata_bayes <- function(

specs[specs$params_in]
)
if(features$pool_K600 == 'binned') {
if(features$pool_K600_type == 'binned') {
data_list$K600_lnQ_nodes_meanlog <- array(data_list$K600_lnQ_nodes_meanlog, dim=data_list$b)
data_list$K600_lnQ_nodes_sdlog <- array(data_list$K600_lnQ_nodes_sdlog, dim=data_list$b)
}
Expand Down Expand Up @@ -604,12 +607,15 @@ runstan_bayes <- function(data_list, model_path, params_out, split_dates, keep_m

# use auto_write=TRUE to recompile if needed, or load from existing .rds file
# without recompiling if possible
compile_time <- system.time({})
mobj_path <- gsub('.stan$', '.stanrds', model_path)
if(!file.exists(mobj_path) || file.info(mobj_path)$mtime < file.info(model_path)$mtime) {
if(verbose) message("compiling Stan model")
compile_log <- capture.output({
stan_mobj <- rstan::stan_model(file=model_path, auto_write=TRUE)
}, type=c('output'), split=verbose)
compile_time <- system.time({
compile_log <- capture.output({
stan_mobj <- rstan::stan_model(file=model_path, auto_write=TRUE)
}, type=c('output'), split=verbose)
})
rm(stan_mobj)
gc() # this humble line saves us from many horrible R crashes
autowrite_path <- gsub('.stan$', '.rds', model_path)
Expand Down Expand Up @@ -681,7 +687,10 @@ runstan_bayes <- function(data_list, model_path, params_out, split_dates, keep_m
newlogfiles <- normalizePath(file.path(tempdir(), grep("_StanProgress.txt", dir(tempdir()), value=TRUE)))
logfile <- setdiff(newlogfiles, oldlogfiles)
log <- if(length(logfile) > 0) readLines(logfile) else consolelog
stan_out <- c(stan_out, c(list(log=log), if(exists('compile_log')) list(compile_log=compile_log)))
stan_out <- c(stan_out, c(
list(log=log),
if(exists('compile_log')) list(compile_log=compile_log),
list(compile_time=compile_time)))

return(stan_out)
}
Expand Down Expand Up @@ -770,7 +779,7 @@ format_mcmc_mat_nosplit <- function(mcmc_mat, data_list_d, data_list_n, keep_mcm
setClass(
"metab_bayes",
contains="metab_model",
slots=c(log="ANY", mcmc="ANY", mcmc_data="ANY")
slots=c(log="ANY", mcmc="ANY", mcmc_data="ANY", compile_time="ANY")
)

#' Extract any MCMC model objects that were stored with the model
Expand Down Expand Up @@ -957,6 +966,7 @@ get_params.metab_bayes <- function(metab_model, date_start=NA, date_end=NA, unce
}
names(metab_model@fit$daily) <- gsub('_mean$', '', names(metab_model@fit$daily))
names(metab_model@fit$daily) <- gsub('_sd$', '.sd', names(metab_model@fit$daily))
names(metab_model@fit$daily) <- gsub('_50pct$', '.median', names(metab_model@fit$daily))
names(metab_model@fit$daily) <- gsub('_2.5pct$', '.lower', names(metab_model@fit$daily))
names(metab_model@fit$daily) <- gsub('_97.5pct$', '.upper', names(metab_model@fit$daily))
# code duplicated in get_params.metab_Kmodel:
Expand All @@ -970,6 +980,6 @@ get_params.metab_bayes <- function(metab_model, date_start=NA, date_end=NA, unce
dmsg <- metab_model@fit$daily$errors
metab_model@fit$daily$errors <- ifelse(dmsg == '', omsg, paste(omsg, dmsg, sep=';'))
}
metab_model@fit <- metab_model@fit$daily # SUPER-TEMPORARY we're still converting fit$daily to fit until #247, #229
metab_model@fit <- metab_model@fit$daily # TEMPORARY we're still converting fit$daily to fit until #247, #229
NextMethod()
}
13 changes: 11 additions & 2 deletions R/metab_model.get_params.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ get_params.metab_model <- function(
# add uncertainty columns if requested
if(uncertainty != 'none') {
metab.vars <- metab.out
metab.uncert <- matrix(paste0(rep(metab.out, each=3), rep(c('.sd','.lower','.upper'), times=length(metab.out))), nrow=3, byrow=FALSE)
suffixes <- c('.sd','.median','.lower','.upper')
metab.uncert <- matrix(paste0(rep(metab.out, each=length(suffixes)), rep(suffixes, times=length(metab.out))), nrow=length(suffixes), byrow=FALSE)
metab.out <- c(rbind(metab.out, metab.uncert)) %>% { .[. %in% names(pars)]}
}

Expand All @@ -91,16 +92,24 @@ get_params.metab_model <- function(

# convert sds to CIs if requested
if(uncertainty == 'sd') {
extra.cols <- grep('\\.lower$|\\.upper$', names(params))
extra.cols <- grep('\\.median$|\\.lower$|\\.upper$', names(params))
if(length(extra.cols) > 0) params <- params[-extra.cols]
} else if(uncertainty == 'ci') {
# use existing .lower and .upper cols if available
for(mv in metab.vars) {
if(all(paste0(mv,c('.lower','.upper')) %in% names(params))) {
extra.cols <- grep(paste0(mv,'\\.sd$'), names(params))
if(length(extra.cols) > 0) params <- params[-extra.cols]
# if we're using existing .lower and .upper cols, also try to use existing .median col
if(paste0(mv,'.median') %in% names(params)) {
params[[mv]] <- params[[paste0(mv,'.median')]] # copy .median over/into un-suffixed name
}
}
}
# remove any '.median' columns; by now we've either used them or have no
# use for them
extra.cols <- grep(paste0('\\.median$'), names(params))
if(length(extra.cols) > 0) params <- params[-extra.cols]
# convert any remaining .sd cols to .lower and .upper parametrically
params <- mm_sd_to_ci(params)
}
Expand Down
2 changes: 1 addition & 1 deletion R/metab_model.show.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ setMethod(
}
cat("streamMetabolizer version", object@pkg_version, "\n")
print(get_specs(object), header="Specifications:\n", prefix=" ")
cat("Fitting time: ", get_fitting_time(object)[['elapsed']], " secs elapsed\n", sep="")
cat("Fitting time: ", tryCatch(get_fitting_time(object)[['elapsed']], error=function(e) "NA"), " secs elapsed\n", sep="")

# set the sim.seed for a sec so the params and metab preds look the same.
# setting the seed makes a copy of object, so we won't need to reset the
Expand Down
5 changes: 4 additions & 1 deletion R/metab_model_interface.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,10 @@ get_version <- function(metab_model) {
#' @param uncertainty character. Should columns for the uncertainty of parameter
#' estimates be excluded ('none'), reported as standard deviations ('sd'), or
#' reported as lower and upper bounds of a 95 percent confidence interval
#' ('ci')?
#' ('ci')? When available (e.g., for Bayesian models), if 'ci' then the
#' central value will be the median (50th quantile) and the ranges will be the
#' 2.5th and 97.5th quantiles. If 'sd' then the central value will always be
#' the mean.
#' @param messages logical. Should warning and error messages from the fitting
#' procedure be included in the output?
#' @param fixed character. Should values pulled from data_daily (i.e., fixed
Expand Down
Loading

0 comments on commit 0d1c963

Please sign in to comment.