diff --git a/DESCRIPTION b/DESCRIPTION index 74a9688..c33b61f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,19 @@ Package: psycho Type: Package Title: Efficient and Publishing-Oriented Workflow for Psychological Science -Version: 0.3.5 +Version: 0.3.6 Authors@R: c( person("Dominique", "Makowski", email = "dom.makowski@gmail.com", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0001-5375-9967")), + person("Hugo", + "Najberg", + role = "ctb"), person("Viliam", "Simko", - role = "ctb", - email = "viliam.simko@gmail.com"), + role = "ctb"), person("Sasha", "Epskamp", role = "rev", @@ -26,7 +28,7 @@ Description: The main goal of the psycho package is to provide tools for psychol License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.0 Depends: R (>= 3.4.0) Imports: diff --git a/NAMESPACE b/NAMESPACE index 5e9c563..a18c8b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,7 +12,7 @@ S3method(analyze,lm) S3method(analyze,lmerModLmerTest) S3method(analyze,stanreg) S3method(as.data.frame,density) -S3method(find_best_model,merModLmerTest) +S3method(find_best_model,lmerModLmerTest) S3method(find_best_model,stanreg) S3method(find_combinations,formula) S3method(get_R2,glm) @@ -22,6 +22,9 @@ S3method(get_R2,stanreg) S3method(get_contrasts,glmerMod) S3method(get_contrasts,lmerModLmerTest) S3method(get_contrasts,stanreg) +S3method(get_data,lm) +S3method(get_data,merMod) +S3method(get_data,stanreg) S3method(get_formula,glm) S3method(get_formula,glmerMod) S3method(get_formula,lm) @@ -50,6 +53,9 @@ S3method(standardize,numeric) S3method(standardize,stanreg) S3method(summary,psychobject) export(.fa_variance_text) +export(HDI) +export(HDImax) +export(HDImin) export(R2_LOO_Adjusted) export(R2_nakagawa) export(R2_tjur) @@ -82,7 +88,6 @@ export(get_formula) export(get_info) export(get_loadings_max) export(get_predicted) -export(hdi) export(interpret_R2) export(interpret_R2_posterior) export(interpret_RMSEA) @@ -115,6 +120,7 @@ export(remove_empty_cols) export(reorder_matrix) export(rnorm_perfect) export(rope) +export(simulate_data_regression) export(standardize) export(values) import(broom) @@ -173,6 +179,7 @@ importFrom(stats,dnorm) importFrom(stats,ecdf) importFrom(stats,family) importFrom(stats,formula) +importFrom(stats,getCall) importFrom(stats,hclust) importFrom(stats,mad) importFrom(stats,median) @@ -204,5 +211,6 @@ importFrom(stringr,str_trim) importFrom(tibble,rownames_to_column) importFrom(utils,capture.output) importFrom(utils,combn) +importFrom(utils,data) importFrom(utils,head) importFrom(utils,tail) diff --git a/NEWS.md b/NEWS.md index b511a05..6943fd3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,10 @@ # Current Dev ### Breaking changes +- Changed `hdi` to `HDI` - Removed Mean and SD from summary in `bayes_cor.test` ### New functions / parameters +- Added `simulate_data_regression` - Extended `standardize` method - Added `get_R2` method - Added `interpret_odds` and logistic regression effect size interpretation @@ -163,7 +165,7 @@ ### Breaking changes ### New functions / parameters -- `hdi`: Compute highest density intervals +- `HDI`: Compute highest density intervals - `format_string`: A tidyverse friendly version of `sprintf` ### Major changes - Changed credible interval computation in analyze.stanreg diff --git a/R/analyze.htest.R b/R/analyze.htest.R index c5b5041..0d4bc72 100644 --- a/R/analyze.htest.R +++ b/R/analyze.htest.R @@ -48,6 +48,8 @@ analyze.htest <- function(x, effsize_rules="cohen1988", ...) { # Text # ------------- + + # CORRELATION if (grepl("correlation", values$method)) { text <- paste0( "The ", @@ -68,6 +70,8 @@ analyze.htest <- function(x, effsize_rules="cohen1988", ...) { format_p(values$p, stars = FALSE), ")." ) + + # T-TEST } else if (grepl("t-test", values$method)) { if (names(x$null.value) == "mean") { means <- paste0( @@ -93,10 +97,6 @@ analyze.htest <- function(x, effsize_rules="cohen1988", ...) { vars <- paste0(values$names, " (", means, ")") } - - - - values$effect <- values$effect[1] - values$effect[2] text <- paste0( @@ -117,6 +117,7 @@ analyze.htest <- function(x, effsize_rules="cohen1988", ...) { format_p(values$p, stars = FALSE), ")." ) + # OTHER } else { stop(paste0("The ", values$method, " is not implemented yet.")) } diff --git a/R/analyze.lm.R b/R/analyze.lm.R index f335f3e..466a65a 100644 --- a/R/analyze.lm.R +++ b/R/analyze.lm.R @@ -50,7 +50,7 @@ analyze.lm <- function(x, CI=95, effsize_rules="cohen1988", ...) { summary$p <- summary$`Pr...t..` # standardized coefficients - standardized <- tibble::rownames_to_column(standardize(fit, method = "refit"), "Variable") + standardized <- tibble::rownames_to_column(standardize(fit, method = "refit", data=data), "Variable") summary <- merge(summary, standardized, by = "Variable", all.x = TRUE, sort = FALSE) summary$Effect_Size <- c(NA, interpret_d(tail(summary$Coef_std, -1), rules = effsize_rules)) diff --git a/R/analyze.stanreg.R b/R/analyze.stanreg.R index a1b52fa..c189d2c 100644 --- a/R/analyze.stanreg.R +++ b/R/analyze.stanreg.R @@ -4,6 +4,9 @@ #' #' @param x A stanreg model. #' @param CI Credible interval bounds. +#' @param index Index of effect existence to report. Can be 'overlap' or 'ROPE'. +#' @param ROPE_bounds Bounds ot the ROPE. If NULL and effsize is TRUE, than the ROPE. +#' will have default values c(-0.1, 0.1) and computed on the standardized posteriors. #' @param effsize Compute Effect Sizes according to Cohen (1988). For linear models only. #' @param effsize_rules Grid for effect size interpretation. See \link[=interpret_d]{interpret_d}. #' @param ... Arguments passed to or from other methods. @@ -15,6 +18,7 @@ #' \item{the Credible Interval (CI) (by default, the 90\% CI; see Kruschke, 2018), representing a range of possible parameter.} #' \item{the Maximum Probability of Effect (MPE), the probability that the effect is positive or negative (depending on the median’s direction).} #' \item{the Overlap (O), the percentage of overlap between the posterior distribution and a normal distribution of mean 0 and same SD than the posterior. Can be interpreted as the probability that a value from the posterior distribution comes from a null distribution.} +#' \item{the ROPE, the proportion of the 95\% CI of the posterior distribution that lies within the region of practical equivalence.} #' } #' #' @examples @@ -62,7 +66,7 @@ #' @importFrom broom tidy #' @importFrom stringr str_squish str_replace #' @export -analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", ...) { +analyze.stanreg <- function(x, CI=90, index="overlap", ROPE_bounds=NULL, effsize=FALSE, effsize_rules="cohen1988", ...) { fit <- x # Info -------------------------------------------------------------------- @@ -164,7 +168,9 @@ analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", CI = CI, effsize = effsize, effsize_rules = effsize_rules, - fit = fit + fit = fit, + index=index, + ROPE_bounds=ROPE_bounds ) } } @@ -377,7 +383,7 @@ analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", values$mad <- mad(posterior) values$mean <- mean(posterior) values$sd <- sd(posterior) - values$CI_values <- hdi(posterior, prob = CI / 100) + values$CI_values <- HDI(posterior, prob = CI / 100) values$CI_values <- c(values$CI_values$values$HDImin, values$CI_values$values$HDImax) values$MPE <- NA values$MPE_values <- NA @@ -454,7 +460,7 @@ analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", values$mad <- mad(posterior) values$mean <- mean(posterior) values$sd <- sd(posterior) - values$CI_values <- hdi(posterior, prob = CI / 100) + values$CI_values <- HDI(posterior, prob = CI / 100) values$CI_values <- c(values$CI_values$values$HDImin, values$CI_values$values$HDImax) values$MPE <- NA values$MPE_values <- NA @@ -507,7 +513,18 @@ analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", #' @keywords internal -.process_effect <- function(varname, posteriors, posteriors_std, info_priors, predictors, CI=90, effsize=FALSE, effsize_rules=FALSE, fit) { +.process_effect <- function(varname, + posteriors, + posteriors_std, + info_priors, + predictors, + CI=90, + effsize=FALSE, + effsize_rules=FALSE, + fit, + index="overlap", + ROPE_bounds=NULL) { + values <- .get_info_priors(varname, info_priors, predictors) posterior <- posteriors[, varname] @@ -518,10 +535,12 @@ analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", values$mad <- mad(posterior) values$mean <- mean(posterior) values$sd <- sd(posterior) - values$CI_values <- hdi(posterior, prob = CI / 100) + values$CI_values <- HDI(posterior, prob = CI / 100) values$CI_values <- c(values$CI_values$values$HDImin, values$CI_values$values$HDImax) values$MPE <- mpe(posterior)$MPE values$MPE_values <- mpe(posterior)$values + + # Index values$overlap <- 100 * overlap( posterior, rnorm_perfect( @@ -531,6 +550,48 @@ analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", ) ) + if(!is.null(ROPE_bounds)){ + rope <- rope(posterior, bounds=ROPE_bounds) + values$ROPE_decision <- rope$rope_decision + values$ROPE <- rope$rope_probability + } + + if(index == "overlap"){ + index <- paste0("Overlap = ", + format_digit(values$overlap, null_treshold = 0.01), + "%).") + } else if(index == "ROPE"){ + if(!is.null(ROPE_bounds)){ + index <- paste0("ROPE = ", + format_digit(values$ROPE, null_treshold = 0.001), + ").") + } else{ + if(effsize == TRUE){ + rope <- rope(posteriors_std[, varname], bounds=c(-0.1, 0.1)) + values$ROPE_decision <- rope$rope_decision + values$ROPE <- rope$rope_probability + index <- paste0("ROPE = ", + format_digit(values$ROPE, null_treshold = 0.001), + ").") + } else{ + warning("you need to specify ROPE_bounds (e.g. 'c(-0.1, 0.1)'). Computing overlap instead.") + index <- paste0("Overlap = ", + format_digit(values$overlap, null_treshold = 0.01), + "%).") + } + } + + } else{ + warning("Parameter 'index' should be 'overlap' or 'ROPE'. Computing overlap.") + index <- paste0("Overlap = ", + format_digit(values$overlap, null_treshold = 0.01), + "%).") + } + + + + + # Text if (grepl(":", varname)) { splitted <- strsplit(varname, ":")[[1]] @@ -564,9 +625,7 @@ analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", "% CI [", format_digit(values$CI_values[1], null_treshold = 0.0001), ", ", format_digit(values$CI_values[2], null_treshold = 0.0001), "], ", - "Overlap = ", - format_digit(values$overlap, null_treshold = 0.01), - "%)." + index ) @@ -579,7 +638,7 @@ analyze.stanreg <- function(x, CI=90, effsize=FALSE, effsize_rules="cohen1988", values$std_mad <- mad(posterior_std) values$std_mean <- mean(posterior_std) values$std_sd <- sd(posterior_std) - values$std_CI_values <- hdi(posterior_std, prob = CI / 100) + values$std_CI_values <- HDI(posterior_std, prob = CI / 100) values$std_CI_values <- c(values$std_CI_values$values$HDImin, values$std_CI_values$values$HDImax) if (fit$family$family == "binomial" & fit$family$link == "logit") { diff --git a/R/bayes_cor.R b/R/bayes_cor.R index 35ff538..e919cd6 100644 --- a/R/bayes_cor.R +++ b/R/bayes_cor.R @@ -69,8 +69,8 @@ bayes_cor.test <- function(x, y, CI=90, iterations = 10000, effsize_rules_r="coh values$mad <- mad(posterior) values$mean <- mean(posterior) values$sd <- sd(posterior) - values$CI <- hdi(posterior, prob = CI / 100)$text - values$CI_values <- hdi(posterior, prob = CI / 100) + values$CI <- HDI(posterior, prob = CI / 100)$text + values$CI_values <- HDI(posterior, prob = CI / 100) values$CI_values <- c(values$CI_values$values$HDImin, values$CI_values$values$HDImax) values$MPE <- mpe(posterior)$MPE values$MPE_values <- mpe(posterior)$values diff --git a/R/crawford.test.R b/R/crawford.test.R index 4b67fdf..0d674c3 100644 --- a/R/crawford.test.R +++ b/R/crawford.test.R @@ -95,7 +95,7 @@ crawford.test <- function(patient, pvalues <- pvalues / 2 p <- mean(pvalues) - CI <- hdi(pvalues, prob = CI / 100) + CI <- HDI(pvalues, prob = CI / 100) # CI_1 <- sort(pvalues)[iter * (100 - CI) / 100] diff --git a/R/find_best_model.R b/R/find_best_model.R index 5362404..8b98e0d 100644 --- a/R/find_best_model.R +++ b/R/find_best_model.R @@ -4,7 +4,7 @@ #' documentation for your model's class: #' \itemize{ #' \item{\link[=find_best_model.stanreg]{find_best_model.stanreg}} -#' \item{\link[=find_best_model.merModLmerTest]{find_best_model.merModLmerTest}} +#' \item{\link[=find_best_model.lmerModLmerTest]{find_best_model.lmerModLmerTest}} #' } #' #' @param fit Model diff --git a/R/find_best_model.lmerModLmerTest.R b/R/find_best_model.lmerModLmerTest.R index 06734b8..07372b1 100644 --- a/R/find_best_model.lmerModLmerTest.R +++ b/R/find_best_model.lmerModLmerTest.R @@ -79,4 +79,4 @@ find_best_model.lmerModLmerTest <- function(fit, interaction=TRUE, fixed=NULL, . best <- list(formula = best, by_criterion = by_criterion, table = comparison) return(best) -} +} \ No newline at end of file diff --git a/R/find_best_model.merModLmerTest.R b/R/find_best_model.merModLmerTest.R deleted file mode 100644 index 5db0857..0000000 --- a/R/find_best_model.merModLmerTest.R +++ /dev/null @@ -1,71 +0,0 @@ -#' Returns the best combination of predictors for lmerTest objects. -#' -#' Returns the best combination of predictors for lmerTest objects. -#' -#' @param fit A merModLmerTest object. -#' @param interaction Include interaction term. -#' @param fixed Additional formula part to add at the beginning of -#' each formula -#' @param ... Arguments passed to or from other methods. -#' -#' @return list containing all combinations. -#' -#' @examples -#' \dontrun{ -#' library(psycho) -#' library(lmerTest) -#' -#' data <- standardize(iris) -#' fit <- lmerTest::lmer(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), data=data) -#' -#' best <- find_best_model(fit) -#' best_formula <- best$formula -#' best$table -#' -#' } -#' -#' @author \href{https://dominiquemakowski.github.io/}{Dominique Makowski} -#' -#' @importFrom stats update -#' @import dplyr -#' -#' @method find_best_model merModLmerTest -#' @export -find_best_model.merModLmerTest <- function(fit, interaction=TRUE, fixed=NULL, ...) { - - # Extract infos - combinations <- find_combinations(fit@call$formula, interaction = interaction, fixed = fixed) - - # fit models - models <- c() - for (formula in combinations) { - newfit <- update(fit, formula) - models <- c(models, newfit) - } - - - # Model comparison - comparison <- as.data.frame(do.call("anova", models)) - comparison$formula <- combinations - - - # Best model by criterion - best_aic <- dplyr::arrange_(comparison, "AIC") %>% - dplyr::select_("formula") %>% - head(1) - best_aic <- as.character(best_aic[[1]]) - - best_bic <- dplyr::arrange_(comparison, "BIC") %>% - dplyr::select_("formula") %>% - head(1) - best_bic <- as.character(best_bic[[1]]) - - by_criterion <- data.frame(formula = c(best_aic, best_bic), criterion = c("AIC", "BIC")) - - # Best formula - best <- table(by_criterion$formula) - best <- names(best[which.max(best)]) - - best <- list(formula = best, by_criterion = by_criterion, table = comparison) - return(best) -} diff --git a/R/find_best_model.stanreg.R b/R/find_best_model.stanreg.R index c554b50..54b155b 100644 --- a/R/find_best_model.stanreg.R +++ b/R/find_best_model.stanreg.R @@ -58,8 +58,15 @@ find_best_model.stanreg <- function(fit, interaction=TRUE, fixed=NULL, K=10, k_t formula <- combinations[i] newfit <- update(fit, formula = formula, verbose = FALSE) R2s[[formula]] <- median(rstanarm::bayes_R2(newfit)) - - loo <- rstanarm::loo(newfit, k_treshold = k_treshold) + + + if (!is.null(k_treshold)) { + loo <- rstanarm::loo(newfit, k_treshold = k_treshold) + } else { + loo <- rstanarm::loo(newfit) + } + + complexities[[formula]] <- length(newfit$coefficients) loos[[formula]] <- loo if (K > 1) { @@ -76,18 +83,19 @@ find_best_model.stanreg <- function(fit, interaction=TRUE, fixed=NULL, K=10, k_t loo <- loos[[formula]] kfold <- kfolds[[formula]] complexity <- complexities[[formula]] + Estimates <- loo[["estimates"]] model <- data.frame( formula = formula, complexity = complexity - 1, R2 = R2s[[formula]], - looic = loo$looic, - looic_se = loo$se_looic, - elpd_loo = loo$elpd_loo, - elpd_loo_se = loo$se_elpd_loo, - p_loo = loo$p_loo, - p_loo_se = loo$se_p_loo, - elpd_kfold = kfold$elpd_kfold, - elpd_kfold_se = kfold$se_elpd_kfold + looic = Estimates["looic","Estimate"], + looic_se = Estimates["looic","SE"], + elpd_loo = Estimates["elpd_loo","Estimate"], + elpd_loo_se = Estimates["elpd_loo","SE"], + p_loo = Estimates["p_loo","Estimate"], + p_loo_se = Estimates["p_loo","SE"], + elpd_kfold = Estimates["p_loo","Estimate"], + elpd_kfold_se = Estimates["p_loo","SE"] ) comparison <- rbind(comparison, model) } diff --git a/R/find_combinations.R b/R/find_combinations.R index 900c49b..40fc343 100644 --- a/R/find_combinations.R +++ b/R/find_combinations.R @@ -11,3 +11,96 @@ find_combinations <- function(object, ...) { UseMethod("find_combinations") } + + + + + + + + + + + + + + +#' Generate all combinations of predictors of a formula. +#' +#' Generate all combinations of predictors of a formula. +#' +#' @param object Formula. +#' @param interaction Include interaction term. +#' @param fixed Additional formula part to add at the beginning of +#' each combination. +#' @param ... Arguments passed to or from other methods. +#' +#' @return list containing all combinations. +#' +#' @examples +#' library(psycho) +#' +#' f <- as.formula("Y ~ A + B + C + D") +#' f <- as.formula("Y ~ A + B + C + D + (1|E)") +#' f <- as.formula("Y ~ A + B + C + D + (1|E) + (1|F)") +#' +#' find_combinations(f) +#' +#' @author \href{https://dominiquemakowski.github.io/}{Dominique Makowski} +#' +#' @method find_combinations formula +#' @importFrom utils combn +#' @importFrom stats terms +#' @export +find_combinations.formula <- function(object, interaction=TRUE, fixed=NULL, ...) { + + # Extract infos + formula <- object + vars <- attributes(terms(formula))$term.labels + outcome <- all.vars(formula)[1] + pred <- vars[!grepl("\\|", vars)] + if (length(vars[grepl("\\|", vars)]) > 0) { + random <- paste0(" + (", vars[grepl("\\|", vars)], ")") + } else { + random <- "" + } + + if (is.null(fixed)) { + fixed <- "" + } else { + fixed <- fixed + } + + # Generate combinations + n <- length(pred) + + id <- unlist( + lapply( + 1:n, + function(i) combn(1:n, i, simplify = FALSE) + ) + , + recursive = FALSE + ) + + combinations <- sapply(id, function(i) + paste(paste(pred[i], collapse = " + "))) + + + # Generate interactions + if (interaction == TRUE) { + for (comb in combinations) { + n_signs <- stringr::str_count(comb, "\\+") + if (n_signs > 0) { + new_formula <- comb + for (i in 1:n_signs) { + new_formula <- stringr::str_replace(new_formula, "\\+", "*") + combinations <- c(combinations, new_formula) + } + } + } + } + + combinations <- paste0(outcome, " ~ ", fixed, combinations, paste0(random, collapse = "")) + return(combinations) +} diff --git a/R/find_combinations.formula.R b/R/find_combinations.formula.R deleted file mode 100644 index 245595f..0000000 --- a/R/find_combinations.formula.R +++ /dev/null @@ -1,79 +0,0 @@ -#' Generate all combinations of predictors of a formula. -#' -#' Generate all combinations of predictors of a formula. -#' -#' @param object Formula. -#' @param interaction Include interaction term. -#' @param fixed Additional formula part to add at the beginning of -#' each combination. -#' @param ... Arguments passed to or from other methods. -#' -#' @return list containing all combinations. -#' -#' @examples -#' library(psycho) -#' -#' f <- as.formula("Y ~ A + B + C + D") -#' f <- as.formula("Y ~ A + B + C + D + (1|E)") -#' f <- as.formula("Y ~ A + B + C + D + (1|E) + (1|F)") -#' -#' find_combinations(f) -#' -#' @author \href{https://dominiquemakowski.github.io/}{Dominique Makowski} -#' -#' @method find_combinations formula -#' @importFrom utils combn -#' @importFrom stats terms -#' @export -find_combinations.formula <- function(object, interaction=TRUE, fixed=NULL, ...) { - - # Extract infos - formula <- object - vars <- attributes(terms(formula))$term.labels - outcome <- all.vars(formula)[1] - pred <- vars[!grepl("\\|", vars)] - if (length(vars[grepl("\\|", vars)]) > 0) { - random <- paste0(" + (", vars[grepl("\\|", vars)], ")") - } else { - random <- "" - } - - if (is.null(fixed)) { - fixed <- "" - } else { - fixed <- fixed - } - - # Generate combinations - n <- length(pred) - - id <- unlist( - lapply( - 1:n, - function(i) combn(1:n, i, simplify = FALSE) - ) - , - recursive = FALSE - ) - - combinations <- sapply(id, function(i) - paste(paste(pred[i], collapse = " + "))) - - - # Generate interactions - if (interaction == TRUE) { - for (comb in combinations) { - n_signs <- stringr::str_count(comb, "\\+") - if (n_signs > 0) { - new_formula <- comb - for (i in 1:n_signs) { - new_formula <- stringr::str_replace(new_formula, "\\+", "*") - combinations <- c(combinations, new_formula) - } - } - } - } - - combinations <- paste0(outcome, " ~ ", fixed, combinations, paste0(random, collapse = "")) - return(combinations) -} diff --git a/R/get_contrasts.stanreg.R b/R/get_contrasts.stanreg.R index 3807126..90bafdf 100644 --- a/R/get_contrasts.stanreg.R +++ b/R/get_contrasts.stanreg.R @@ -49,7 +49,7 @@ get_contrasts.stanreg <- function(fit, formula, prob=0.9, ...) { for (name in names(means_posterior)) { var <- means_posterior[[name]] - CI_values <- hdi(var, prob = prob) + CI_values <- HDI(var, prob = prob) CI_values <- c(CI_values$values$HDImin, CI_values$values$HDImax) var <- data.frame( @@ -78,7 +78,7 @@ get_contrasts.stanreg <- function(fit, formula, prob=0.9, ...) { for (name in names(contrasts_posterior)) { var <- contrasts_posterior[[name]] - CI_values <- hdi(var, prob = prob) + CI_values <- HDI(var, prob = prob) CI_values <- c(CI_values$values$HDImin, CI_values$values$HDImax) var <- data.frame( diff --git a/R/get_data.R b/R/get_data.R index 3cea695..e67dbc8 100644 --- a/R/get_data.R +++ b/R/get_data.R @@ -7,6 +7,7 @@ #' #' @examples #' \dontrun{ +#' library(tidyverse) #' library(psycho) #' #' df <- mtcars %>% @@ -18,6 +19,7 @@ #' fit <- lm(wt ~ mpg * cyl, data=df) #' fit <- lm(wt ~ cyl * gear, data=df) #' fit <- lmerTest::lmer(wt ~ mpg * gear + (1|cyl), data=df) +#' fit <- rstanarm::stan_lmer(wt ~ mpg * gear + (1|cyl), data=df) #' #' get_data(fit) #' @@ -26,12 +28,28 @@ #' @author \href{https://dominiquemakowski.github.io/}{Dominique Makowski} #' @export get_data <- function(fit, ...) { +UseMethod("get_data") +} + + +#' @importFrom stats getCall +#' @importFrom utils data +#' @export +get_data.lm <- function(fit, ...) { + + tryCatch({ + data <- eval(getCall(fit)$data, environment(formula(fit))) + return(data) + }) + info <- get_info(fit) outcome <- info$outcome predictors <- info$predictors + data <- as.data.frame(model.frame(fit)) + effects <- names(MuMIn::coeffs(fit)) effects <- unique(unlist(stringr::str_split(effects, ":"))) numerics <- predictors[predictors %in% effects] @@ -47,3 +65,16 @@ get_data <- function(fit, ...) { return(as.data.frame(data)) } + +#' @export +get_data.merMod <- get_data.lm + + + + + +#' @export +get_data.stanreg <- function(fit, ...) { + data <- fit$data + return(data) +} diff --git a/R/get_predicted.stanreg.R b/R/get_predicted.stanreg.R index 0c4fbc7..04c7244 100644 --- a/R/get_predicted.stanreg.R +++ b/R/get_predicted.stanreg.R @@ -10,6 +10,7 @@ #' @param draws An integer indicating the number of draws to return. The default and maximum number of draws is the size of the posterior sample. #' @param posterior_predict Posterior draws of the outcome instead of the link function (i.e., the regression "line"). #' @param seed An optional seed to use. +#' @param transform If posterior_predict is False, should the linear predictor be transformed using the inverse-link function? The default is FALSE, in which case the untransformed linear predictor is returned. #' @param ... Arguments passed to or from other methods. #' #' @@ -52,7 +53,7 @@ #' @importFrom dplyr bind_cols #' @importFrom tibble rownames_to_column #' @export -get_predicted.stanreg <- function(fit, newdata="model", prob=0.9, odds_to_probs=TRUE, keep_iterations=FALSE, draws=NULL, posterior_predict=FALSE, seed=NULL, ...) { +get_predicted.stanreg <- function(fit, newdata="model", prob=0.9, odds_to_probs=TRUE, keep_iterations=FALSE, draws=NULL, posterior_predict=FALSE, seed=NULL, transform=FALSE, ...) { # Extract names predictors <- all.vars(as.formula(fit$formula)) @@ -87,7 +88,7 @@ get_predicted.stanreg <- function(fit, newdata="model", prob=0.9, odds_to_probs= # Generate draws ------------------------------------------------------- if (posterior_predict == FALSE) { - posterior <- rstanarm::posterior_linpred(fit, newdata = newdata, re.form = re.form, seed = seed, draws = draws) + posterior <- rstanarm::posterior_linpred(fit, newdata = newdata, re.form = re.form, seed = seed, draws = draws, transform=transform) } else { posterior <- rstanarm::posterior_predict(fit, newdata = newdata, re.form = re.form, seed = seed, draws = draws) } @@ -100,7 +101,7 @@ get_predicted.stanreg <- function(fit, newdata="model", prob=0.9, odds_to_probs= # Credible Interval for (CI in c(prob)) { - pred_y_interval <- hdi(posterior, prob = CI) + pred_y_interval <- HDI(posterior, prob = CI) names(pred_y_interval) <- paste(outcome, "CI", c((1 - CI) / 2 * 100, 100 - ((1 - CI) / 2 * 100)), sep = "_") pred_y <- cbind(pred_y, pred_y_interval) } diff --git a/R/hdi.R b/R/hdi.R index 28a37b9..a90cc1a 100644 --- a/R/hdi.R +++ b/R/hdi.R @@ -9,18 +9,18 @@ #' library(psycho) #' #' distribution <- rnorm(1000, 0, 1) -#' hdi_values <- hdi(distribution) -#' print(hdi_values) -#' plot(hdi_values) -#' summary(hdi_values) +#' HDI_values <- HDI(distribution) +#' print(HDI_values) +#' plot(HDI_values) +#' summary(HDI_values) #' #' x <- matrix(rexp(200), 100) -#' hdi_values <- hdi(x) +#' HDI_values <- HDI(x) #' #' @author \href{https://dominiquemakowski.github.io/}{Dominique Makowski} #' #' @export -hdi <- function(x, prob = .95) { +HDI <- function(x, prob = .95) { # From CI to prob if necessary if (prob > 1 & prob <= 100) { @@ -32,7 +32,7 @@ hdi <- function(x, prob = .95) { HDImin <- c() HDImax <- c() for (col in seq_len(ncol(x))) { - HDI <- .hdi(x[, col], prob = prob) + HDI <- .HDI(x[, col], prob = prob) HDImin <- c(HDImin, HDI[1]) HDImax <- c(HDImax, HDI[2]) } @@ -43,7 +43,7 @@ hdi <- function(x, prob = .95) { } else { # Process # ------------- - HDI <- .hdi(x, prob = prob) + HDI <- .HDI(x, prob = prob) HDImin <- HDI[1] HDImax <- HDI[2] @@ -87,8 +87,42 @@ hdi <- function(x, prob = .95) { } } + + + +#' Highest Density Intervals (HDI) +#' +#' See \link[=HDI]{HDI} +#' +#' @param x A vector of values from a probability distribution (e.g., posterior probabilities from MCMC sampling). +#' @param prob Scalar between 0 and 1, indicating the mass within the credible interval that is to be estimated. +#' +#' @export +HDImin <- function(x, prob = .95) { + HDImin <- HDI(x, prob = prob)$values$HDImin + return(HDImin) +} + +#' Highest Density Intervals (HDI) +#' +#' See \link[=HDI]{HDI} +#' +#' @param x A vector of values from a probability distribution (e.g., posterior probabilities from MCMC sampling). +#' @param prob Scalar between 0 and 1, indicating the mass within the credible interval that is to be estimated. +#' +#' @export +HDImax <- function(x, prob = .95) { + HDImax <- HDI(x, prob = prob)$values$HDImax + return(HDImax) +} + + + + + + #' @keywords internal -.hdi <- function(x, prob) { +.HDI <- function(x, prob) { x <- sort(x) ci.index <- ceiling(prob * length(x)) nCIs <- length(x) - ci.index @@ -97,3 +131,4 @@ hdi <- function(x, prob = .95) { HDImax <- x[which.min(ci.width) + ci.index] return(c(HDImin, HDImax)) } + diff --git a/R/rope.R b/R/rope.R index 599199e..b1a22c6 100644 --- a/R/rope.R +++ b/R/rope.R @@ -26,15 +26,15 @@ rope <- function(posterior, bounds=c(-0.1, 0.1), CI=95, overlap=FALSE) { # Basic rope -------------------------------------------------------------- - hdi_area <- hdi(posterior, CI / 100) - hdi_area <- posterior[dplyr::between( + HDI_area <- HDI(posterior, CI / 100) + HDI_area <- posterior[dplyr::between( posterior, - hdi_area$values$HDImin, - hdi_area$values$HDImax + HDI_area$values$HDImin, + HDI_area$values$HDImax )] - area_within <- hdi_area[dplyr::between(hdi_area, bounds[1], bounds[2])] - area_outside <- hdi_area[!dplyr::between(hdi_area, bounds[1], bounds[2])] + area_within <- HDI_area[dplyr::between(HDI_area, bounds[1], bounds[2])] + area_outside <- HDI_area[!dplyr::between(HDI_area, bounds[1], bounds[2])] p_within <- length(area_within) / length(posterior) p_outside <- length(area_outside) / length(posterior) diff --git a/R/simulate.R b/R/simulate.R new file mode 100644 index 0000000..48ea827 --- /dev/null +++ b/R/simulate.R @@ -0,0 +1,57 @@ +#' Simulates data for single or multiple regression. +#' +#' Simulates data for single or multiple regression. +#' +#' @param coefs Desired theorethical coefs. Can be a single value or a list. +#' @param sample Desired sample size. +#' @param error The error (standard deviation of gaussian noise). +#' +#' @examples +#' library(psycho) +#' +#' data <- simulate_data_regression(coefs=c(0.1, 0.8), sample=50, error=0) +#' fit <- lm(y ~ ., data=data) +#' coef(fit) +#' analyze(fit) +#' +#' @details See https://stats.stackexchange.com/questions/59062/multiple-linear-regression-simulation +#' +#' @author TPArrow +#' +#' @export +simulate_data_regression <- function(coefs=0.5, sample=100, error=0){ + + # Prevent error + coefs[coefs == 0] <- 0.01 + + y <- rnorm(sample, 0, 1) + + n_var <- length(coefs) + X <- scale(matrix(rnorm(sample*(n_var), 0, 1), ncol=n_var)) + X <- cbind(y, X) + + # find the current correlation matrix + cor_0 <- var(X) + + # cholesky decomposition to get independence + chol_0 <- solve(chol(cor_0)) + + X <- X %*% chol_0 + + # create new correlation structure (zeros can be replaced with other r vals) + coefs_structure <- diag(x = 1, nrow=n_var+1, ncol=n_var+1, names = TRUE) + coefs_structure[-1, 1] <- coefs + coefs_structure[1, -1] <- coefs + + X <- X %*% chol(coefs_structure) * sd(y) + mean(y) + X <- X[,-1] + + # Add noise + y <- y + rnorm(sample, 0, error) + + data <- data.frame(X) + names(data) <- paste0("V", 1:n_var) + data$y <- as.vector(y) + + return(data) +} diff --git a/R/standardize.R b/R/standardize.R index a694cab..d73bfa2 100644 --- a/R/standardize.R +++ b/R/standardize.R @@ -60,7 +60,7 @@ standardize <- function(x, ...) { #' #' @export standardize.numeric <- function(x, normalize=FALSE, ...) { - if (all(is.na(x))) { + if (all(is.na(x)) | length(unique(x)) == 2) { return(x) } @@ -326,6 +326,8 @@ standardize.glm <- function(x, method="refit", ...) { # refit method data <- get_data(fit) fit_std <- update(fit, data = standardize(data)) + + coefs <- MuMIn::coefTable(fit_std)[, 1:2] } diff --git a/docs/_posts/preparation/ANOVA_vs_LM.Rmd b/docs/_posts/preparation/ANOVA_vs_LM.Rmd index 5e2a07c..0b8c833 100644 --- a/docs/_posts/preparation/ANOVA_vs_LM.Rmd +++ b/docs/_posts/preparation/ANOVA_vs_LM.Rmd @@ -1,5 +1,5 @@ --- -title: "GLMs: Nested Effects or Interaction?" +title: "ANOVA vs. Linear Model" layout: post output: md_document: @@ -19,21 +19,40 @@ editor_options: library(knitr) ``` -Let's take interest in the relationship between **life satisfaction**, **sex** and **concealing** (the tendency to suppress or hide one's emotions) +Let's take interest in the relationship between **life satisfaction**, **sex** and **concealing** (the tendency to suppress or hide one's emotions). # The Data ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} +# import packages that we will use library(tidyverse) library(psycho) +# import data and select variables that we will use df <- psycho::affective %>% select(Sex, Life_Satisfaction, Concealing) +# get a summmary of the data summary(df) ``` # Primitve Way +```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} +# Splithalf method +df$Concealing_Binary <- ifelse(df$Concealing > median(df$Concealing), "High", "Low") + +anova_results <- aov(Life_Satisfaction ~ Sex * Concealing_Binary, data=df) +analyze(anova_results) +``` + +```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} +df %>% + ggplot(aes(x=Concealing_Binary, y=Life_Satisfaction, colour=Sex, group=Sex)) + + stat_summary(fun.data = "mean_se", geom="pointrange") + + stat_summary(fun.y = "mean", geom="line") +``` + + ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} anova_results <- aov(Life_Satisfaction ~ Sex * Concealing, data=df) analyze(anova_results) @@ -53,7 +72,7 @@ First, note that running an anova on the linear model produces EXACTLY the same analyze(anova(fit)) ``` - But we can to have a look at the model itself, which is richer than the ANOVA. +But we can to have a look at the model itself, which is richer than the ANOVA. ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} analyze(fit) @@ -61,6 +80,8 @@ analyze(fit) Again, three lines of "effect". One could think these a similar to the ones of ANOVA, with the main effect of Sex, the main effect of concealing and the interaction. But they are not exactly that! they refer to specific parameters (or "effects") of the model. +*So an ANOVA reports each mean and a p-value that says at least two are significantly different. A regression reports only one mean (as an intercept), and the differences between that one and all other means, but the p-values evaluate those specific comparisons.* + # Visualize the data The traditional way of visualizing the daa in this case, is do to a scatter plot with the dependent variable as Y, the linear predictor as X and the factor as colour. We can then draw the regression lines. @@ -116,6 +137,7 @@ This is what our model predicted. # Effects + ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} refdata <- df %>% select(Sex, Concealing) %>% diff --git a/docs/_posts/preparation/analyze_fa.Rmd b/docs/_posts/preparation/analyze_fa.Rmd index d2443eb..7f2f62e 100644 --- a/docs/_posts/preparation/analyze_fa.Rmd +++ b/docs/_posts/preparation/analyze_fa.Rmd @@ -19,9 +19,10 @@ editor_options: library(knitr) ``` -Interested in quicky creating tables and plots for factor analysis? +Interested in quicky creating formatted tables and plots for factor analysis? + +# The data -# Do a correlation ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} library(tidyverse) library(psycho) @@ -36,8 +37,11 @@ df <- psych::bfi %>% psycho::n_factors(df) ``` + The [Method Agreement Procedure](https://neuropsychology.github.io/psycho.R/2018/05/24/n_factors.html) suggests to retain 5 factors. +# Do the EFA + ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} fa <- psych::fa(df, nfactors=5) results <- analyze(fa) @@ -46,30 +50,34 @@ results <- analyze(fa) # Loadings Table -```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} +```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='hide', fig.align='center', comment=NA, message=FALSE, warning=FALSE} summary(results) ``` +```{r, fig.width=7, fig.height=4.5, echo=FALSE, message=FALSE, warning=FALSE} +knitr::kable(summary(results), digits=2) +``` +Running a summary on the analyzed `fa` object returns a dataframe, which is convenient to save, manipulate or enjoy. - -# Lavaan-ready Model for CFA +# Loadings Plot ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} -print(results) - -analyze(fa, treshold=0.2) +plot(results) ``` + # Lavaan-ready Model for CFA ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} -plot(results) +print(results) + +analyze(fa, treshold=0.2) ``` # Contribute -Of course, these reporting standards should change, depending on new expert recommandations or official guidelines. **The goal of this package is to flexibly adaptive to new changes and good practices evolution**. Therefore, if you have any advices, opinions or such, we encourage you to either let us know by opening an [issue](https://github.com/neuropsychology/psycho.R/issues), or even better, try to implement them yourself by [contributing](https://github.com/neuropsychology/psycho.R/blob/master/.github/CONTRIBUTING.md) to the code. +**psycho is a young package in need of affection**. Therefore, if you have any advices, opinions or such, we encourage you to either let us know by opening an [issue](https://github.com/neuropsychology/psycho.R/issues), or by [contributing](https://github.com/neuropsychology/psycho.R/blob/master/.github/CONTRIBUTING.md) to the code. # Credits diff --git a/docs/_posts/preparation/nested_effects.Rmd b/docs/_posts/preparation/nested_effects.Rmd index 76dce9d..552a117 100644 --- a/docs/_posts/preparation/nested_effects.Rmd +++ b/docs/_posts/preparation/nested_effects.Rmd @@ -1,5 +1,5 @@ --- -title: "GLMs: Nested Effects or Interaction?" +title: "GLMs: Nested Variables or Interaction?" layout: post output: md_document: @@ -19,7 +19,7 @@ editor_options: library(knitr) ``` -When I started doing statistics, after a bachelor in psychology, there was, to me, no such things as nested effects. You tested a model (often directly the ANOVA) with the interaction or without. +Psychologists are, for historical reasons, familiar with categorical predictors. These are reffered to as "effects" of different conditions upon an outcome variable. Interestingly, one of the first topic that is taught to first year students, in experimental psychology, is "interactions". When I started doing statistics, after a bachelor in psychology, there was, to me, no such things as nested effects or variables. You tested a model (often directly the ANOVA) with the interaction or without. Let's take interest in the relationship between **life satisfaction**, **sex** and **concealing** (the tendency to suppress or hide one's emotions) @@ -35,7 +35,7 @@ df <- psycho::affective %>% summary(df) ``` -# Primitve Way +# Primitive Way ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} anova_results <- aov(Life_Satisfaction ~ Sex * Concealing, data=df) analyze(anova_results) @@ -44,6 +44,7 @@ analyze(anova_results) # Regression ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} fit <- lm(Life_Satisfaction ~ Sex * Concealing, data=df) +summary(fit) ``` ```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} @@ -58,6 +59,22 @@ analyze(fit) Again, three lines of "effect". One could think these a similar to the one + + +# Visualize + +```{r, fig.width=7, fig.height=4.5, eval = TRUE, results='markup', fig.align='center', comment=NA, message=FALSE, warning=FALSE} +fit <- lm(Life_Satisfaction ~ Adjusting / Concealing, data=df) +summary(fit) +df %>% + refdata(c("Concealing", "Adjusting"), length.out = 5) %>% + get_predicted(fit, .) %>% + ggplot(aes(x=Adjusting, y=Life_Satisfaction_Predicted, alpha=Concealing, group=Concealing)) + + geom_line() +``` + + + # Contribute Of course, these reporting standards should change, depending on new expert recommandations or official guidelines. **The goal of this package is to flexibly adaptive to new changes and good practices evolution**. Therefore, if you have any advices, opinions or such, we encourage you to either let us know by opening an [issue](https://github.com/neuropsychology/psycho.R/issues), or even better, try to implement them yourself by [contributing](https://github.com/neuropsychology/psycho.R/blob/master/.github/CONTRIBUTING.md) to the code. diff --git a/docs/_posts/preparation/package_downloads.Rmd b/docs/_posts/preparation/package_downloads.Rmd index dcc8102..ce5cc9a 100644 --- a/docs/_posts/preparation/package_downloads.Rmd +++ b/docs/_posts/preparation/package_downloads.Rmd @@ -93,8 +93,8 @@ events <- left_join(events, data, by="date") library(rstanarm) library(psycho) -# fit <- rstanarm::stan_glm(count~poly(day_num, 5), data=data) -fit <- glm(count~poly(day_num, 5), data=data) +fit <- rstanarm::stan_glm(count~poly(day_num, 5), data=data) +# fit <- glm(count~poly(day_num, 5), data=data) psycho::get_predicted(fit, data) %>% ggplot(aes(x=date, group=1)) + @@ -108,7 +108,6 @@ psycho::get_predicted(fit, data) %>% labels=scales::date_format("%Y-%m")) + xlab("") + ylab("CRAN Downloads\n") - ``` # Day diff --git a/docs/index.html b/docs/index.html index 07b52c5..35c45af 100644 --- a/docs/index.html +++ b/docs/index.html @@ -14,7 +14,7 @@

About

People

- Current contributors are D. Makowski, V. Simko, and YOU? + Current contributors are D. Makowski, H. Najberg, V. Simko, and YOU?

Posts

diff --git a/docs/~$index.html b/docs/~$index.html deleted file mode 100644 index dbf5605..0000000 Binary files a/docs/~$index.html and /dev/null differ diff --git a/man/HDImax.Rd b/man/HDImax.Rd new file mode 100644 index 0000000..e2bb438 --- /dev/null +++ b/man/HDImax.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hdi.R +\name{HDImax} +\alias{HDImax} +\title{Highest Density Intervals (HDI)} +\usage{ +HDImax(x, prob = 0.95) +} +\arguments{ +\item{x}{A vector of values from a probability distribution (e.g., posterior probabilities from MCMC sampling).} + +\item{prob}{Scalar between 0 and 1, indicating the mass within the credible interval that is to be estimated.} +} +\description{ +See \link[=HDI]{HDI} +} diff --git a/man/HDImin.Rd b/man/HDImin.Rd new file mode 100644 index 0000000..023e147 --- /dev/null +++ b/man/HDImin.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hdi.R +\name{HDImin} +\alias{HDImin} +\title{Highest Density Intervals (HDI)} +\usage{ +HDImin(x, prob = 0.95) +} +\arguments{ +\item{x}{A vector of values from a probability distribution (e.g., posterior probabilities from MCMC sampling).} + +\item{prob}{Scalar between 0 and 1, indicating the mass within the credible interval that is to be estimated.} +} +\description{ +See \link[=HDI]{HDI} +} diff --git a/man/analyze.glmerMod.Rd b/man/analyze.glmerMod.Rd index ec1f834..a6ccf46 100644 --- a/man/analyze.glmerMod.Rd +++ b/man/analyze.glmerMod.Rd @@ -4,7 +4,8 @@ \alias{analyze.glmerMod} \title{Analyze glmerMod objects.} \usage{ -\method{analyze}{glmerMod}(x, CI = 95, effsize_rules = "cohen1988", ...) +\method{analyze}{glmerMod}(x, CI = 95, effsize_rules = "cohen1988", + ...) } \arguments{ \item{x}{merModLmerTest object.} diff --git a/man/analyze.lmerModLmerTest.Rd b/man/analyze.lmerModLmerTest.Rd index 526ed1b..d8ee989 100644 --- a/man/analyze.lmerModLmerTest.Rd +++ b/man/analyze.lmerModLmerTest.Rd @@ -4,8 +4,8 @@ \alias{analyze.lmerModLmerTest} \title{Analyze lmerModLmerTest objects.} \usage{ -\method{analyze}{lmerModLmerTest}(x, CI = 95, effsize_rules = "cohen1988", - ...) +\method{analyze}{lmerModLmerTest}(x, CI = 95, + effsize_rules = "cohen1988", ...) } \arguments{ \item{x}{lmerModLmerTest object.} diff --git a/man/analyze.stanreg.Rd b/man/analyze.stanreg.Rd index 4f47c62..e5cd31f 100644 --- a/man/analyze.stanreg.Rd +++ b/man/analyze.stanreg.Rd @@ -4,14 +4,20 @@ \alias{analyze.stanreg} \title{Analyze stanreg objects.} \usage{ -\method{analyze}{stanreg}(x, CI = 90, effsize = FALSE, - effsize_rules = "cohen1988", ...) +\method{analyze}{stanreg}(x, CI = 90, index = "overlap", + ROPE_bounds = NULL, effsize = FALSE, effsize_rules = "cohen1988", + ...) } \arguments{ \item{x}{A stanreg model.} \item{CI}{Credible interval bounds.} +\item{index}{Index of effect existence to report. Can be 'overlap' or 'ROPE'.} + +\item{ROPE_bounds}{Bounds ot the ROPE. If NULL and effsize is TRUE, than the ROPE. +will have default values c(-0.1, 0.1) and computed on the standardized posteriors.} + \item{effsize}{Compute Effect Sizes according to Cohen (1988). For linear models only.} \item{effsize_rules}{Grid for effect size interpretation. See \link[=interpret_d]{interpret_d}.} @@ -26,6 +32,7 @@ Contains the following indices: \item{the Credible Interval (CI) (by default, the 90\% CI; see Kruschke, 2018), representing a range of possible parameter.} \item{the Maximum Probability of Effect (MPE), the probability that the effect is positive or negative (depending on the median’s direction).} \item{the Overlap (O), the percentage of overlap between the posterior distribution and a normal distribution of mean 0 and same SD than the posterior. Can be interpreted as the probability that a value from the posterior distribution comes from a null distribution.} + \item{the ROPE, the proportion of the 95\% CI of the posterior distribution that lies within the region of practical equivalence.} } } \description{ diff --git a/man/assess.Rd b/man/assess.Rd index c49fb76..a059255 100644 --- a/man/assess.Rd +++ b/man/assess.Rd @@ -4,10 +4,11 @@ \alias{assess} \title{Compare a patient's score to a control group} \usage{ -assess(patient, mean = 0, sd = 1, n = NULL, controls = NULL, CI = 95, - treshold = 0.05, iter = 10000, color_controls = "#2196F3", - color_CI = "#E91E63", color_score = "black", color_size = 2, - alpha_controls = 1, alpha_CI = 0.8, verbose = TRUE) +assess(patient, mean = 0, sd = 1, n = NULL, controls = NULL, + CI = 95, treshold = 0.05, iter = 10000, + color_controls = "#2196F3", color_CI = "#E91E63", + color_score = "black", color_size = 2, alpha_controls = 1, + alpha_CI = 0.8, verbose = TRUE) } \arguments{ \item{patient}{Single value (patient's score).} diff --git a/man/crawford.test.Rd b/man/crawford.test.Rd index 2a72293..ad5c867 100644 --- a/man/crawford.test.Rd +++ b/man/crawford.test.Rd @@ -6,8 +6,9 @@ \usage{ crawford.test(patient, controls = NULL, mean = NULL, sd = NULL, n = NULL, CI = 95, treshold = 0.1, iter = 10000, - color_controls = "#2196F3", color_CI = "#E91E63", color_score = "black", - color_size = 2, alpha_controls = 1, alpha_CI = 0.8) + color_controls = "#2196F3", color_CI = "#E91E63", + color_score = "black", color_size = 2, alpha_controls = 1, + alpha_CI = 0.8) } \arguments{ \item{patient}{Single value (patient's score).} diff --git a/man/find_best_model.Rd b/man/find_best_model.Rd index dafbcfb..2b70942 100644 --- a/man/find_best_model.Rd +++ b/man/find_best_model.Rd @@ -16,7 +16,7 @@ Returns the best model. See the documentation for your model's class: \itemize{ \item{\link[=find_best_model.stanreg]{find_best_model.stanreg}} - \item{\link[=find_best_model.merModLmerTest]{find_best_model.merModLmerTest}} + \item{\link[=find_best_model.lmerModLmerTest]{find_best_model.lmerModLmerTest}} } } \seealso{ diff --git a/man/find_best_model.merModLmerTest.Rd b/man/find_best_model.lmerModLmerTest.Rd similarity index 79% rename from man/find_best_model.merModLmerTest.Rd rename to man/find_best_model.lmerModLmerTest.Rd index ce528b4..165477d 100644 --- a/man/find_best_model.merModLmerTest.Rd +++ b/man/find_best_model.lmerModLmerTest.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/find_best_model.merModLmerTest.R -\name{find_best_model.merModLmerTest} -\alias{find_best_model.merModLmerTest} +% Please edit documentation in R/find_best_model.lmerModLmerTest.R +\name{find_best_model.lmerModLmerTest} +\alias{find_best_model.lmerModLmerTest} \title{Returns the best combination of predictors for lmerTest objects.} \usage{ -\method{find_best_model}{merModLmerTest}(fit, interaction = TRUE, +\method{find_best_model}{lmerModLmerTest}(fit, interaction = TRUE, fixed = NULL, ...) } \arguments{ diff --git a/man/find_best_model.stanreg.Rd b/man/find_best_model.stanreg.Rd index 2542970..e6f0788 100644 --- a/man/find_best_model.stanreg.Rd +++ b/man/find_best_model.stanreg.Rd @@ -4,8 +4,8 @@ \alias{find_best_model.stanreg} \title{Returns the best combination of predictors based on LOO cross-validation indices.} \usage{ -\method{find_best_model}{stanreg}(fit, interaction = TRUE, fixed = NULL, - K = 10, k_treshold = NULL, ...) +\method{find_best_model}{stanreg}(fit, interaction = TRUE, + fixed = NULL, K = 10, k_treshold = NULL, ...) } \arguments{ \item{fit}{A stanreg object.} diff --git a/man/find_combinations.formula.Rd b/man/find_combinations.formula.Rd index 1a8da11..fab1726 100644 --- a/man/find_combinations.formula.Rd +++ b/man/find_combinations.formula.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/find_combinations.formula.R +% Please edit documentation in R/find_combinations.R \name{find_combinations.formula} \alias{find_combinations.formula} \title{Generate all combinations of predictors of a formula.} diff --git a/man/find_matching_string.Rd b/man/find_matching_string.Rd index 394c29c..c4f558c 100644 --- a/man/find_matching_string.Rd +++ b/man/find_matching_string.Rd @@ -4,7 +4,8 @@ \alias{find_matching_string} \title{Fuzzy string matching.} \usage{ -find_matching_string(x, y, value = TRUE, step = 0.1, ignore.case = TRUE) +find_matching_string(x, y, value = TRUE, step = 0.1, + ignore.case = TRUE) } \arguments{ \item{x}{Strings.} diff --git a/man/format_digit.Rd b/man/format_digit.Rd index e7d9926..32c80bf 100644 --- a/man/format_digit.Rd +++ b/man/format_digit.Rd @@ -4,7 +4,8 @@ \alias{format_digit} \title{Format digits.} \usage{ -format_digit(x, digits = 2, null_treshold = 0.001, inf_treshold = 9e+08) +format_digit(x, digits = 2, null_treshold = 0.001, + inf_treshold = 9e+08) } \arguments{ \item{x}{A digit.} diff --git a/man/get_contrasts.lmerModLmerTest.Rd b/man/get_contrasts.lmerModLmerTest.Rd index f92b367..87da4d2 100644 --- a/man/get_contrasts.lmerModLmerTest.Rd +++ b/man/get_contrasts.lmerModLmerTest.Rd @@ -4,7 +4,8 @@ \alias{get_contrasts.lmerModLmerTest} \title{Compute estimated marginal means and contrasts from lmerModLmerTest models.} \usage{ -\method{get_contrasts}{lmerModLmerTest}(fit, formula, adjust = "tukey", ...) +\method{get_contrasts}{lmerModLmerTest}(fit, formula, adjust = "tukey", + ...) } \arguments{ \item{fit}{A merModLmerTest model.} diff --git a/man/get_data.Rd b/man/get_data.Rd index b355bc9..66f3d53 100644 --- a/man/get_data.Rd +++ b/man/get_data.Rd @@ -16,6 +16,7 @@ Extract the dataframe used in a model. } \examples{ \dontrun{ +library(tidyverse) library(psycho) df <- mtcars \%>\% @@ -27,6 +28,7 @@ fit <- lm(wt ~ cyl, data=df) fit <- lm(wt ~ mpg * cyl, data=df) fit <- lm(wt ~ cyl * gear, data=df) fit <- lmerTest::lmer(wt ~ mpg * gear + (1|cyl), data=df) +fit <- rstanarm::stan_lmer(wt ~ mpg * gear + (1|cyl), data=df) get_data(fit) diff --git a/man/get_predicted.stanreg.Rd b/man/get_predicted.stanreg.Rd index 2cdc53a..20bf1fa 100644 --- a/man/get_predicted.stanreg.Rd +++ b/man/get_predicted.stanreg.Rd @@ -6,7 +6,7 @@ \usage{ \method{get_predicted}{stanreg}(fit, newdata = "model", prob = 0.9, odds_to_probs = TRUE, keep_iterations = FALSE, draws = NULL, - posterior_predict = FALSE, seed = NULL, ...) + posterior_predict = FALSE, seed = NULL, transform = FALSE, ...) } \arguments{ \item{fit}{A stanreg model.} @@ -25,6 +25,8 @@ \item{seed}{An optional seed to use.} +\item{transform}{If posterior_predict is False, should the linear predictor be transformed using the inverse-link function? The default is FALSE, in which case the untransformed linear predictor is returned.} + \item{...}{Arguments passed to or from other methods.} } \value{ diff --git a/man/hdi.Rd b/man/hdi.Rd index 23fec27..1944bfd 100644 --- a/man/hdi.Rd +++ b/man/hdi.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/hdi.R -\name{hdi} -\alias{hdi} +\name{HDI} +\alias{HDI} \title{Highest Density Intervals (HDI).} \usage{ -hdi(x, prob = 0.95) +HDI(x, prob = 0.95) } \arguments{ \item{x}{A vector of values from a probability distribution (e.g., posterior probabilities from MCMC sampling).} @@ -18,13 +18,13 @@ Compute the Highest Density Intervals (HDI) of a distribution. library(psycho) distribution <- rnorm(1000, 0, 1) -hdi_values <- hdi(distribution) -print(hdi_values) -plot(hdi_values) -summary(hdi_values) +HDI_values <- HDI(distribution) +print(HDI_values) +plot(HDI_values) +summary(HDI_values) x <- matrix(rexp(200), 100) -hdi_values <- hdi(x) +HDI_values <- HDI(x) } \author{ diff --git a/man/interpret_r.Rd b/man/interpret_r.Rd index a42647e..1c54929 100644 --- a/man/interpret_r.Rd +++ b/man/interpret_r.Rd @@ -4,7 +4,8 @@ \alias{interpret_r} \title{Correlation coefficient r interpreation.} \usage{ -interpret_r(x, direction = TRUE, strength = TRUE, rules = "cohen1988") +interpret_r(x, direction = TRUE, strength = TRUE, + rules = "cohen1988") } \arguments{ \item{x}{Correlation coefficient.} diff --git a/man/simulate_data_regression.Rd b/man/simulate_data_regression.Rd new file mode 100644 index 0000000..1580b26 --- /dev/null +++ b/man/simulate_data_regression.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate.R +\name{simulate_data_regression} +\alias{simulate_data_regression} +\title{Simulates data for single or multiple regression.} +\usage{ +simulate_data_regression(coefs = 0.5, sample = 100, error = 0) +} +\arguments{ +\item{coefs}{Desired theorethical coefs. Can be a single value or a list.} + +\item{sample}{Desired sample size.} + +\item{error}{The error (standard deviation of gaussian noise).} +} +\description{ +Simulates data for single or multiple regression. +} +\details{ +See https://stats.stackexchange.com/questions/59062/multiple-linear-regression-simulation +} +\examples{ +library(psycho) + +data <- simulate_data_regression(coefs=c(0.1, 0.8), sample=50, error=0) +fit <- lm(y ~ ., data=data) +coef(fit) +analyze(fit) + +} +\author{ +TPArrow +} diff --git a/tests/testthat/test-analyze.lm.R b/tests/testthat/test-analyze.lm.R index 92677d7..eb1df1b 100644 --- a/tests/testthat/test-analyze.lm.R +++ b/tests/testthat/test-analyze.lm.R @@ -13,4 +13,12 @@ test_that("If it works.", { # test summary summa <- summary(model, round = 2) testthat::expect_equal(nrow(summa), 2) + + + # Poly + fit <- lm(Sepal.Width ~ poly(Sepal.Length, 2), data = iris) + + model <- analyze(fit) + values <- values(model) + testthat::expect_equal(round(values$effects$`poly(Sepal.Length, 2)2`$Coef, 2), 0.82, tolerance = 0.01) }) diff --git a/tests/testthat/test-hdi.R b/tests/testthat/test-hdi.R index 32b92bb..dd0cd3c 100644 --- a/tests/testthat/test-hdi.R +++ b/tests/testthat/test-hdi.R @@ -2,9 +2,9 @@ context("hdi") test_that("Correct Value", { x <- attitude$rating - results <- psycho::hdi(x, 0.95) + results <- psycho::HDI(x, 0.95) testthat::expect_equal(results$values$HDImin, 40) testthat::expect_equal(length(plot(results)), 9) - testthat::expect_equal(psycho::hdi(x, 95)$values$HDImin, 40) + testthat::expect_equal(psycho::HDI(x, 95)$values$HDImin, 40) }) diff --git a/vignettes/bayesian.Rmd b/vignettes/bayesian.Rmd index e587b1d..e870d5a 100644 --- a/vignettes/bayesian.Rmd +++ b/vignettes/bayesian.Rmd @@ -78,7 +78,7 @@ fit <- rstanarm::stan_glm(Y ~ X, seed=666, data=data.frame(Y,X)) values <- values(analyze(fit)) posterior <- values$effects$X$posterior density <- density(posterior, n = length(posterior)) -hdi <- hdi(posterior, 0.90) +hdi <- HDI(posterior, 0.90) mpe <- mpe(posterior)$MPE ```