Skip to content

Commit 3ca9b46

Browse files
committed
Add coefficient of variation function
Related to #232, easystats/performance#433
1 parent 34385ef commit 3ca9b46

File tree

5 files changed

+201
-1
lines changed

5 files changed

+201
-1
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ VignetteBuilder:
6262
Encoding: UTF-8
6363
Language: en-US
6464
Roxygen: list(markdown = TRUE)
65-
RoxygenNote: 7.2.1
65+
RoxygenNote: 7.2.1.9000
6666
Config/testthat/edition: 3
6767
Config/Needs/website:
6868
rstudio/bslib,

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ S3method(center,factor)
2020
S3method(center,grouped_df)
2121
S3method(center,logical)
2222
S3method(center,numeric)
23+
S3method(coef_var,default)
24+
S3method(coef_var,numeric)
2325
S3method(convert_na_to,character)
2426
S3method(convert_na_to,data.frame)
2527
S3method(convert_na_to,default)
@@ -158,13 +160,15 @@ export(center)
158160
export(centre)
159161
export(change_code)
160162
export(change_scale)
163+
export(coef_var)
161164
export(coerce_to_numeric)
162165
export(colnames_to_row)
163166
export(column_as_rownames)
164167
export(compact_character)
165168
export(compact_list)
166169
export(convert_na_to)
167170
export(convert_to_na)
171+
export(cv)
168172
export(data_addprefix)
169173
export(data_addsuffix)
170174
export(data_adjust)
@@ -195,6 +199,7 @@ export(degroup)
195199
export(demean)
196200
export(describe_distribution)
197201
export(detrend)
202+
export(distribution_cv)
198203
export(distribution_mode)
199204
export(empty_columns)
200205
export(empty_rows)

R/describe_distribution.R

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,11 @@ print.parameters_distribution <- function(x, digits = 2, ...) {
517517
#' The value that appears most frequently in the provided data.
518518
#' The returned data structure will be the same as the entered one.
519519
#'
520+
#' @seealso For continuous variables, the
521+
#' **Highest Maximum a Posteriori probability estimate (MAP)** may be
522+
#' more a more useful way to estimate the most commonly-observed value
523+
#' than the mode. See [bayestestR::map_estimate()].
524+
#'
520525
#' @examples
521526
#'
522527
#' distribution_mode(c(1, 2, 3, 3, 4, 5))
@@ -529,3 +534,115 @@ distribution_mode <- function(x) {
529534
idx <- which.max(tab)
530535
uniqv[idx]
531536
}
537+
538+
#' Compute the coefficient of variation
539+
#'
540+
#' Compute the coefficient of variation (CV, ratio of the standard deviation to
541+
#' the mean, $\frac{\sigma}{\mu}$) for a set of numeric values. Can also be
542+
#' used to compute the CV for a fitted model.
543+
#'
544+
#' @param x A numeric vector, or a model of a supported class.
545+
#' @param ... Further arguments based to other methods.
546+
#'
547+
#' @return The computed coefficient of variation for `x`.
548+
#' @export
549+
#'
550+
#' @examples
551+
#' coef_var(1:10)
552+
#' coef_var(1:10, method = "qcd")
553+
#' coef_var(mu = 10, sigma = 20)
554+
#' coef_var(mu = 10, sigma = 20, method = "unbiased", n = 30)
555+
#' cv(1:10)
556+
coef_var <- function(x, ...) {
557+
UseMethod("coef_var")
558+
}
559+
560+
#' @name cv
561+
#' @rdname coef_var
562+
#' @export
563+
cv <- coef_var
564+
565+
#' @name distribution_cv
566+
#' @rdname coef_var
567+
#' @export
568+
distribution_cv <- coef_var
569+
570+
#' @export
571+
coef_var.default <- function(x, verbose = TRUE, ...) {
572+
if (verbose) {
573+
warning(insight::format_message(paste0("Can't compute the coefficient of variation objects of class '", class(x)[1], "'.")), call. = FALSE)
574+
}
575+
NULL
576+
}
577+
578+
#' @param mu A numeric vector of mean values to use to compute the coefficient
579+
#' of variation. If supplied, `x` is not used to compute the mean.
580+
#' @param sigma A numeric vector of standard deviation values to use to compute the coefficient
581+
#' of variation. If supplied, `x` is not used to compute the SD.
582+
#' @param method Method to use to compute the CV. Can be `"standard"` to compute
583+
#' by dividing the standard deviation by the mean, `"unbiased"` for the
584+
#' unbiased estimator for normally distributed data, or one of two robust
585+
#' alternatives: `"median_mad"` to divide the median by the [stats::mad()],
586+
#' or `"qcd"` (quartile coefficient of dispersion, interquartile range divided
587+
#' by the sum of the quartiles [twice the midhinge]: $\frac{Q_3 - Q_1}{Q_3 + Q_1}$).
588+
#' @param trim the fraction (0 to 0.5) of values to be trimmed from
589+
#' each end of `x` before the mean and standard deviation (or alternatvies)
590+
#' are computed. Values of `trim` outside the range of (0 to 0.5) are taken
591+
#' as the nearest endpoint.
592+
#' @param na.rm Logical. Should `NA` values be removed before computing (`TRUE`)
593+
#' or not (`FALSE`, default)?
594+
#' @param n If `method = "unbiased"` and both `mu` and `sigma` are provided (not
595+
#' computed from `x`), what sample size to use to adjust the computed CV
596+
#' for small-sample bias?
597+
#'
598+
#' @rdname coef_var
599+
#'
600+
#' @export
601+
coef_var.numeric <- function(x, mu = NULL, sigma = NULL, unbiased = TRUE,
602+
method = c("standard", "unbiased", "median_mad", "qcd"),
603+
trim = 0, na.rm = FALSE, n = NULL, ...) {
604+
# TODO: Support weights
605+
method <- match.arg(method, choices = c("standard", "unbiased", "median_mad", "qcd"))
606+
if (!(is.null(mu) && is.null(sigma))) {
607+
if (isTRUE(na.rm)) {
608+
x <- x[!is.na(x)]
609+
}
610+
if (!is.numeric(trim) || length(trim) != 1L) {
611+
stop("`trim` must be a single numeric value.", call. = FALSE)
612+
}
613+
n <- length(x)
614+
if (trim > 0 && n) {
615+
if (anyNA(x)) return(NA_real_)
616+
if (trim >= 0.5) return(stats::median(x, na.rm = FALSE))
617+
lo <- floor(n * trim) + 1
618+
hi <- n + 1 - lo
619+
x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]
620+
}
621+
}
622+
if (! is.null(mu)) {
623+
mu <- switch(
624+
method,
625+
standard, unbiased = mean(x, ...),
626+
median_mad = stats::median(x, ...),
627+
qcd = diff(stats::quantile(x, probs = c(.25, .75), ...))
628+
)
629+
}
630+
if (! is.null(sigma)) {
631+
sigma <- switch(
632+
method,
633+
standard, unbiased = sd(x, ...),
634+
median_mad = stats::mad(x, center = mu, ...),
635+
qcd = sum(stats::quantile(x, probs = c(.25, .75), ...))
636+
)
637+
}
638+
out <- sigma / mu
639+
if (method == "unbiased") {
640+
if (is.null(n)) {
641+
stop(insight::format_message("A value for `n` must be provided when `method = 'unbiased'` and both `mu` and `sigma` are provided"),
642+
call. = FALSE)
643+
}
644+
# from DescTools::CoefVar
645+
out <- out * (1 - 1 / (4 * (n - 1)) + 1 / n * out^2 + 1 / (2 * (n - 1)^2))
646+
}
647+
return(out)
648+
}

man/coef_var.Rd

Lines changed: 72 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/distribution_mode.Rd

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)