@@ -517,6 +517,11 @@ print.parameters_distribution <- function(x, digits = 2, ...) {
517
517
# ' The value that appears most frequently in the provided data.
518
518
# ' The returned data structure will be the same as the entered one.
519
519
# '
520
+ # ' @seealso For continuous variables, the
521
+ # ' **Highest Maximum a Posteriori probability estimate (MAP)** may be
522
+ # ' more a more useful way to estimate the most commonly-observed value
523
+ # ' than the mode. See [bayestestR::map_estimate()].
524
+ # '
520
525
# ' @examples
521
526
# '
522
527
# ' distribution_mode(c(1, 2, 3, 3, 4, 5))
@@ -529,3 +534,115 @@ distribution_mode <- function(x) {
529
534
idx <- which.max(tab )
530
535
uniqv [idx ]
531
536
}
537
+
538
+ # ' Compute the coefficient of variation
539
+ # '
540
+ # ' Compute the coefficient of variation (CV, ratio of the standard deviation to
541
+ # ' the mean, $\frac{\sigma}{\mu}$) for a set of numeric values. Can also be
542
+ # ' used to compute the CV for a fitted model.
543
+ # '
544
+ # ' @param x A numeric vector, or a model of a supported class.
545
+ # ' @param ... Further arguments based to other methods.
546
+ # '
547
+ # ' @return The computed coefficient of variation for `x`.
548
+ # ' @export
549
+ # '
550
+ # ' @examples
551
+ # ' coef_var(1:10)
552
+ # ' coef_var(1:10, method = "qcd")
553
+ # ' coef_var(mu = 10, sigma = 20)
554
+ # ' coef_var(mu = 10, sigma = 20, method = "unbiased", n = 30)
555
+ # ' cv(1:10)
556
+ coef_var <- function (x , ... ) {
557
+ UseMethod(" coef_var" )
558
+ }
559
+
560
+ # ' @name cv
561
+ # ' @rdname coef_var
562
+ # ' @export
563
+ cv <- coef_var
564
+
565
+ # ' @name distribution_cv
566
+ # ' @rdname coef_var
567
+ # ' @export
568
+ distribution_cv <- coef_var
569
+
570
+ # ' @export
571
+ coef_var.default <- function (x , verbose = TRUE , ... ) {
572
+ if (verbose ) {
573
+ warning(insight :: format_message(paste0(" Can't compute the coefficient of variation objects of class '" , class(x )[1 ], " '." )), call. = FALSE )
574
+ }
575
+ NULL
576
+ }
577
+
578
+ # ' @param mu A numeric vector of mean values to use to compute the coefficient
579
+ # ' of variation. If supplied, `x` is not used to compute the mean.
580
+ # ' @param sigma A numeric vector of standard deviation values to use to compute the coefficient
581
+ # ' of variation. If supplied, `x` is not used to compute the SD.
582
+ # ' @param method Method to use to compute the CV. Can be `"standard"` to compute
583
+ # ' by dividing the standard deviation by the mean, `"unbiased"` for the
584
+ # ' unbiased estimator for normally distributed data, or one of two robust
585
+ # ' alternatives: `"median_mad"` to divide the median by the [stats::mad()],
586
+ # ' or `"qcd"` (quartile coefficient of dispersion, interquartile range divided
587
+ # ' by the sum of the quartiles [twice the midhinge]: $\frac{Q_3 - Q_1}{Q_3 + Q_1}$).
588
+ # ' @param trim the fraction (0 to 0.5) of values to be trimmed from
589
+ # ' each end of `x` before the mean and standard deviation (or alternatvies)
590
+ # ' are computed. Values of `trim` outside the range of (0 to 0.5) are taken
591
+ # ' as the nearest endpoint.
592
+ # ' @param na.rm Logical. Should `NA` values be removed before computing (`TRUE`)
593
+ # ' or not (`FALSE`, default)?
594
+ # ' @param n If `method = "unbiased"` and both `mu` and `sigma` are provided (not
595
+ # ' computed from `x`), what sample size to use to adjust the computed CV
596
+ # ' for small-sample bias?
597
+ # '
598
+ # ' @rdname coef_var
599
+ # '
600
+ # ' @export
601
+ coef_var.numeric <- function (x , mu = NULL , sigma = NULL , unbiased = TRUE ,
602
+ method = c(" standard" , " unbiased" , " median_mad" , " qcd" ),
603
+ trim = 0 , na.rm = FALSE , n = NULL , ... ) {
604
+ # TODO: Support weights
605
+ method <- match.arg(method , choices = c(" standard" , " unbiased" , " median_mad" , " qcd" ))
606
+ if (! (is.null(mu ) && is.null(sigma ))) {
607
+ if (isTRUE(na.rm )) {
608
+ x <- x [! is.na(x )]
609
+ }
610
+ if (! is.numeric(trim ) || length(trim ) != 1L ) {
611
+ stop(" `trim` must be a single numeric value." , call. = FALSE )
612
+ }
613
+ n <- length(x )
614
+ if (trim > 0 && n ) {
615
+ if (anyNA(x )) return (NA_real_ )
616
+ if (trim > = 0.5 ) return (stats :: median(x , na.rm = FALSE ))
617
+ lo <- floor(n * trim ) + 1
618
+ hi <- n + 1 - lo
619
+ x <- sort.int(x , partial = unique(c(lo , hi )))[lo : hi ]
620
+ }
621
+ }
622
+ if (! is.null(mu )) {
623
+ mu <- switch (
624
+ method ,
625
+ standard , unbiased = mean(x , ... ),
626
+ median_mad = stats :: median(x , ... ),
627
+ qcd = diff(stats :: quantile(x , probs = c(.25 , .75 ), ... ))
628
+ )
629
+ }
630
+ if (! is.null(sigma )) {
631
+ sigma <- switch (
632
+ method ,
633
+ standard , unbiased = sd(x , ... ),
634
+ median_mad = stats :: mad(x , center = mu , ... ),
635
+ qcd = sum(stats :: quantile(x , probs = c(.25 , .75 ), ... ))
636
+ )
637
+ }
638
+ out <- sigma / mu
639
+ if (method == " unbiased" ) {
640
+ if (is.null(n )) {
641
+ stop(insight :: format_message(" A value for `n` must be provided when `method = 'unbiased'` and both `mu` and `sigma` are provided" ),
642
+ call. = FALSE )
643
+ }
644
+ # from DescTools::CoefVar
645
+ out <- out * (1 - 1 / (4 * (n - 1 )) + 1 / n * out ^ 2 + 1 / (2 * (n - 1 )^ 2 ))
646
+ }
647
+ return (out )
648
+ }
0 commit comments