Skip to content

Commit 3cb5a12

Browse files
committed
addresses easystats#177 & easystats#49 & easystats#47 for winsorizing based on the MAD
1 parent e986a8d commit 3cb5a12

File tree

1 file changed

+41
-16
lines changed

1 file changed

+41
-16
lines changed

R/winsorize.R

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
#' @param data Dataframe or vector.
2020
#' @param threshold The amount of winsorization.
2121
#' @param verbose Toggle warnings.
22+
#' @param robust Logical, if TRUE, winsorizing is done via the median absolute deviation (MAD).
2223
#' @param ... Currently not used.
2324
#'
2425
#' @examples
2526
#' winsorize(iris$Sepal.Length, threshold = 0.2)
27+
#' winsorize(iris$Sepal.Length, threshold = 3, robust = TRUE)
2628
#' winsorize(iris, threshold = 0.2)
2729
#' @inherit data_rename seealso
2830
#' @export
@@ -43,27 +45,50 @@ winsorize.character <- winsorize.factor
4345
winsorize.logical <- winsorize.factor
4446

4547
#' @export
46-
winsorize.data.frame <- function(data, threshold = 0.2, verbose = TRUE, ...) {
47-
out <- sapply(data, winsorize, threshold = threshold, verbose = verbose)
48+
winsorize.data.frame <- function(data, threshold = 0.2, verbose = TRUE, robust = FALSE, ...) {
49+
out <- sapply(data, winsorize, threshold = threshold, verbose = verbose, robust = robust)
4850
as.data.frame(out)
4951
}
5052

5153
#' @rdname winsorize
5254
#' @export
53-
winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, ...) {
54-
if (threshold < 0 || threshold > 1) {
55-
if (isTRUE(verbose)) {
56-
warning("'threshold' for winsorization must be a scalar between 0 and 1. Did not winsorize data.", call. = FALSE)
57-
}
58-
return(data)
55+
winsorize.numeric <- function(data, threshold = 0.2, verbose = TRUE, robust = FALSE, ...) {
56+
if(robust == FALSE) {
57+
58+
if (threshold < 0 || threshold > 0.5) {
59+
if (isTRUE(verbose)) {
60+
warning("'threshold' for winsorization must be a scalar between 0 and 0.5. Did not winsorize data.", call. = FALSE)
61+
}
62+
return(data)
63+
}
64+
65+
y <- sort(data)
66+
n <- length(data)
67+
ibot <- floor(threshold * n) + 1
68+
itop <- length(data) - ibot + 1
69+
xbot <- y[ibot]
70+
xtop <- y[itop]
71+
72+
winval <- data
73+
winval[winval <= xbot] <- xbot
74+
winval[winval >= xtop] <- xtop
75+
return(winval)
5976
}
6077

61-
y <- sort(data)
62-
n <- length(data)
63-
ibot <- floor(threshold * n) + 1
64-
itop <- length(data) - ibot + 1
65-
xbot <- y[ibot]
66-
xtop <- y[itop]
67-
winval <- ifelse(data <= xbot, xbot, data)
68-
ifelse(winval >= xtop, xtop, winval)
78+
if(robust == TRUE) {
79+
80+
if (threshold <= 0) {
81+
if (isTRUE(verbose)) {
82+
warning("'threshold' for winsorization must be a scalar greater than 1. Did not winsorize data.", call. = FALSE)
83+
}
84+
return(data)
85+
}
86+
87+
med <- median(data, na.rm = TRUE)
88+
y <- data - med
89+
sc <- mad(y, center = 0, na.rm = TRUE) * threshold
90+
y[y > sc] <- sc
91+
y[y < -sc] <- -sc
92+
y + med
93+
}
6994
}

0 commit comments

Comments
 (0)