19
19
# ' @param data Dataframe or vector.
20
20
# ' @param threshold The amount of winsorization.
21
21
# ' @param verbose Toggle warnings.
22
+ # ' @param robust Logical, if TRUE, winsorizing is done via the median absolute deviation (MAD).
22
23
# ' @param ... Currently not used.
23
24
# '
24
25
# ' @examples
25
26
# ' winsorize(iris$Sepal.Length, threshold = 0.2)
27
+ # ' winsorize(iris$Sepal.Length, threshold = 3, robust = TRUE)
26
28
# ' winsorize(iris, threshold = 0.2)
27
29
# ' @inherit data_rename seealso
28
30
# ' @export
@@ -43,27 +45,50 @@ winsorize.character <- winsorize.factor
43
45
winsorize.logical <- winsorize.factor
44
46
45
47
# ' @export
46
- winsorize.data.frame <- function (data , threshold = 0.2 , verbose = TRUE , ... ) {
47
- out <- sapply(data , winsorize , threshold = threshold , verbose = verbose )
48
+ winsorize.data.frame <- function (data , threshold = 0.2 , verbose = TRUE , robust = FALSE , ... ) {
49
+ out <- sapply(data , winsorize , threshold = threshold , verbose = verbose , robust = robust )
48
50
as.data.frame(out )
49
51
}
50
52
51
53
# ' @rdname winsorize
52
54
# ' @export
53
- winsorize.numeric <- function (data , threshold = 0.2 , verbose = TRUE , ... ) {
54
- if (threshold < 0 || threshold > 1 ) {
55
- if (isTRUE(verbose )) {
56
- warning(" 'threshold' for winsorization must be a scalar between 0 and 1. Did not winsorize data." , call. = FALSE )
57
- }
58
- return (data )
55
+ winsorize.numeric <- function (data , threshold = 0.2 , verbose = TRUE , robust = FALSE , ... ) {
56
+ if (robust == FALSE ) {
57
+
58
+ if (threshold < 0 || threshold > 0.5 ) {
59
+ if (isTRUE(verbose )) {
60
+ warning(" 'threshold' for winsorization must be a scalar between 0 and 0.5. Did not winsorize data." , call. = FALSE )
61
+ }
62
+ return (data )
63
+ }
64
+
65
+ y <- sort(data )
66
+ n <- length(data )
67
+ ibot <- floor(threshold * n ) + 1
68
+ itop <- length(data ) - ibot + 1
69
+ xbot <- y [ibot ]
70
+ xtop <- y [itop ]
71
+
72
+ winval <- data
73
+ winval [winval < = xbot ] <- xbot
74
+ winval [winval > = xtop ] <- xtop
75
+ return (winval )
59
76
}
60
77
61
- y <- sort(data )
62
- n <- length(data )
63
- ibot <- floor(threshold * n ) + 1
64
- itop <- length(data ) - ibot + 1
65
- xbot <- y [ibot ]
66
- xtop <- y [itop ]
67
- winval <- ifelse(data < = xbot , xbot , data )
68
- ifelse(winval > = xtop , xtop , winval )
78
+ if (robust == TRUE ) {
79
+
80
+ if (threshold < = 0 ) {
81
+ if (isTRUE(verbose )) {
82
+ warning(" 'threshold' for winsorization must be a scalar greater than 1. Did not winsorize data." , call. = FALSE )
83
+ }
84
+ return (data )
85
+ }
86
+
87
+ med <- median(data , na.rm = TRUE )
88
+ y <- data - med
89
+ sc <- mad(y , center = 0 , na.rm = TRUE ) * threshold
90
+ y [y > sc ] <- sc
91
+ y [y < - sc ] <- - sc
92
+ y + med
93
+ }
69
94
}
0 commit comments