Skip to content

Commit f9954e9

Browse files
authored
Merge pull request #1756 from rstudio/nanoplot-boxplot-enhance
Correctly render outliers of nanoplot-based boxplots
2 parents 3284691 + d8c904f commit f9954e9

File tree

5 files changed

+59
-24
lines changed

5 files changed

+59
-24
lines changed

R/modify_columns.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2755,7 +2755,7 @@ cols_add <- function(
27552755
#' sales for a selection of days. By converting the string-time 24-hour-clock
27562756
#' time values to the number of seconds elapsed in a day, we get continuous
27572757
#' values that can be incorporated into each box plot. And, by supplying a
2758-
#' function to the `y_val_fmt_fn` argument within `nanoplot_options()`, we can
2758+
#' function to the `y_val_fmt_fn` argument within [nanoplot_options()], we can
27592759
#' transform the integer seconds values back to clock times for display on
27602760
#' hover.
27612761
#'

R/utils_plots.R

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,26 +1211,40 @@ generate_nanoplot <- function(
12111211
box_thickness <- data_point_radius[1] * 6
12121212

12131213
# Calculate statistics for boxplot
1214-
stat_p05 <- unname(stats::quantile(y_vals, probs = 0.05, na.rm = TRUE))
12151214
stat_q_1 <- unname(stats::quantile(y_vals, probs = 0.25, na.rm = TRUE))
12161215
stat_med <- unname(stats::quantile(y_vals, probs = 0.50, na.rm = TRUE))
12171216
stat_q_3 <- unname(stats::quantile(y_vals, probs = 0.75, na.rm = TRUE))
1218-
stat_p95 <- unname(stats::quantile(y_vals, probs = 0.95, na.rm = TRUE))
1217+
stat_iqr <- stats::IQR(y_vals, na.rm = TRUE)
12191218

1220-
if (length(y_vals) > 25) {
1219+
low_outliers <- y_vals[y_vals < stat_q_1 - (1.5 * stat_iqr)]
1220+
high_outliers <- y_vals[y_vals > stat_q_3 + (1.5 * stat_iqr)]
1221+
1222+
stat_min_excl_low_outliers <-
1223+
min(base::setdiff(y_vals, low_outliers), na.rm = TRUE)
1224+
1225+
stat_max_excl_high_outliers <-
1226+
max(base::setdiff(y_vals, high_outliers), na.rm = TRUE)
1227+
1228+
plot_only_outliers <- length(y_vals) >= 20
1229+
1230+
if (plot_only_outliers) {
12211231

12221232
# Plot only outliers since the number of data values is sufficiently high
1223-
y_vals_plot <- y_vals[y_vals < stat_p05 | y_vals > stat_p95]
1233+
y_vals_plot <- c(low_outliers, high_outliers)
12241234

12251235
data_point_radius <- 4
12261236
data_point_stroke_width <- 2
1227-
data_point_stroke_color <- adjust_luminance(data_bar_stroke_color[1], steps = 0.75)
1228-
data_point_fill_color <- adjust_luminance(data_point_stroke_color[1], steps = 1.75)
1237+
1238+
data_point_stroke_color <-
1239+
adjust_luminance(data_bar_stroke_color[1], steps = 0.75)
1240+
1241+
data_point_fill_color <-
1242+
adjust_luminance(data_point_stroke_color[1], steps = 1.75)
12291243

12301244
} else {
12311245

12321246
# Plot all data values but diminish the visibility of the data points
1233-
# as the number approaches 25
1247+
# as the number approaches 20
12341248
y_vals_plot <- y_vals
12351249

12361250
if (length(y_vals) < 10) {
@@ -1241,7 +1255,9 @@ generate_nanoplot <- function(
12411255
data_point_stroke_width <- 2
12421256
}
12431257

1244-
data_point_stroke_color <- adjust_luminance("black", steps = length(y_vals) / 25)
1258+
data_point_stroke_color <-
1259+
adjust_luminance("black", steps = length(y_vals) / 25)
1260+
12451261
data_point_fill_color <- "transparent"
12461262
}
12471263

@@ -1251,42 +1267,53 @@ generate_nanoplot <- function(
12511267
vals = y_vals,
12521268
all_vals = all_y_vals,
12531269
y_vals_plot = y_vals_plot,
1254-
stat_low = stat_p05,
1270+
stat_min = stat_min_excl_low_outliers,
12551271
stat_qlow = stat_q_1,
12561272
stat_med = stat_med,
12571273
stat_qup = stat_q_3,
1258-
stat_high = stat_p95
1274+
stat_max = stat_max_excl_high_outliers
12591275
)
12601276

12611277
y_proportions <- y_proportions_list[["vals"]]
12621278
y_proportions_plot <- y_proportions_list[["y_vals_plot"]]
1263-
y_stat_p05 <- y_proportions_list[["stat_low"]]
1279+
y_stat_min <- y_proportions_list[["stat_min"]]
12641280
y_stat_q_1 <- y_proportions_list[["stat_qlow"]]
12651281
y_stat_med <- y_proportions_list[["stat_med"]]
12661282
y_stat_q_3 <- y_proportions_list[["stat_qup"]]
1267-
y_stat_p95 <- y_proportions_list[["stat_high"]]
1283+
y_stat_max <- y_proportions_list[["stat_max"]]
12681284

12691285
# Calculate boxplot x values
1270-
fence_start <- y_stat_p05 * data_x_width
1286+
fence_start <- y_stat_min * data_x_width
12711287
box_start <- y_stat_q_1 * data_x_width
12721288
median_x <- y_stat_med * data_x_width
12731289
box_end <- y_stat_q_3 * data_x_width
1274-
fence_end <- y_stat_p95 * data_x_width
1290+
fence_end <- y_stat_max * data_x_width
12751291
box_width <- (y_stat_q_3 - y_stat_q_1) * data_x_width
12761292

12771293
# Establish positions for plottable x and y values
12781294
plotted_x_vals <- y_proportions_plot * data_x_width
12791295

12801296
if (length(y_vals) == 1) {
1297+
12811298
plotted_y_vals <- bottom_y / 2
1299+
12821300
} else {
1283-
plotted_y_vals <- jitter(rep(bottom_y / 2, length(plotted_x_vals)), factor = 10)
1301+
1302+
if (plot_only_outliers) {
1303+
1304+
plotted_y_vals <- rep(bottom_y / 2, length(plotted_x_vals))
1305+
1306+
} else {
1307+
1308+
plotted_y_vals <-
1309+
jitter(rep(bottom_y / 2, length(plotted_x_vals)), factor = 10)
1310+
}
12841311
}
12851312

12861313
# Format numbers compactly
1287-
stat_p05_value <-
1314+
stat_min_value <-
12881315
format_number_compactly(
1289-
val = stat_p05,
1316+
val = stat_min_excl_low_outliers,
12901317
currency = currency,
12911318
fn = y_val_fmt_fn
12921319
)
@@ -1308,9 +1335,9 @@ generate_nanoplot <- function(
13081335
currency = currency,
13091336
fn = y_val_fmt_fn
13101337
)
1311-
stat_p95_value <-
1338+
stat_max_value <-
13121339
format_number_compactly(
1313-
val = stat_p95,
1340+
val = stat_max_excl_high_outliers,
13141341
currency = currency,
13151342
fn = y_val_fmt_fn
13161343
)
@@ -1358,7 +1385,7 @@ generate_nanoplot <- function(
13581385
"font-size=\"30px\" ",
13591386
"text-anchor=\"end\"",
13601387
">",
1361-
stat_p05_value,
1388+
stat_min_value,
13621389
"</text>",
13631390
"<text ",
13641391
"x=\"", box_start - 6, "\" ",
@@ -1396,7 +1423,7 @@ generate_nanoplot <- function(
13961423
"stroke=\"transparent\" ",
13971424
"font-size=\"30px\"",
13981425
">",
1399-
stat_p95_value,
1426+
stat_max_value,
14001427
"</text>"
14011428
)
14021429
}

images/man_cols_nanoplot_7.png

-40.5 KB
Loading

man/cols_nanoplot.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/info_date_style.Rd

Lines changed: 9 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)