@@ -447,18 +447,8 @@ wilkinson_bin_to_right = function(x, width) {
447447 bins = wilkinson_bin_to_right_(x , width )
448448
449449 # determine bin positions
450- # can take advantage of the fact that bins is sorted runs of numbers to
451- # get the first and last entry from each bin
452- bin_left = x [! duplicated(bins )]
453- bin_right = x [! duplicated(bins , fromLast = TRUE )]
454- bin_midpoints = (bin_left + bin_right ) / 2
455-
456- list (
457- bins = bins ,
458- bin_midpoints = bin_midpoints ,
459- bin_left = bin_left ,
460- bin_right = bin_right
461- )
450+ bin_runs = rle_bins(bins )
451+ locate_bins(bin_runs , x )
462452}
463453
464454# ' do a backwards sweep after a left-to-right wilkinson binning, trying to
@@ -507,16 +497,9 @@ wilkinson_sweep_back = function(x, b, width, first_slack = Inf) {
507497 b $ bins [changed_x_is ] = bins_changed
508498
509499 # re-number bins to be consecutive in case some bins got removed completely
510- first_x_in_bin = ! duplicated(b $ bins )
511- b $ bins = cumsum(first_x_in_bin )
512-
513- # can take advantage of the fact that b$bins is sorted runs of numbers to
514- # get the first and last entry from each bin
515- b $ bin_left = x [first_x_in_bin ]
516- b $ bin_right = x [! duplicated(b $ bins , fromLast = TRUE )]
517- b $ bin_midpoints = (b $ bin_left + b $ bin_right ) / 2
518-
519- b
500+ bin_runs = rle_bins(b $ bins )
501+ bin_runs = renumber_bins(bin_runs )
502+ locate_bins(bin_runs , x )
520503}
521504
522505# ' a rightward or leftward wilkinson binning followed by a backwards sweep to
@@ -536,17 +519,19 @@ wilkinson_bin = function(x, width, right = TRUE, first_slack = Inf) {
536519
537520 if (right ) {
538521 b = wilkinson_bin_to_right(x , width )
539- wilkinson_sweep_back(x , b , width , first_slack = first_slack )
522+ b = wilkinson_sweep_back(x , b , width , first_slack = first_slack )
540523 } else {
541524 rev_x = - rev(x )
542525 b = wilkinson_bin_to_right(rev_x , width )
543526 b = wilkinson_sweep_back(rev_x , b , width , first_slack = first_slack )
544- list (
527+ b = list (
545528 # renumber bins so 1,2,3,3 => 3,2,1,1 (then reverse so it matches original vector order)
546529 bins = rev(max(b $ bins ) + 1 - b $ bins ),
547530 bin_midpoints = - rev(b $ bin_midpoints )
548531 )
549532 }
533+
534+ wilkinson_smooth(x , b , width )
550535}
551536
552537# ' A modified wilkinson-style binning that expands outward from the center of
@@ -617,13 +602,98 @@ wilkinson_bin_from_center = function(x, width) {
617602 )
618603
619604 center_bin_i = length(left $ bin_midpoints ) + 1
620- list (
605+ b = list (
621606 bins = c(left $ bins , rep(center_bin_i , n_center ), center_bin_i + right $ bins ),
622607 bin_midpoints = c(left $ bin_midpoints , center_midpoint , right $ bin_midpoints )
623608 )
609+
610+ wilkinson_smooth(x , b , width )
624611 }
625612}
626613
614+ # ' Get the run-length encoding of the bins in a Wilkinson binning
615+ # ' @param bins <[list]> sorted bin numbers starting at `1`, as in the
616+ # ' `"bins"` element of output from `wilkinson_` functions.
617+ # ' @returns <[data.frame]> with columns:
618+ # ' - `"bin"`: `unique(bins)`
619+ # ' - `"count"`: occurrences of each bin.
620+ # ' @noRd
621+ rle_bins = function (bins ) {
622+ out = vec_unrep(bins )
623+ names(out ) = c(" bin" , " count" )
624+ out
625+ }
626+
627+ # ' Re-number bins to be consecutive
628+ # '
629+ # ' Removes empty bins and gaps in bins
630+ # ' @param bin_runs <[data.frame]> as returned by `rle_bins()`. Missing bins
631+ # ' will be removed: the `"bin"` element should be increasing but need
632+ # ' not be consecutive and the `"count"` element may have zeros.
633+ # ' @returns modified `bin_runs` with consecutive `"bin"` element starting
634+ # ' at zero and `"count"` all positive.
635+ # ' @noRd
636+ renumber_bins = function (bin_runs ) {
637+ bin_runs = bin_runs [bin_runs $ count > 0 , ]
638+ bin_runs $ bin = seq_len(nrow(bin_runs ))
639+ bin_runs
640+ }
641+
642+ # ' Convert run-length encoded bins into wilkinson binning format and find bin locations
643+ # ' @param bin_runs <[data.frame]> as returned by `rle_bins()`. The
644+ # ' `"bin"` element should be consecutive starting from `1`and the
645+ # ' `"count"` element should not have zeros.
646+ # ' @param x <[numeric]> data values to be binned
647+ # ' @param b <[list]> as returned by `wilkinson_` methods.
648+ # ' @returns <[list]> binning format returned by `wilkinson_` methods, with
649+ # ' elements `"bins"`, `"bin_left"`, `"bin_right"`, `"bin_midpoints"`.
650+ # ' @noRd
651+ locate_bins = function (bin_runs , x ) {
652+ bins = rep.int(bin_runs $ bin , times = bin_runs $ count )
653+
654+ # can take advantage of the fact that bins is sorted runs of numbers to
655+ # get the first and last entry from each bin
656+ bin_left = x [bins != c(0 , bins [- length(bins )])]
657+ bin_right = x [bins != c(bins [- 1 ], 0 )]
658+ bin_midpoints = (bin_left + bin_right ) / 2
659+
660+ list (
661+ bins = bins ,
662+ bin_left = bin_left ,
663+ bin_right = bin_right ,
664+ bin_midpoints = bin_midpoints
665+ )
666+ }
667+
668+ # ' Adjacent-bin moving average smooth as described in Wilkinson
669+ # ' @description
670+ # ' Exchanges dots between adjacent bins in a dotplot as described in Wilkinson.
671+ # ' @param x <[numeric]> sorted data values
672+ # ' @param b <[numeric]> binning as returned by other `wilkinson_` functions: a
673+ # ' [list] with elements `bins` (consecutive integers starting at `1` with the same
674+ # ' length as `x`) and `bin_midpoints` (having length equal to `max(bins)`).
675+ # ' @param width <scalar [numeric]> positive bin width
676+ # ' @param span <scalar [numeric]> multiple of bin width giving the window within
677+ # ' which to consider the next bin "adjacent". If `0`, no smoothing is done. A value
678+ # ' of `1.25` is equivalent to Wilkinson's recommended smoothing if bins are within
679+ # ' `width/4` of each other.
680+ # ' @noRd
681+ wilkinson_smooth = function (x , b , width , span = 0 ) {
682+ if (span == 0 ) return (b )
683+ window = width * span
684+
685+ bin_runs = rle_bins(b $ bins )
686+ for (i in seq_len(nrow(bin_runs ) - 1 )) {
687+ if (b $ bin_midpoints [[i + 1 ]] - b $ bin_midpoints [[i ]] < = window ) {
688+ dots_to_move = floor((bin_runs $ count [[i + 1 ]] - bin_runs $ count [[i ]]) / 2 )
689+ bin_runs $ count [[i ]] = bin_runs $ count [[i ]] + dots_to_move
690+ bin_runs $ count [[i + 1 ]] = bin_runs $ count [[i + 1 ]] - dots_to_move
691+ }
692+ }
693+
694+ bin_runs = renumber_bins(bin_runs )
695+ locate_bins(bin_runs , x )
696+ }
627697
628698# grid swarm -------------------------------------------------------------
629699
0 commit comments