From 28edd2b77963f05b2ca5c42fd789ec4398215cf0 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 7 Dec 2024 00:34:27 +0000 Subject: [PATCH 1/2] small refactor of bmerge() to check roll-on-factor condition first --- R/between.R | 6 ++++-- R/bmerge.R | 18 +++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/R/between.R b/R/between.R index e964347329..ab968b6eb7 100644 --- a/R/between.R +++ b/R/between.R @@ -76,8 +76,10 @@ inrange = function(x,lower,upper,incbounds=TRUE) { verbose = isTRUE(getOption("datatable.verbose")) if (verbose) {last.started.at=proc.time();catf("forderv(query) took ... ");flush.console()} if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} - ans = bmerge(shallow(subject), query, 1L:2L, c(1L,1L), - 0, c(FALSE, TRUE), 0L, "all", ops, verbose) # fix for #1819, turn on verbose messages + ans = bmerge( + shallow(subject), query, + icols=1L:2L, xcols=c(1L,1L), + roll=0.0, rollends=c(FALSE, TRUE), nomatch=0L, mult="all", ops, verbose) # fix for #1819, turn on verbose messages xo = ans$xo options(datatable.verbose=FALSE) setDT(ans[c("starts", "lens")], key=c("starts", "lens")) diff --git a/R/bmerge.R b/R/bmerge.R index 317fe2f645..bd6e8b7b0b 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -22,6 +22,13 @@ coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_m bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbose) { + if (roll != 0.0 && length(icols)) { + last_x_idx = tail(xcols, 1L) + last_i_idx = tail(icols, 1L) + if (is.factor(x[[last_x_idx]]) || is.factor(i[[last_i_idx]])) + stopf("Attempting roll join on factor column when joining x.%s to i.%s. Only integer, double or character columns may be roll joined.", names(x)[last_x_idx], names(i)[last_i_idx]) + } + callersi = i i = shallow(i) # Just before the call to bmerge() in [.data.table there is a shallow() copy of i to prevent coercions here @@ -59,9 +66,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos iname = paste0("i.", names(i)[icol]) if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type == i_merge_type) { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) + next + } if (x_merge_type=="factor" || i_merge_type=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) if (x_merge_type=="factor" && i_merge_type=="factor") { if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]]) # nomatch=0L otherwise a level that is missing would match to NA values @@ -82,11 +91,6 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } - # we check factors first to cater for the case when trying to do rolling joins on factors - if (x_merge_type == i_merge_type) { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) - next - } cfl = c("character", "logical", "factor") if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) { msg = if(verbose) gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") else NULL From 015827255d5186e7df4b4dafa77f297e5323e02a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 7 Dec 2024 02:05:08 +0000 Subject: [PATCH 2/2] restore factor checks as coming first --- R/bmerge.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index f3b59303c8..ef7790f278 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -66,10 +66,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos iname = paste0("i.", names(i)[icol]) if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) - if (x_merge_type == i_merge_type) { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) - next - } + # we check factors first because they might have different levels if (x_merge_type=="factor" || i_merge_type=="factor") { if (x_merge_type=="factor" && i_merge_type=="factor") { if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) @@ -91,6 +88,10 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } + if (x_merge_type == i_merge_type) { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) + next + } cfl = c("character", "logical", "factor") if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) { msg = if(verbose) gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") else NULL