From ef314b934b98634f4585b4a08e68df35d727a884 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Tue, 21 Jan 2025 02:02:50 +0530 Subject: [PATCH 01/14] Setup Docker inside the dev container --- .devcontainer/devcontainer.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index cfa5f16098..ec576172f1 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,9 +1,10 @@ { - "build": { "dockerfile": "r-devel-gcc/Dockerfile", "context": ".." }, + "build": { "dockerfile": "Dockerfile", "context": "../.." }, "customizations": { "vscode": { "extensions": [ "REditorSupport.r", "ms-vscode.cpptools-extension-pack" ] - }} + }}, + "postCreateCommand": "apt-get update && apt-get install -y docker.io" } From 0be4b626482620bf0aecaa35b4a4bb27f5a74316 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Tue, 21 Jan 2025 21:23:59 +0530 Subject: [PATCH 02/14] Trigger CI From 7bddf44a913bad6045877a8abb0109cbb43914d0 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Tue, 21 Jan 2025 21:58:10 +0530 Subject: [PATCH 03/14] Add GitHub Actions CI/CD workflow --- .github/workflows/ci-cd.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/ci-cd.yml diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml new file mode 100644 index 0000000000..af9245e53a --- /dev/null +++ b/.github/workflows/ci-cd.yml @@ -0,0 +1,32 @@ +name: R Project CI/CD + +on: + push: + branches: + - main + - setup-github-actions + pull_request: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up R + uses: r-lib/actions/setup-r@v2 + with: + r-version: devel + + - name: Install dependencies + run: | + install.packages("devtools") + devtools::install_deps(dependencies = TRUE) + + - name: Run tests + run: | + R CMD check --as-cran From 4e9bd73d2cbf334a960d44be29e879acaac880e0 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Wed, 22 Jan 2025 12:58:28 +0530 Subject: [PATCH 04/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- R/data.table.R | 61 +++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 8ef2924fef..779df606c4 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -22,6 +22,26 @@ methods::setPackageName("data.table",.global) is.data.table = function(x) inherits(x, "data.table") is.ff = function(x) inherits(x, "ff") # define this in data.table so that we don't have to require(ff), but if user is using ff we'd like it to work +# Helper function to process assignment operations in lists or environments. +# Used internally for efficient recursive assignments in data.table. + +process_assignment <- function(name, x, parent_env) { + k = eval(name[[2L]], parent_env, parent_env) + if (is.list(k)) { + origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent_env, parent_env) + if (is.character(j)) { + if (length(j) != 1L) + stopf("Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but its length is %d", length(j)) + j = match(j, names(k)) + if (is.na(j)) + stopf("Item '%s' not found in names of input list", origj) + } + .Call(Csetlistelt, k, as.integer(j), x) + } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { + assign(as.character(name[[3L]]), x, k, inherits = FALSE) + } +} + #NCOL = function(x) { # # copied from base, but additionally covers data.table via is.list() # # because NCOL in base explicitly tests using is.data.frame() @@ -1214,22 +1234,9 @@ replace_dot_alias = function(e) { setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope if (is.name(name)) { assign(as.character(name),x,parent.frame(),inherits=TRUE) - } else if (.is_simple_extraction(name)) { # TODO(#6702): use a helper here as the code is very similar to setDT(). - k = eval(name[[2L]], parent.frame(), parent.frame()) - if (is.list(k)) { - origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame()) - if (is.character(j)) { - if (length(j)!=1L) stopf("Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but its length is %d", length(j)) - j = match(j, names(k)) - if (is.na(j)) internal_error("item '%s' not found in names of list", origj) # nocov - } - .Call(Csetlistelt,k,as.integer(j), x) - } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { - assign(as.character(name[[3L]]), x, k, inherits=FALSE) - } else if (isS4(k)) { - .Call(CsetS4elt, k, as.character(name[[3L]]), x) - } - } # TO DO: else if env$<- or list$<- + } else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) { + process_assignment(name, x, parent.frame()) + } } } } @@ -2962,25 +2969,13 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { if (is.name(name)) { name = as.character(name) assign(name, x, parent.frame(), inherits=TRUE) - } else if (.is_simple_extraction(name)) { - # common case is call from 'lapply()' - k = eval(name[[2L]], parent.frame(), parent.frame()) - if (is.list(k)) { - origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame()) - if (length(j) == 1L) { - if (is.character(j)) { - j = match(j, names(k)) - if (is.na(j)) - stopf("Item '%s' not found in names of input list", origj) - } - } - .Call(Csetlistelt, k, as.integer(j), x) - } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { - assign(as.character(name[[3L]]), x, k, inherits=FALSE) - } else if (isS4(k)) { + } else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) { + process_assignment(name, x, parent.frame()) + } + else if (isS4(k)) { .Call(CsetS4elt, k, as.character(name[[3L]]), x) } - } else if (name %iscall% "get") { # #6725 + else if (name %iscall% "get") { # #6725 # edit 'get(nm, env)' call to be 'assign(nm, x, envir=env)' name = match.call(get, name) name[[1L]] = quote(assign) From 97e1517c69c8f546def4035a0fc5616d1d027a0b Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Wed, 22 Jan 2025 15:41:12 +0530 Subject: [PATCH 05/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- R/data.table.R | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 779df606c4..0922c9d92b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -26,20 +26,20 @@ is.ff = function(x) inherits(x, "ff") # define this in data.table so that we do # Used internally for efficient recursive assignments in data.table. process_assignment <- function(name, x, parent_env) { - k = eval(name[[2L]], parent_env, parent_env) - if (is.list(k)) { - origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent_env, parent_env) - if (is.character(j)) { - if (length(j) != 1L) - stopf("Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but its length is %d", length(j)) - j = match(j, names(k)) - if (is.na(j)) - stopf("Item '%s' not found in names of input list", origj) - } - .Call(Csetlistelt, k, as.integer(j), x) - } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { - assign(as.character(name[[3L]]), x, k, inherits = FALSE) - } + k = eval(name[[2L]], parent_env, parent_env) + if (is.list(k)) { + origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent_env, parent_env) + if (is.character(j)) { + if (length(j) != 1L) + stopf("Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but its length is %d", length(j)) + j = match(j, names(k)) + if (is.na(j)) + stopf("Item '%s' not found in names of input list", origj) + } + .Call(Csetlistelt, k, as.integer(j), x) + } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { + assign(as.character(name[[3L]]), x, k, inherits = FALSE) + } } #NCOL = function(x) { From b23d277b737006906358fe53beb3907659b50b38 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Wed, 22 Jan 2025 23:09:48 +0530 Subject: [PATCH 06/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1d31d232ce..64ffdb035d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3295,7 +3295,7 @@ test(1034, as.data.table(x<-as.character(sample(letters, 5))), data.table(V1=x)) test(1035.291, melt(dt, measure.vars=NA_integer_, id.vars=NULL), error="One or more values in 'measure.vars'") test(1035.30, melt(dt, id.vars=NA_integer_), error="One or more values in 'id.vars'") test(1035.31, melt(dt, measure.vars=NA_character_), error="One or more values in 'measure.vars'") - test(1035.32, melt(dt, id.vars=NA_character_), error="One or more values in 'id.vars'") + test(1035.32, melt(dt, id.vars=NA_character_), error="One or more values in 'id.vars' is invalid.") if (test_R.utils) { # dup names in variable used to generate malformed factor error and/or segfault, #1754; was test 1570 From 76eb5bd2bd79f0737d8556553ec226c3ac80c595 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Wed, 22 Jan 2025 23:26:44 +0530 Subject: [PATCH 07/14] Reverted .devcontainer/devcontainer.json to match master --- .devcontainer/devcontainer.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ec576172f1..cfa5f16098 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,10 +1,9 @@ { - "build": { "dockerfile": "Dockerfile", "context": "../.." }, + "build": { "dockerfile": "r-devel-gcc/Dockerfile", "context": ".." }, "customizations": { "vscode": { "extensions": [ "REditorSupport.r", "ms-vscode.cpptools-extension-pack" ] - }}, - "postCreateCommand": "apt-get update && apt-get install -y docker.io" + }} } From e12f726d4bd761302f38b11c70c60412c6b41fb5 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Thu, 23 Jan 2025 01:22:20 +0530 Subject: [PATCH 08/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- R/fmelt.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/fmelt.R b/R/fmelt.R index 38ef064354..4b4203ecba 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -182,6 +182,12 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl value.name = "value", ..., na.rm = FALSE, variable.factor = TRUE, value.factor = FALSE, verbose = getOption("datatable.verbose")) { if (!is.data.table(data)) stopf("'data' must be a data.table") + + # Validate id.vars + if (any(is.na(id.vars) | id.vars == "")) { + stopf("One or more values in 'id.vars' is invalid.") + } + if (missing(id.vars)) id.vars=NULL if (missing(measure.vars)) measure.vars = NULL measure.sub = substitute(measure.vars) From cecfb0b269e3dff4c9817d7b51b1a3b142159042 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Thu, 23 Jan 2025 01:36:23 +0530 Subject: [PATCH 09/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- R/fmelt.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fmelt.R b/R/fmelt.R index 4b4203ecba..9d9c733d32 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -184,7 +184,7 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl if (!is.data.table(data)) stopf("'data' must be a data.table") # Validate id.vars - if (any(is.na(id.vars) | id.vars == "")) { + if (any(is.na(id.vars) | !nzchar(id.vars))) { stopf("One or more values in 'id.vars' is invalid.") } From 3592327f91b45be3d3ec8d1c7a476ff3034d22ee Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Thu, 23 Jan 2025 01:54:21 +0530 Subject: [PATCH 10/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 64ffdb035d..b92eb6de76 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3295,7 +3295,7 @@ test(1034, as.data.table(x<-as.character(sample(letters, 5))), data.table(V1=x)) test(1035.291, melt(dt, measure.vars=NA_integer_, id.vars=NULL), error="One or more values in 'measure.vars'") test(1035.30, melt(dt, id.vars=NA_integer_), error="One or more values in 'id.vars'") test(1035.31, melt(dt, measure.vars=NA_character_), error="One or more values in 'measure.vars'") - test(1035.32, melt(dt, id.vars=NA_character_), error="One or more values in 'id.vars' is invalid.") + test(1035.32, melt(dt, id.vars=NA_character_), error="One or more values in 'id.vars'.") if (test_R.utils) { # dup names in variable used to generate malformed factor error and/or segfault, #1754; was test 1570 From 52f9d89be4ca295fa197921a5c26ae1cad55485d Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Thu, 23 Jan 2025 01:58:57 +0530 Subject: [PATCH 11/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b92eb6de76..64ffdb035d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3295,7 +3295,7 @@ test(1034, as.data.table(x<-as.character(sample(letters, 5))), data.table(V1=x)) test(1035.291, melt(dt, measure.vars=NA_integer_, id.vars=NULL), error="One or more values in 'measure.vars'") test(1035.30, melt(dt, id.vars=NA_integer_), error="One or more values in 'id.vars'") test(1035.31, melt(dt, measure.vars=NA_character_), error="One or more values in 'measure.vars'") - test(1035.32, melt(dt, id.vars=NA_character_), error="One or more values in 'id.vars'.") + test(1035.32, melt(dt, id.vars=NA_character_), error="One or more values in 'id.vars' is invalid.") if (test_R.utils) { # dup names in variable used to generate malformed factor error and/or segfault, #1754; was test 1570 From 9ee7835a5b4f1b5d45166596e3874112b5b7c23e Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Thu, 23 Jan 2025 09:58:54 +0530 Subject: [PATCH 12/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- .github/workflows/test-coverage.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index c46e655a52..89614b61af 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -4,6 +4,9 @@ on: push: branches: [master] pull_request: + branches: + - master + - refactor-helper-function-issue-6702 # Ensure this branch exists name: test-coverage.yaml @@ -21,7 +24,7 @@ jobs: - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true - r-version: '4.3' # TODO(r-lib/covr#567): Go back to using r-devel + r-version: '4.3' # TODO: Go back to using r-devel - uses: r-lib/actions/setup-r-dependencies@v2 with: @@ -51,4 +54,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: coverage-test-failures - path: ${{ runner.temp }}/package + path: ${{ runner.temp }}/package \ No newline at end of file From 2e9835ecbf8c216d13d305bcbd33a18d150c0861 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Thu, 23 Jan 2025 10:07:42 +0530 Subject: [PATCH 13/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- .github/workflows/performance-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml index 10c6f2e3ab..176626acad 100644 --- a/.github/workflows/performance-tests.yml +++ b/.github/workflows/performance-tests.yml @@ -21,3 +21,5 @@ jobs: repo_token: ${{ secrets.GITHUB_TOKEN }} steps: - uses: Anirban166/Autocomment-atime-results@v1.4.1 + - name: Checkout branch + run: git checkout refactor-helper-function-issue-6702 || echo "Branch not found" \ No newline at end of file From f554637545c6e584b8b7212ef2e0c4c42eef6a86 Mon Sep 17 00:00:00 2001 From: nipun-gupta-3108 Date: Thu, 23 Jan 2025 10:34:09 +0530 Subject: [PATCH 14/14] Refactored duplicated logic in setDT and [,:=] to use a helper function --- .github/workflows/performance-tests.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml index 176626acad..69f190fb85 100644 --- a/.github/workflows/performance-tests.yml +++ b/.github/workflows/performance-tests.yml @@ -20,6 +20,13 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} repo_token: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: Anirban166/Autocomment-atime-results@v1.4.1 - - name: Checkout branch - run: git checkout refactor-helper-function-issue-6702 || echo "Branch not found" \ No newline at end of file + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: refactor-helper-function-issue-6702 + - run: | + echo "Current branch:" + git branch + echo "Listing all branches:" + git branch -a + - uses: Anirban166/Autocomment-atime-results@v1.4.1 \ No newline at end of file