ctsit · saipavan10-git · Mar 5, 2025 · Feb 24, 2025 · Feb 24, 2025 · Feb 24, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -38,6 +38,7 @@ Config/testthat/edition: 3
 Imports: 
     dplyr,
     lorem,
+    lubridate,
     purrr,
     rlang,
     tidyr

diff --git a/NAMESPACE b/NAMESPACE
@@ -9,4 +9,6 @@ export(get_long_slider_fields)
 export(get_long_text_field_values)
 export(get_long_text_fields)
 export(get_one_rectangle_of_values)
+importFrom(lubridate,now)
+importFrom(lubridate,today)
 importFrom(rlang,.data)
diff --git a/R/get_long_text_field_values.R b/R/get_long_text_field_values.R
@@ -9,6 +9,8 @@
 #' @return a long dataframe of text field values with one row for each value set.
 #' @export
 #'
+#' @importFrom lubridate today now
+#'
 #' @examples
 #' \dontrun{
 #' get_long_text_field_values(long_text_fields)
@@ -17,13 +19,117 @@ get_long_text_field_values <- function(long_text_fields) {
   tvt_na <- function(df) {
     df |>
       dplyr::filter(.data$tvt == "tvt_na") |>
-      dplyr::mutate(value = replicate(length(.data$field_name), lorem::ipsum_words(round(stats::rnorm(mean = .data$mean, sd = .data$sd, n = 1)))))
-    # |>
-    #   dplyr::slice_sample(prop = 0.5, weight_by = .data$weight)
+      dplyr::mutate(value = sapply(seq_len(nrow(df)), function(i) {
+        # Ensure the generated value is valid
+        n_words <- max(1, round(stats::rnorm(
+          n = 1,
+          mean = df$mean[i],
+          sd = df$sd[i]
+        )))
+        as.character(paste(lorem::ipsum_words(n_words), collapse = " "))
+      }))
+  }
+
+  tvt_datetime <- function(df) {
+    df |>
+      dplyr::filter(.data$tvt == "tvt_datetime") |>
+      dplyr::mutate(value = {
+        anchor_time <- eval(parse(text = .data$origin_function))
+        # Generate random times around anchor_time
+        times <- as.POSIXct(
+          stats::rnorm(
+            n = nrow(df),
+            mean = as.numeric(anchor_time),
+            sd = .data$sd # Using summarized value
+          ),
+          origin = "1970-01-01"
+        )
+        # Convert to character in ISO format
+        format(times, "%Y-%m-%d %H:%M:%S")
+      })
+  }
+
+  tvt_date <- function(df) {
+    df |>
+      dplyr::filter(.data$tvt == "tvt_date") |>
+      dplyr::mutate(value = {
+        anchor_date <- eval(parse(text = .data$origin_function))
+        # Generate random dates around anchor date
+        dates <- as.Date(
+          stats::rnorm(
+            n = nrow(df),
+            mean = as.numeric(anchor_date),
+            sd = .data$sd
+          ),
+          origin = "1970-01-01"
+        )
+        # Convert to character in ISO format based on the date format
+        format(dates, "%Y-%m-%d")
+      })
   }
 
+  tvt_email <- function(df) {
+    df |>
+      dplyr::filter(.data$tvt == "tvt_email") |>
+      dplyr::mutate(
+        value = as.character(replicate(
+          length(.data$field_name),
+          paste0(lorem::ipsum_words(1), "@example.org")
+        ))
+      )
+  }
+
+  rtnorm <- function(n, mean, sd, a = NA, b = NA) {
+    a <- as.numeric(a)
+    b <- as.numeric(b)
+    a <- ifelse(is.na(a), -Inf, a)
+    b <- ifelse(is.na(b), Inf, b)
+    stats::qnorm(stats::runif(n, stats::pnorm(a, mean, sd), stats::pnorm(b, mean, sd)), mean, sd)
+  }
+
+  tvt_integer <- function(df) {
+    df |>
+      dplyr::filter(.data$tvt == "tvt_integer") |>
+      dplyr::mutate(
+        value = as.character(round(rtnorm(
+          n = length(.data$field_name), mean = .data$mean, sd = .data$sd,
+          a = .data$text_validation_min, b = .data$text_validation_max
+        )))
+      )
+  }
+
+  tvt_number <- function(df) {
+    df |>
+      dplyr::filter(.data$tvt == "tvt_number") |>
+      dplyr::mutate(
+        value = as.character(round(rtnorm(
+          n = length(.data$field_name), mean = .data$mean, sd = .data$sd,
+          a = .data$text_validation_min, b = .data$text_validation_max
+        ), digits = 2))
+      )
+  }
+
+  tvt_zipcode <- function(df) {
+    df |>
+      dplyr::filter(.data$tvt == "tvt_zipcode") |>
+      dplyr::mutate(
+        value = "32611"
+      )
+  }
+
+  tvt_phone <- function(df) {
+    df |>
+      dplyr::filter(.data$tvt == "tvt_phone") |>
+      dplyr::mutate(
+        value = "800-867-5309"
+      )
+  }
+
+
   tvt_types <- c(
-    "tvt_na"
+    "tvt_na", "tvt_datetime", "tvt_date",
+    "tvt_email", "tvt_integer", "tvt_number",
+    "tvt_zipcode", "tvt_phone"
   )
 
   process_one_text_validation_type <- function(my_tvt, df) {
@@ -35,11 +141,21 @@ get_long_text_field_values <- function(long_text_fields) {
   }
 
   text_field_values <-
-    purrr::map(tvt_types,
-               process_one_text_validation_type,
-               long_text_fields |> dplyr::filter(.data$field_type == "text")
-               ) |>
-    dplyr::bind_rows()
+    purrr::map(
+      tvt_types,
+      process_one_text_validation_type,
+      long_text_fields |> dplyr::filter(.data$field_type == "text")
+    ) |>
+    # get rid of empty data frames in the list output
+    purrr::keep(~ nrow(.x) > 0)
+
+  # If the list is empty after filtering, create a minimal empty data frame
+  if (length(text_field_values) == 0) {
+    text_field_values <- dplyr::tibble(field_name = character(0), value = character(0))
+  } else {
+    # Otherwise, combine all data frames in the list
+    text_field_values <- dplyr::bind_rows(text_field_values)
+  }
 
   result <- text_field_values |>
     dplyr::select("field_name", "value")

diff --git a/R/get_long_text_fields.R b/R/get_long_text_fields.R
@@ -1,7 +1,8 @@
 #' @title Get every text field response from a REDCap data dictionary
 #'
 #' @description
-#' Given a REDCap data dictionary, enumerate every text field in that data dictionary and return a dataset with default weights
+#' Given a REDCap data dictionary, enumerate every text field in that data dictionary
+#' and return a dataset with default weights
 #'
 #' @param metadata A REDCap data dictionary
 #'
@@ -42,8 +43,19 @@ get_long_text_fields <- function(metadata) {
       "text_validation_max"
     )) |>
     dplyr::rename(text_validation_type = "text_validation_type_or_show_slider_number") |>
+    dplyr::mutate(
+      text_validation_min = as.character(.data$text_validation_min),
+      text_validation_max = as.character(.data$text_validation_max)
+    ) |>
     dplyr::mutate(tvt = dplyr::case_when(
       is.na(.data$text_validation_type) ~ "tvt_na",
+      grepl("^datetime.*", .data$text_validation_type) ~ "tvt_datetime",
+      grepl("^date_", .data$text_validation_type) ~ "tvt_date",
+      grepl("email", .data$text_validation_type) ~ "tvt_email",
+      grepl("integer", .data$text_validation_type) ~ "tvt_integer",
+      grepl("number", .data$text_validation_type) ~ "tvt_number",
+      grepl("zipcode", .data$text_validation_type) ~ "tvt_zipcode",
+      grepl("phone", .data$text_validation_type) ~ "tvt_phone",
       TRUE ~ "tvt_unsupported"
     )) |>
     # set weights for each response
@@ -57,6 +69,104 @@ get_long_text_fields <- function(metadata) {
     return(result)
   }
 
+  tvt_email <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(grepl("email", .data$text_validation_type)) |>
+      dplyr::mutate(weight = 100)
+    return(result)
+  }
+
+  tvt_datetime <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(grepl("^datetime.*", .data$text_validation_type)) |>
+      dplyr::mutate(
+        text_validation_min = dplyr::case_when(
+          grepl("^\\[.*\\]$", .data$text_validation_min) ~ NA_character_,
+          grepl("^-?[0-9]+(\\.[0-9]+)?$", .data$text_validation_min) ~ .data$text_validation_min,
+          grepl("^\\d{4}-\\d{2}-\\d{2}$", .data$text_validation_min) ~ as.character(.data$text_validation_min),
+          TRUE ~ NA_character_
+        ),
+        text_validation_max = dplyr::case_when(
+          grepl("^\\[.*\\]$", .data$text_validation_max) ~ NA_character_,
+          grepl("^-?[0-9]+(\\.[0-9]+)?$", .data$text_validation_max) ~ .data$text_validation_max,
+          grepl("^\\d{4}-\\d{2}-\\d{2}$", .data$text_validation_max) ~ as.character(.data$text_validation_max),
+          TRUE ~ NA_character_
+        ),
+        origin_function = "lubridate::now()",
+        sd = 3600 * 24
+      )
+    return(result)
+  }
+
+  tvt_date <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(grepl("^date_", .data$text_validation_type)) |>
+      dplyr::mutate(
+        text_validation_min = dplyr::case_when(
+          grepl("^\\[.*\\]$", .data$text_validation_min) ~ NA_character_,
+          grepl("^-?[0-9]+(\\.[0-9]+)?$", .data$text_validation_min) ~ .data$text_validation_min,
+          grepl("^\\d{4}-\\d{2}-\\d{2}$", .data$text_validation_min) ~ as.character(.data$text_validation_min),
+          TRUE ~ NA_character_
+        ),
+        text_validation_max = dplyr::case_when(
+          grepl("^\\[.*\\]$", .data$text_validation_max) ~ NA_character_,
+          grepl("^-?[0-9]+(\\.[0-9]+)?$", .data$text_validation_max) ~ .data$text_validation_max,
+          grepl("^\\d{4}-\\d{2}-\\d{2}$", .data$text_validation_max) ~ as.character(.data$text_validation_max),
+          TRUE ~ NA_character_
+        ),
+        origin_function = "lubridate::today()",
+        sd = 3600 * 24
+      )
+    return(result)
+  }
+
+  tvt_integer <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(grepl("integer", .data$text_validation_type)) |>
+      dplyr::mutate(
+        min_val = dplyr::coalesce(as.numeric(.data$text_validation_min), as.numeric(.data$text_validation_max) - 10, 0),
+        max_val = dplyr::coalesce(as.numeric(.data$text_validation_max), as.numeric(.data$text_validation_min) + 10, 10),
+        mean = (.data$min_val + .data$max_val) / 2,
+        sd = (.data$max_val - .data$min_val) / 6
+      ) |>
+      dplyr::select(-"min_val", -"max_val")
+    return(result)
+  }
+
+  tvt_number <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(grepl("number", .data$text_validation_type)) |>
+      dplyr::mutate(
+        min_val = dplyr::coalesce(as.numeric(.data$text_validation_min), as.numeric(.data$text_validation_max) - 10, 0),
+        max_val = dplyr::coalesce(as.numeric(.data$text_validation_max), as.numeric(.data$text_validation_min) + 10, 10),
+        mean = (.data$min_val + .data$max_val) / 2,
+        sd = (.data$max_val - .data$min_val) / 6
+      ) |>
+      dplyr::select(-"min_val", -"max_val")
+    return(result)
+  }
+
+  tvt_zipcode <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(grepl("zipcode", .data$text_validation_type)) |>
+      dplyr::mutate(weight = 100)
+    return(result)
+  }
+
+  tvt_phone <- function(text_fields) {
+    result <-
+      text_fields |>
+      dplyr::filter(grepl("phone", .data$text_validation_type)) |>
+      dplyr::mutate(weight = 100)
+    return(result)
+  }
+
   tvt_unsupported <- function(text_fields) {
     result <-
       text_fields |>
@@ -65,8 +175,9 @@ get_long_text_fields <- function(metadata) {
   }
 
   tvt_types <- c(
-    "tvt_na",
-    "tvt_unsupported"
+    "tvt_na", "tvt_datetime", "tvt_unsupported", "tvt_date",
+    "tvt_email", "tvt_integer", "tvt_number", "tvt_zipcode",
+    "tvt_phone"
   )
 
   process_one_text_validation_type <- function(my_tvt, df) {

diff --git a/README.md b/README.md
@@ -19,26 +19,28 @@ Copy [proof_of_concept.R](https://github.com/ctsit/redcapfiller/blob/main/proof_
 
 ### Limitations
 
-REDCap Filler does not yet understand all the dimensions of a modern REDCap project. It can fill the categorical fields. It can fill unvalidated text fields. It ignores all other field types and will not attempt to fill them. It doesn't even know how to fill a date field. :-( Filler only knows how to fill classic projects without repeating forms or events. It does not honor form display logic and ignores all fields governed by branching logic.
+REDCap Filler does not yet understand all the dimensions of a modern REDCap project. It can fill the categorical fields. It can fill unvalidated text field and the text validation types date, datetime, email, integer, number, phone, and zipcode. It ignores all other field types and validation types and will not attempt to fill them. Filler only knows how to fill classic projects without repeating forms or events. It does not honor form display logic and ignores all fields governed by branching logic.
 
-Focusing more on what Filler _can_ do, the first release milestone will support these features:
+Focusing more on what Filler _can_ or _will_ do, the first release milestone will support these features:
 
-- Use the REDCap data dictionary as the primary input. Add a few parameters to define record count and forms to fill
+- Use the REDCap data dictionary as the primary input. Add a few parameters to define record count and forms to fill.
 - Work on classic projects with no repeating objects.
 - Fill the forms named in a vector of form names.
-- Fill out every field on the named forms as long as there is no BL constraint.
+- Fill out every field on the named forms as long as there is no BL constraint. (Note: Timeline constraints might delay some text validation types)
 - Do not violate any data constraints.
 - Inject a simple default randomness wherever practicable.
 - Set the form completed fields to green.
 - Use the REDCap API to read the data dictionary and write the data.
 - Provide a uniform distribution on categorical fields.
+- Provide normal distribution on numeric and date fields.
 
 ### Futures
 
 This project aims to populate complex REDCap projects using the project design. If the REDCap API exposes a design dimension, we plan to use that to guide how the Filler populates projects. Yet, that will take some time to develop fully. This is the proposed timeline of features:
 
 1. Populate all fields in a classic project with no repeating objects, ignoring fields with BL constraints and completely ignoring FDL.
 2. Add support for longitudinal projects
+3. Complete any text validation types missing in the first release.
 3. Add support for repeating events and repeating forms.
 4. Allow non-uniform distribution of categorical fields. Allow non-default distributions on ranged fields.
 5. Allow inter-record and intra-record date offsets.

diff --git a/man/get_long_text_fields.Rd b/man/get_long_text_fields.Rd
diff --git a/tests/testthat/get_long_text_field_values/input.rds b/tests/testthat/get_long_text_field_values/input.rds
diff --git a/tests/testthat/get_one_rectangle_of_values/input.rds b/tests/testthat/get_one_rectangle_of_values/input.rds
diff --git a/tests/testthat/get_one_rectangle_of_values/make_test_data.R b/tests/testthat/get_one_rectangle_of_values/make_test_data.R
@@ -1,9 +1,11 @@
-metadata_file <- testthat::test_path("get_one_rectangle_of_values", "metadata.csv")
+metadata_file <- testthat::test_path("shared_testdata", "metadata.csv")
 metadata <- readr::read_csv(metadata_file)
 
 long_fields_and_responses <- dplyr::bind_rows(
   get_long_categorical_field_responses(metadata),
-  get_long_text_fields(metadata)
+  get_long_text_fields(metadata),
+  get_long_notes_fields(metadata),
+  get_long_slider_fields(metadata)
 )
 
 long_fields_and_responses |>

diff --git a/tests/testthat/test-get_long_text_field_values.R b/tests/testthat/test-get_long_text_field_values.R
@@ -31,3 +31,19 @@ testthat::test_that("get_long_text_field_values returns a varying vector of valu
   ))
 })
 
+testthat::test_that("get_long_text_field_values returns a value field for date and datetime fields", {
+  date_fields <- output$field_name[grepl("^date.", output$field_name)]
+  datetime_fields <- output$field_name[grepl("^datetime.", output$field_name)]
+
+  date_values <- output$value[output$field_name %in% date_fields]
+  datetime_values <- output$value[output$field_name %in% datetime_fields]
+
+  testthat::expect_true(length(date_fields) > 0 | length(datetime_fields) > 0)
+  testthat::expect_true(all(date_values != "") & all(datetime_values != ""))
+})
+
+# test that every value is a character
+testthat::test_that("get_long_text_field_values returns a character vector", {
+  testthat::expect_true(all(sapply(output$value, is.character)))
+})
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -38,6 +38,7 @@ Config/testthat/edition: 3 @@
     Imports:
         dplyr,
         lorem,
+        lubridate,
         purrr,
         rlang,
         tidyr
@@ Expand Down @@