Skip to content

Commit a66fcf9

Browse files
Merge pull request #26 from saipavan10-git/add_support_for_dt
Add support for various tvt fields
2 parents 23a7bca + e1a07a5 commit a66fcf9

File tree

12 files changed

+342
-41
lines changed

12 files changed

+342
-41
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Config/testthat/edition: 3
3838
Imports:
3939
dplyr,
4040
lorem,
41+
lubridate,
4142
purrr,
4243
rlang,
4344
tidyr

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@ export(get_long_slider_fields)
99
export(get_long_text_field_values)
1010
export(get_long_text_fields)
1111
export(get_one_rectangle_of_values)
12+
importFrom(lubridate,now)
13+
importFrom(lubridate,today)
1214
importFrom(rlang,.data)

R/get_long_text_field_values.R

Lines changed: 125 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#' @return a long dataframe of text field values with one row for each value set.
1010
#' @export
1111
#'
12+
#' @importFrom lubridate today now
13+
#'
1214
#' @examples
1315
#' \dontrun{
1416
#' get_long_text_field_values(long_text_fields)
@@ -17,13 +19,117 @@ get_long_text_field_values <- function(long_text_fields) {
1719
tvt_na <- function(df) {
1820
df |>
1921
dplyr::filter(.data$tvt == "tvt_na") |>
20-
dplyr::mutate(value = replicate(length(.data$field_name), lorem::ipsum_words(round(stats::rnorm(mean = .data$mean, sd = .data$sd, n = 1)))))
21-
# |>
22-
# dplyr::slice_sample(prop = 0.5, weight_by = .data$weight)
22+
dplyr::mutate(value = sapply(seq_len(nrow(df)), function(i) {
23+
# Ensure the generated value is valid
24+
n_words <- max(1, round(stats::rnorm(
25+
n = 1,
26+
mean = df$mean[i],
27+
sd = df$sd[i]
28+
)))
29+
as.character(paste(lorem::ipsum_words(n_words), collapse = " "))
30+
}))
31+
}
32+
33+
tvt_datetime <- function(df) {
34+
df |>
35+
dplyr::filter(.data$tvt == "tvt_datetime") |>
36+
dplyr::mutate(value = {
37+
anchor_time <- eval(parse(text = .data$origin_function))
38+
# Generate random times around anchor_time
39+
times <- as.POSIXct(
40+
stats::rnorm(
41+
n = nrow(df),
42+
mean = as.numeric(anchor_time),
43+
sd = .data$sd # Using summarized value
44+
),
45+
origin = "1970-01-01"
46+
)
47+
# Convert to character in ISO format
48+
format(times, "%Y-%m-%d %H:%M:%S")
49+
})
50+
}
51+
52+
tvt_date <- function(df) {
53+
df |>
54+
dplyr::filter(.data$tvt == "tvt_date") |>
55+
dplyr::mutate(value = {
56+
anchor_date <- eval(parse(text = .data$origin_function))
57+
# Generate random dates around anchor date
58+
dates <- as.Date(
59+
stats::rnorm(
60+
n = nrow(df),
61+
mean = as.numeric(anchor_date),
62+
sd = .data$sd
63+
),
64+
origin = "1970-01-01"
65+
)
66+
# Convert to character in ISO format based on the date format
67+
format(dates, "%Y-%m-%d")
68+
})
2369
}
2470

71+
tvt_email <- function(df) {
72+
df |>
73+
dplyr::filter(.data$tvt == "tvt_email") |>
74+
dplyr::mutate(
75+
value = as.character(replicate(
76+
length(.data$field_name),
77+
paste0(lorem::ipsum_words(1), "@example.org")
78+
))
79+
)
80+
}
81+
82+
rtnorm <- function(n, mean, sd, a = NA, b = NA) {
83+
a <- as.numeric(a)
84+
b <- as.numeric(b)
85+
a <- ifelse(is.na(a), -Inf, a)
86+
b <- ifelse(is.na(b), Inf, b)
87+
stats::qnorm(stats::runif(n, stats::pnorm(a, mean, sd), stats::pnorm(b, mean, sd)), mean, sd)
88+
}
89+
90+
tvt_integer <- function(df) {
91+
df |>
92+
dplyr::filter(.data$tvt == "tvt_integer") |>
93+
dplyr::mutate(
94+
value = as.character(round(rtnorm(
95+
n = length(.data$field_name), mean = .data$mean, sd = .data$sd,
96+
a = .data$text_validation_min, b = .data$text_validation_max
97+
)))
98+
)
99+
}
100+
101+
tvt_number <- function(df) {
102+
df |>
103+
dplyr::filter(.data$tvt == "tvt_number") |>
104+
dplyr::mutate(
105+
value = as.character(round(rtnorm(
106+
n = length(.data$field_name), mean = .data$mean, sd = .data$sd,
107+
a = .data$text_validation_min, b = .data$text_validation_max
108+
), digits = 2))
109+
)
110+
}
111+
112+
tvt_zipcode <- function(df) {
113+
df |>
114+
dplyr::filter(.data$tvt == "tvt_zipcode") |>
115+
dplyr::mutate(
116+
value = "32611"
117+
)
118+
}
119+
120+
tvt_phone <- function(df) {
121+
df |>
122+
dplyr::filter(.data$tvt == "tvt_phone") |>
123+
dplyr::mutate(
124+
value = "800-867-5309"
125+
)
126+
}
127+
128+
25129
tvt_types <- c(
26-
"tvt_na"
130+
"tvt_na", "tvt_datetime", "tvt_date",
131+
"tvt_email", "tvt_integer", "tvt_number",
132+
"tvt_zipcode", "tvt_phone"
27133
)
28134

29135
process_one_text_validation_type <- function(my_tvt, df) {
@@ -35,11 +141,21 @@ get_long_text_field_values <- function(long_text_fields) {
35141
}
36142

37143
text_field_values <-
38-
purrr::map(tvt_types,
39-
process_one_text_validation_type,
40-
long_text_fields |> dplyr::filter(.data$field_type == "text")
41-
) |>
42-
dplyr::bind_rows()
144+
purrr::map(
145+
tvt_types,
146+
process_one_text_validation_type,
147+
long_text_fields |> dplyr::filter(.data$field_type == "text")
148+
) |>
149+
# get rid of empty data frames in the list output
150+
purrr::keep(~ nrow(.x) > 0)
151+
152+
# If the list is empty after filtering, create a minimal empty data frame
153+
if (length(text_field_values) == 0) {
154+
text_field_values <- dplyr::tibble(field_name = character(0), value = character(0))
155+
} else {
156+
# Otherwise, combine all data frames in the list
157+
text_field_values <- dplyr::bind_rows(text_field_values)
158+
}
43159

44160
result <- text_field_values |>
45161
dplyr::select("field_name", "value")

R/get_long_text_fields.R

Lines changed: 114 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#' @title Get every text field response from a REDCap data dictionary
22
#'
33
#' @description
4-
#' Given a REDCap data dictionary, enumerate every text field in that data dictionary and return a dataset with default weights
4+
#' Given a REDCap data dictionary, enumerate every text field in that data dictionary
5+
#' and return a dataset with default weights
56
#'
67
#' @param metadata A REDCap data dictionary
78
#'
@@ -42,8 +43,19 @@ get_long_text_fields <- function(metadata) {
4243
"text_validation_max"
4344
)) |>
4445
dplyr::rename(text_validation_type = "text_validation_type_or_show_slider_number") |>
46+
dplyr::mutate(
47+
text_validation_min = as.character(.data$text_validation_min),
48+
text_validation_max = as.character(.data$text_validation_max)
49+
) |>
4550
dplyr::mutate(tvt = dplyr::case_when(
4651
is.na(.data$text_validation_type) ~ "tvt_na",
52+
grepl("^datetime.*", .data$text_validation_type) ~ "tvt_datetime",
53+
grepl("^date_", .data$text_validation_type) ~ "tvt_date",
54+
grepl("email", .data$text_validation_type) ~ "tvt_email",
55+
grepl("integer", .data$text_validation_type) ~ "tvt_integer",
56+
grepl("number", .data$text_validation_type) ~ "tvt_number",
57+
grepl("zipcode", .data$text_validation_type) ~ "tvt_zipcode",
58+
grepl("phone", .data$text_validation_type) ~ "tvt_phone",
4759
TRUE ~ "tvt_unsupported"
4860
)) |>
4961
# set weights for each response
@@ -57,6 +69,104 @@ get_long_text_fields <- function(metadata) {
5769
return(result)
5870
}
5971

72+
tvt_email <- function(text_fields) {
73+
result <-
74+
text_fields |>
75+
dplyr::filter(grepl("email", .data$text_validation_type)) |>
76+
dplyr::mutate(weight = 100)
77+
return(result)
78+
}
79+
80+
tvt_datetime <- function(text_fields) {
81+
result <-
82+
text_fields |>
83+
dplyr::filter(grepl("^datetime.*", .data$text_validation_type)) |>
84+
dplyr::mutate(
85+
text_validation_min = dplyr::case_when(
86+
grepl("^\\[.*\\]$", .data$text_validation_min) ~ NA_character_,
87+
grepl("^-?[0-9]+(\\.[0-9]+)?$", .data$text_validation_min) ~ .data$text_validation_min,
88+
grepl("^\\d{4}-\\d{2}-\\d{2}$", .data$text_validation_min) ~ as.character(.data$text_validation_min),
89+
TRUE ~ NA_character_
90+
),
91+
text_validation_max = dplyr::case_when(
92+
grepl("^\\[.*\\]$", .data$text_validation_max) ~ NA_character_,
93+
grepl("^-?[0-9]+(\\.[0-9]+)?$", .data$text_validation_max) ~ .data$text_validation_max,
94+
grepl("^\\d{4}-\\d{2}-\\d{2}$", .data$text_validation_max) ~ as.character(.data$text_validation_max),
95+
TRUE ~ NA_character_
96+
),
97+
origin_function = "lubridate::now()",
98+
sd = 3600 * 24
99+
)
100+
return(result)
101+
}
102+
103+
tvt_date <- function(text_fields) {
104+
result <-
105+
text_fields |>
106+
dplyr::filter(grepl("^date_", .data$text_validation_type)) |>
107+
dplyr::mutate(
108+
text_validation_min = dplyr::case_when(
109+
grepl("^\\[.*\\]$", .data$text_validation_min) ~ NA_character_,
110+
grepl("^-?[0-9]+(\\.[0-9]+)?$", .data$text_validation_min) ~ .data$text_validation_min,
111+
grepl("^\\d{4}-\\d{2}-\\d{2}$", .data$text_validation_min) ~ as.character(.data$text_validation_min),
112+
TRUE ~ NA_character_
113+
),
114+
text_validation_max = dplyr::case_when(
115+
grepl("^\\[.*\\]$", .data$text_validation_max) ~ NA_character_,
116+
grepl("^-?[0-9]+(\\.[0-9]+)?$", .data$text_validation_max) ~ .data$text_validation_max,
117+
grepl("^\\d{4}-\\d{2}-\\d{2}$", .data$text_validation_max) ~ as.character(.data$text_validation_max),
118+
TRUE ~ NA_character_
119+
),
120+
origin_function = "lubridate::today()",
121+
sd = 3600 * 24
122+
)
123+
return(result)
124+
}
125+
126+
tvt_integer <- function(text_fields) {
127+
result <-
128+
text_fields |>
129+
dplyr::filter(grepl("integer", .data$text_validation_type)) |>
130+
dplyr::mutate(
131+
min_val = dplyr::coalesce(as.numeric(.data$text_validation_min), as.numeric(.data$text_validation_max) - 10, 0),
132+
max_val = dplyr::coalesce(as.numeric(.data$text_validation_max), as.numeric(.data$text_validation_min) + 10, 10),
133+
mean = (.data$min_val + .data$max_val) / 2,
134+
sd = (.data$max_val - .data$min_val) / 6
135+
) |>
136+
dplyr::select(-"min_val", -"max_val")
137+
return(result)
138+
}
139+
140+
tvt_number <- function(text_fields) {
141+
result <-
142+
text_fields |>
143+
dplyr::filter(grepl("number", .data$text_validation_type)) |>
144+
dplyr::mutate(
145+
min_val = dplyr::coalesce(as.numeric(.data$text_validation_min), as.numeric(.data$text_validation_max) - 10, 0),
146+
max_val = dplyr::coalesce(as.numeric(.data$text_validation_max), as.numeric(.data$text_validation_min) + 10, 10),
147+
mean = (.data$min_val + .data$max_val) / 2,
148+
sd = (.data$max_val - .data$min_val) / 6
149+
) |>
150+
dplyr::select(-"min_val", -"max_val")
151+
return(result)
152+
}
153+
154+
tvt_zipcode <- function(text_fields) {
155+
result <-
156+
text_fields |>
157+
dplyr::filter(grepl("zipcode", .data$text_validation_type)) |>
158+
dplyr::mutate(weight = 100)
159+
return(result)
160+
}
161+
162+
tvt_phone <- function(text_fields) {
163+
result <-
164+
text_fields |>
165+
dplyr::filter(grepl("phone", .data$text_validation_type)) |>
166+
dplyr::mutate(weight = 100)
167+
return(result)
168+
}
169+
60170
tvt_unsupported <- function(text_fields) {
61171
result <-
62172
text_fields |>
@@ -65,8 +175,9 @@ get_long_text_fields <- function(metadata) {
65175
}
66176

67177
tvt_types <- c(
68-
"tvt_na",
69-
"tvt_unsupported"
178+
"tvt_na", "tvt_datetime", "tvt_unsupported", "tvt_date",
179+
"tvt_email", "tvt_integer", "tvt_number", "tvt_zipcode",
180+
"tvt_phone"
70181
)
71182

72183
process_one_text_validation_type <- function(my_tvt, df) {

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,28 @@ Copy [proof_of_concept.R](https://github.com/ctsit/redcapfiller/blob/main/proof_
1919

2020
### Limitations
2121

22-
REDCap Filler does not yet understand all the dimensions of a modern REDCap project. It can fill the categorical fields. It can fill unvalidated text fields. It ignores all other field types and will not attempt to fill them. It doesn't even know how to fill a date field. :-( Filler only knows how to fill classic projects without repeating forms or events. It does not honor form display logic and ignores all fields governed by branching logic.
22+
REDCap Filler does not yet understand all the dimensions of a modern REDCap project. It can fill the categorical fields. It can fill unvalidated text field and the text validation types date, datetime, email, integer, number, phone, and zipcode. It ignores all other field types and validation types and will not attempt to fill them. Filler only knows how to fill classic projects without repeating forms or events. It does not honor form display logic and ignores all fields governed by branching logic.
2323

24-
Focusing more on what Filler _can_ do, the first release milestone will support these features:
24+
Focusing more on what Filler _can_ or _will_ do, the first release milestone will support these features:
2525

26-
- Use the REDCap data dictionary as the primary input. Add a few parameters to define record count and forms to fill
26+
- Use the REDCap data dictionary as the primary input. Add a few parameters to define record count and forms to fill.
2727
- Work on classic projects with no repeating objects.
2828
- Fill the forms named in a vector of form names.
29-
- Fill out every field on the named forms as long as there is no BL constraint.
29+
- Fill out every field on the named forms as long as there is no BL constraint. (Note: Timeline constraints might delay some text validation types)
3030
- Do not violate any data constraints.
3131
- Inject a simple default randomness wherever practicable.
3232
- Set the form completed fields to green.
3333
- Use the REDCap API to read the data dictionary and write the data.
3434
- Provide a uniform distribution on categorical fields.
35+
- Provide normal distribution on numeric and date fields.
3536

3637
### Futures
3738

3839
This project aims to populate complex REDCap projects using the project design. If the REDCap API exposes a design dimension, we plan to use that to guide how the Filler populates projects. Yet, that will take some time to develop fully. This is the proposed timeline of features:
3940

4041
1. Populate all fields in a classic project with no repeating objects, ignoring fields with BL constraints and completely ignoring FDL.
4142
2. Add support for longitudinal projects
43+
3. Complete any text validation types missing in the first release.
4244
3. Add support for repeating events and repeating forms.
4345
4. Allow non-uniform distribution of categorical fields. Allow non-default distributions on ranged fields.
4446
5. Allow inter-record and intra-record date offsets.

man/get_long_text_fields.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Binary file not shown.
-913 Bytes
Binary file not shown.

tests/testthat/get_one_rectangle_of_values/make_test_data.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
metadata_file <- testthat::test_path("get_one_rectangle_of_values", "metadata.csv")
1+
metadata_file <- testthat::test_path("shared_testdata", "metadata.csv")
22
metadata <- readr::read_csv(metadata_file)
33

44
long_fields_and_responses <- dplyr::bind_rows(
55
get_long_categorical_field_responses(metadata),
6-
get_long_text_fields(metadata)
6+
get_long_text_fields(metadata),
7+
get_long_notes_fields(metadata),
8+
get_long_slider_fields(metadata)
79
)
810

911
long_fields_and_responses |>

tests/testthat/test-get_long_text_field_values.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,19 @@ testthat::test_that("get_long_text_field_values returns a varying vector of valu
3131
))
3232
})
3333

34+
testthat::test_that("get_long_text_field_values returns a value field for date and datetime fields", {
35+
date_fields <- output$field_name[grepl("^date.", output$field_name)]
36+
datetime_fields <- output$field_name[grepl("^datetime.", output$field_name)]
37+
38+
date_values <- output$value[output$field_name %in% date_fields]
39+
datetime_values <- output$value[output$field_name %in% datetime_fields]
40+
41+
testthat::expect_true(length(date_fields) > 0 | length(datetime_fields) > 0)
42+
testthat::expect_true(all(date_values != "") & all(datetime_values != ""))
43+
})
44+
45+
# test that every value is a character
46+
testthat::test_that("get_long_text_field_values returns a character vector", {
47+
testthat::expect_true(all(sapply(output$value, is.character)))
48+
})
49+

0 commit comments

Comments
 (0)