diff --git a/DESCRIPTION b/DESCRIPTION index 181cc94..f0d4add 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,17 @@ Type: Package Package: mwana -Title: Utilities for Analysing Children's Nutritional Status -Version: 0.1.0.9000 +Title: An Efficient Workflow for Plausibility Checks and Prevalence Analysis of Wasting in R +Version: 0.2.0.9000 Authors@R: c( person("Tomás", "Zaba", , "tomas.zaba@outlook.com", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0002-7079-3574")), person("Ernest", "Guevarra", role = c("aut", "cph"), comment = c(ORCID = "0000-0002-4887-4415")) ) -Description: A streamlined and comprehensive implementation of the Standardized +Description: A simple and streamlined workflow for plausibility checks and + prevalence analysis of wasting based on the Standardized Monitoring and Assessment of Relief and Transition (SMART) Methodology - guidelines for data quality checks and - prevalence estimation, with enhanced programmable process particularly when - handling multiple area datasets. + , with application in R. License: GPL (>= 3) URL: https://github.com/nutriverse/mwana, https://nutriverse.io/mwana Imports: diff --git a/NAMESPACE b/NAMESPACE index 21319a7..b4a0339 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,15 @@ # Generated by roxygen2: do not edit by hand -export(compute_combined_prevalence) -export(compute_muac_prevalence) -export(compute_wfhz_prevalence) export(define_wasting) export(flag_outliers) export(get_age_months) export(mw_check_ipcamn_ssreq) +export(mw_estimate_prevalence_combined) +export(mw_estimate_prevalence_mfaz) +export(mw_estimate_prevalence_muac) +export(mw_estimate_prevalence_screening) +export(mw_estimate_prevalence_wfhz) +export(mw_estimate_smart_age_wt) export(mw_neat_output_mfaz) export(mw_neat_output_muac) export(mw_neat_output_wfhz) @@ -20,15 +23,20 @@ export(mw_wrangle_wfhz) export(recode_muac) export(remove_flags) importFrom(dplyr,across) +importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) importFrom(dplyr,ends_with) importFrom(dplyr,everything) importFrom(dplyr,filter) importFrom(dplyr,group_by) +importFrom(dplyr,is_grouped_df) importFrom(dplyr,mutate) importFrom(dplyr,n) importFrom(dplyr,n_distinct) +importFrom(dplyr,pull) +importFrom(dplyr,relocate) importFrom(dplyr,rename) +importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(lubridate,ymd) importFrom(methods,is) @@ -40,6 +48,8 @@ importFrom(nipnTK,skewKurt) importFrom(rlang,.data) importFrom(rlang,enquo) importFrom(rlang,eval_tidy) +importFrom(rlang,quo_is_null) +importFrom(rlang,quo_name) importFrom(rlang,sym) importFrom(scales,label_percent) importFrom(scales,label_pvalue) @@ -51,4 +61,5 @@ importFrom(stats,prop.test) importFrom(stats,sd) importFrom(stats,setNames) importFrom(tibble,as_tibble) +importFrom(tibble,tibble) importFrom(zscorer,addWGSR) diff --git a/NEWS.md b/NEWS.md index 49cd478..2a4f404 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,28 @@ -# mwana v0.1.0.9000 (development version) +# mwana v0.2.0.9000 (development version)
+# mwana v0.2.0 + +## New features + +* Added new function `mw_estimate_prevalence_screening()` to estimate prevalence +of wasting by MUAC from non survey data: screenings, sentinel sites, etc. + +## Bug fixes + +* Resolved issues with `mw_neat_output_mfaz()`, `mw_neat_output_wfhz()` and +`mw_neat_output_muac()` not returning neat and tidy output for grouped `data.frame` +from their respective plausibility checkers. + +* Resolved issue with `edema` argument in prevalence functions that was not working +as expected when set to `NULL`. + +## General updates + +* Updated general package documentation, including references in vignettes. +* Built package using `R` version 4.4.2 + # mwana v0.1.0 * Initial pre-release version for alpha-testing. diff --git a/R/case_definitions.R b/R/case_definitions.R deleted file mode 100644 index 2696f6f..0000000 --- a/R/case_definitions.R +++ /dev/null @@ -1,255 +0,0 @@ -#' -#' Define wasting based on WFHZ, MFAZ, MUAC and Combined criteria -#' -#' @description -#' Define if a given observation in the dataset is wasted or not, on the basis of -#' WFHZ, MFAZ, MUAC and the combined criteria. -#' -#' @param df A dataset object of class `data.frame` to use. -#' -#' @param muac A vector of class `integer` of MUAC values in millimeters. -#' -#' @param zscore A vector of class `double` of WFHZ values (with 3 decimal places). -#' -#' @param edema A vector of class `character` of edema. Code should be -#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. -#' -#' @param cases A choice of the form of wasting to be defined. -#' -#' @param base A choice of the criterion on which the case-definition should be based. -#' -#' @returns A vector of class `numeric` of dummy values: 1 for case and 0 -#' for not case. -#' -#' @details -#' Use `define_wasting()` to add the case-definitions to data frame. -#' -#' @rdname case_definition -#' -#' -define_wasting_cases_muac <- function(muac, edema = NULL, - cases = c("gam", "sam", "mam")) { - ## Match argument ---- - cases <- match.arg(cases) - - if (!is.null(edema)) { - switch( - ### Define cases based on MUAC including edema ---- - cases, - "gam" = {gam <- ifelse(muac < 125 | edema == "y", 1, 0)}, - "sam" = {sam <- ifelse(muac < 115 | edema == "y", 1, 0)}, - "mam" = {mam <- ifelse((muac >= 115 & muac < 125 & edema == "n"), 1, 0)} - ) - } else { - switch( - ### Define cases based on MUAC ---- - cases, - "gam" = {gam <- ifelse(muac < 125, 1, 0)}, - "sam" = {sam <- ifelse(muac < 115, 1, 0)}, - "mam" = {mam <- ifelse((muac >= 115 & muac < 125), 1, 0)} - ) - } -} - -#' -#' -#' @rdname case_definition -#' -#' -define_wasting_cases_whz <- function(zscore, edema = NULL, - cases = c("gam", "sam", "mam")) { - ## Match argument ---- - cases <- match.arg(cases) - - if (!is.null(edema)) { - switch( - ### Define cases based on WFHZ including edema ---- - cases, - "gam" = {gam <- ifelse(zscore < -2 | edema == "y", 1, 0)}, - "sam" = {sam <- ifelse(zscore < - 3 | edema == "y", 1, 0)}, - "mam" = {mam <- ifelse((zscore >= -3 & zscore < -2 & edema == "n"), 1, 0)} - ) - } else { - switch( - ### Define cases based on WFHZ ---- - cases, - "gam" = {gam <- ifelse(zscore < -2, 1, 0)}, - "sam" = {sam <- ifelse(zscore < - 3, 1, 0)}, - "mam" = {mam <- ifelse(zscore >= -3 & zscore < -2, 1, 0)} - ) - } -} - -#' -#' -#' @rdname case_definition -#' -#' -define_wasting_cases_combined <- function(zscore, muac, edema = NULL, - cases = c("cgam", "csam", "cmam")) { - - ## Match argument ---- - cases <- match.arg(cases) - - if (!is.null(edema)) { - switch( - ### Define cases based on WFHZ or MUAC or edema ---- - cases, - "cgam" = {cgam <- ifelse(zscore < -2 | muac < 125 | edema == "y", 1, 0)}, - "csam" = {csam <- ifelse(zscore < -3 | muac < 115 | edema == "y", 1, 0)}, - "cmam" = {cmam <- ifelse((zscore >= -3 & zscore < -2) | (muac >= 115 & muac < 125) & (edema == "n"), 1, 0)} - ) - } else { - switch( - ### Define cases based on WFHZ or MUAC ---- - cases, - "cgam" = {cgam <- ifelse(zscore < -2 | muac < 125, 1, 0)}, - "csam" = {csam <- ifelse(zscore < -3 | muac < 115, 1, 0)}, - "cmam" = {cmam <- ifelse((zscore >= -3 & zscore < -2) | (muac >= 115 & muac < 125), 1, 0)} - ) - } -} - - -#' -#' @examples -#' -#' ## Weight-for-height based case-definition ---- -#' x <- anthro.02 |> -#' define_wasting( -#' zscore = wfhz, -#' edema = edema, -#' base = "wfhz" -#' ) -#' head(x) -#' -#' ## MUAC-based case-definition ---- -#' x <- anthro.02 |> -#' define_wasting( -#' muac = muac, -#' edema = edema, -#' base = "muac" -#' ) -#' head(x) -#' -#' ## Combined case-definition ---- -#' x <- anthro.02 |> -#' define_wasting( -#' zscore = wfhz, -#' muac = muac, -#' edema = edema, -#' base = "combined" -#' ) -#' head(x) -#' -#' @rdname case_definition -#' -#' @export -#' -define_wasting <- function(df, zscore = NULL, muac = NULL, edema = NULL, - base = c("wfhz", "muac", "combined")) { - - ## Match argument ---- - base <- match.arg(base) - - switch( - ### Add WFHZ based case definitions to data frame ---- - base, - "wfhz" = { - df |> - mutate( - gam = define_wasting_cases_whz( - zscore = {{ zscore }}, - edema = {{ edema }}, - cases = "gam" - ), - sam = define_wasting_cases_whz( - zscore = {{ zscore }}, - edema = {{ edema }}, - cases = "sam" - ), - mam = define_wasting_cases_whz( - zscore = {{ zscore }}, - edema = {{ edema }}, - cases = "mam") - ) - }, - ### Add MUAC based case definitions to data frame ---- - "muac" = { - df |> - mutate( - gam = define_wasting_cases_muac( - muac = {{ muac }}, - edema = {{ edema }}, - cases = "gam" - ), - sam = define_wasting_cases_muac( - muac = {{ muac }}, - edema = {{ edema }}, - cases = "sam" - ), - mam = define_wasting_cases_muac( - muac = {{ muac }}, - edema = {{ edema }}, - cases = "mam" - ) - ) - }, - ### Add combined (WFHZ or MUAC or edema) based case definitions to data frame ---- - "combined" = { - df |> - mutate( - cgam = define_wasting_cases_combined( - zscore = {{ zscore }}, - muac = {{ muac }}, - edema = {{ edema }}, - cases = "cgam" - ), - csam = define_wasting_cases_combined( - zscore = {{ zscore }}, - muac = {{ muac }}, - edema = {{ edema }}, - cases = "csam" - ), - cmam = define_wasting_cases_combined( - zscore = {{ zscore }}, - muac = {{ muac }}, - edema = {{ edema }}, - cases = "cmam" - ) - ) - } - ) -} - -#' -#' Classify wasting into severe or moderate wasting to be used in the -#' SMART MUAC tool weighting approach -#' -#' @param muac A vector of class `integer` of MUAC values in millimeters. -#' -#' @param .edema A vector of class `character` of edema. Code should be -#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. -#' -#' @returns A vector of class `character` of the same length as `muac` and `.edema` -#' indicating if a child is severe or moderately wasted or not wasted. -#' -#' -classify_wasting_for_cdc_approach <- function(muac, .edema = NULL) { - if (!is.null(.edema)) { - ## Define cases including edema ---- - x <- case_when( - muac < 115 | {{ .edema }} == "y" ~ "sam", - muac >= 115 & muac < 125 & {{ .edema }} == "n" ~ "mam", - .default = "not wasted" - ) - } else { - ## Define cases excluding edema ---- - x <- case_when( - muac < 115 ~ "sam", - muac >= 115 & muac < 125 ~ "mam", - .default = "not wasted" - ) - } - x -} diff --git a/R/data.R b/R/data.R index 00b8661..f4e76b3 100644 --- a/R/data.R +++ b/R/data.R @@ -74,13 +74,13 @@ #' A sample data of district level SMART surveys conducted in Mozambique #' #' @description -#' `anthro.03` contains survey data of four districts. Each district dataset +#' `anthro.03` contains survey data of four districts. Each district data set #' presents distinct data quality scenarios that requires tailored prevalence #' analysis approach: two districts show a problematic WFHZ standard deviation #' whilst the remaining are all within range. #' #' This sample data is useful to demonstrate the use of the prevalence functions on -#' a multi-area survey data where there can be variations in the rating of +#' a multiple-area survey data where there can be variations in the rating of #' acceptability of the standard deviation, hence require different analyses approaches #' for each area to ensure accurate estimation. #' @@ -113,7 +113,7 @@ #' #' @description #' Data was generated through a community-based sentinel site conducted -#' across three provinces. Each province's dataset presents distinct +#' across three provinces. Each province's data set presents distinct #' data quality scenarios, requiring tailored prevalence analysis: #' + "Province 1" has MFAZ's standard deviation and age ratio test rating of #' acceptability falling within range; @@ -122,7 +122,7 @@ #' + "Province 3" has both tests rated as problematic. #' #' This sample data is useful to demonstrate the use of prevalence functions on -#' a multi-area survey data where variations in the rating of acceptability of the +#' a multiple-area survey data where variations in the rating of acceptability of the #' standard deviation exist, hence require different analyses approaches for each #' area to ensure accurate estimation. #' diff --git a/R/ipc_amn_check.R b/R/ipc_amn_check.R index 9b1a7f8..be2c5f3 100644 --- a/R/ipc_amn_check.R +++ b/R/ipc_amn_check.R @@ -5,10 +5,10 @@ #' Evidence on the prevalence of acute malnutrition used in the IPC AMN #' can come from different sources: surveys, screenings or community-based #' surveillance system. The IPC set minimum sample size requirements -#' for each source. This function helps in verifying whether the requirements +#' for each source. This function helps in verifying whether those requirements #' were met or not depending on the source. #' -#' @param df A dataset object of class `data.frame` to check. +#' @param df A data set object of class `data.frame` to check. #' #' @param cluster A vector of class `integer` or `character` of unique cluster or #' screening or sentinel site IDs. If a `character` vector, ensure that names are @@ -23,7 +23,7 @@ #' @returns A summary table of class `data.frame`, of length 3 and width 1, for #' the check results. `n_clusters` is for the total number of unique clusters or #' screening or site IDs; `n_obs` for the correspondent total number of children -#' in the dataset; and `meet_ipc` for whether the IPC AMN requirements were met. +#' in the data set; and `meet_ipc` for whether the IPC AMN requirements were met. #' #' @references #' IPC Global Partners. 2021. *Integrated Food Security Phase Classification* diff --git a/R/mwana-package.R b/R/mwana-package.R index d1e15b2..94d171a 100644 --- a/R/mwana-package.R +++ b/R/mwana-package.R @@ -3,17 +3,25 @@ ## usethis namespace: start #' @importFrom dplyr across case_when group_by mutate n n_distinct rename summarise +#' @importFrom dplyr bind_rows #' @importFrom dplyr ends_with everything filter mutate #' @importFrom dplyr group_by +#' @importFrom dplyr is_grouped_df +#' @importFrom dplyr pull +#' @importFrom dplyr relocate +#' @importFrom dplyr select #' @importFrom dplyr summarise #' @importFrom lubridate ymd #' @importFrom methods is #' @importFrom nipnTK ageRatioTest digitPreference sexRatioTest skewKurt greensIndex #' @importFrom rlang .data sym enquo eval_tidy +#' @importFrom rlang quo_is_null +#' @importFrom rlang quo_name #' @importFrom scales label_percent label_pvalue #' @importFrom srvyr as_survey_design survey_mean #' @importFrom stats na.omit prop.test sd pnorm setNames #' @importFrom tibble as_tibble +#' @importFrom tibble tibble #' @importFrom zscorer addWGSR ## usethis namespace: end NULL diff --git a/R/plausibility_check_mfaz.R b/R/plausibility_check_mfaz.R index f96b50d..0c57a37 100644 --- a/R/plausibility_check_mfaz.R +++ b/R/plausibility_check_mfaz.R @@ -5,15 +5,15 @@ #' @description #' Check the overall plausibility and acceptability of MFAZ data through a #' structured test suite encompassing sampling and measurement-related biases checks -#' in the dataset. The test suite in this function follows the recommendation made +#' in the data set. The test suite in this function follows the recommendation made #' by Bilukha, O., & Kianian, B. (2023) on the plausibility of -#' constructing a comprehensive plausibility check similar to WFHZ to evaluate the -#' acceptability of MUAC data when the variable age exists in the dataset. +#' constructing a comprehensive plausibility check for MUAC data similar to WFHZ +#' to evaluate its acceptability when the variable age exists in the data set. #' #' The function works on a data frame returned from this package's wrangling #' function for age and for MFAZ data. #' -#' @param df A dataset object of class `data.frame` to check. +#' @param df A data set object of class `data.frame` to check. #' #' @param sex A vector of class `numeric` of child's sex. #' @@ -24,9 +24,19 @@ #' @param flags A vector of class `numeric` of flagged records. #' #' @returns -#' A summarised table of class `data.frame`, of length 17 and width 1, for +#' A summarized table of class `data.frame`, of length 17 and width 1, for #' the plausibility test results and their respective acceptability ratings. #' +#' @details +#' Whilst the function uses the same test checks and criteria as that of WFHZ +#' in the SMART plausibility check, the percent of flagged data is evaluated +#' using a different cut-off points, with a maximum acceptability of 2.0%, +#' as shown below: +#' +#' |**Excellent** | **Good** | **Acceptable** | **Problematic** | +#' | :---: | :---: | :---: | :---: | +#' | 0.0 - 1.0 | >1.0 - 1.5| >1.5 - 2.0 | >2.0 | +#' #' @references #' Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement #' quality of mid‐upper arm circumference data in anthropometric surveys and @@ -117,12 +127,13 @@ mw_plausibility_check_mfaz <- function(df, sex, muac, age, flags) { #' for improved clarity and readability. It converts scientific notations to standard #' notations, round values and rename columns to meaningful names. #' -#' @param df A data frame containing the summary table returned by this package's -#' MFAZ plausibility check function. Must be of class `data.frame`. +#' @param df An object of class `data.frame` returned by this package's +#' plausibility checker for MFAZ data, containing the summarized results to be +#' formatted. #' #' @returns -#' A data frame of the same length and width as `df`, with column names and -#' values formatted for clarity. +#' A `data.frame` object of the same length and width as `df`, with column names and +#' values formatted for clarity and readability. #' #' @examples #' ## First wrangle age data ---- @@ -160,6 +171,9 @@ mw_plausibility_check_mfaz <- function(df, sex, muac, age, flags) { #' @export #' mw_neat_output_mfaz <- function(df) { + ## Check if `df` is grouped ---- + is_grouped <- is_grouped_df(df) + ## Format data frame ---- df <- df |> mutate( @@ -177,6 +191,7 @@ mw_neat_output_mfaz <- function(df) { ## Rename columns ---- setNames( c( + if (is_grouped) "Group" else NULL, "Total children", "Flagged data (%)", "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", "Age ratio (p)", "Class. of age ratio", "DPS (#)", diff --git a/R/plausibility_check_muac.R b/R/plausibility_check_muac.R index 36c2702..5c96529 100644 --- a/R/plausibility_check_muac.R +++ b/R/plausibility_check_muac.R @@ -4,10 +4,10 @@ #' @description #' Check the overall plausibility and acceptability of raw MUAC data through a #' structured test suite encompassing sampling and measurement-related biases checks -#' in the dataset. The test suite in this function follows the recommendation made +#' in the data set. The test suite in this function follows the recommendation made #' by Bilukha, O., & Kianian, B. (2023). #' -#' @param df A dataset object of class `data.frame` to check. It must have been +#' @param df An object of class `data.frame` to check. It must have been #' wrangled using this package's wrangling function for MUAC. #' #' @param sex A vector of class `numeric` of child's sex. @@ -16,8 +16,15 @@ #' #' @param flags A vector of class `numeric` of flagged records. #' -#' @returns A summarised table of class `data.frame`, of length 9 and width 1, for -#' the plausibility test results and their respective acceptability ratings.. +#' @returns A summarized table of class `data.frame`, of length 9 and width 1, for +#' the plausibility test results and their respective acceptability ratings. +#' +#' @details +#' Cut-off points used for the percent of flagged records: +#' |**Excellent** | **Good** | **Acceptable** | **Problematic** | +#' | :---: | :---: | :---: | :---: | +#' | 0.0 - 1.0 | >1.0 - 1.5| >1.5 - 2.0 | >2.0 | +#' #' #' @references #' Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement @@ -85,12 +92,13 @@ mw_plausibility_check_muac <- function(df, sex, muac, flags) { #' MUAC data for improved clarity and readability. It converts scientific notations #' to standard notations, round values and rename columns to meaningful names. #' -#' @param df A data frame containing the summary table returned by this package's -#' plausibility check function for raw MUAC data. Must be of class `data.frame`. +#' @param df An object of class `data.frame` returned by this package's +#' plausibility checker for raw MUAC data, containing the summarized results to be +#' formatted. #' #' @returns -#' A data frame of the same length and width as `df`, with column names and -#' values formatted for clarity. +#' A `data.frame` object of the same length and width as `df`, with column names and +#' values formatted for clarity and readability. #' #' @examples #' ## First wranlge MUAC data ---- @@ -120,6 +128,9 @@ mw_plausibility_check_muac <- function(df, sex, muac, flags) { #' mw_neat_output_muac <- function(df) { + ## Check if `df` is grouped ---- + is_grouped <- is_grouped_df(df) + ## Format data frame ---- df <- df |> mutate( @@ -131,7 +142,8 @@ mw_neat_output_muac <- function(df) { ) |> ## Rename columns ---- setNames( - c("Total children", "Flagged data (%)", "Class. of flagged data", "Sex ratio (p)", + c( if (is_grouped) "Group" else NULL, + "Total children", "Flagged data (%)", "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", "DPS(#)", "Class. of DPS", "Standard Dev* (#)", "Class. of standard dev") ) diff --git a/R/plausibility_check_wfhz.R b/R/plausibility_check_wfhz.R index 18bc38c..20ea47f 100644 --- a/R/plausibility_check_wfhz.R +++ b/R/plausibility_check_wfhz.R @@ -4,7 +4,7 @@ #' @description #' Check the overall plausibility and acceptability of WFHZ data through a #' structured test suite encompassing sampling and measurement-related biases checks -#' in the dataset. The test suite, including the criteria and corresponding rating of +#' in the data set. The test suite, including the criteria and corresponding rating of #' acceptability, follows the standards in the SMART plausibility check. The only #' exception is the exclusion of MUAC checks. MUAC is checked separately using more #' comprehensive test suite as well. @@ -12,7 +12,7 @@ #' The function works on a data frame returned from this package's wrangling #' function for age and for WFHZ data. #' -#' @param df A dataset object of class `data.frame` to check. +#' @param df A data set object of class `data.frame` to check. #' #' @param sex A vector of class `numeric` of child's sex. #' @@ -25,7 +25,7 @@ #' @param flags A vector of class `numeric` of flagged records. #' #' @returns -#' A summarised table of class `data.frame`, of length 19 and width 1, for +#' A summarized table of class `data.frame`, of length 19 and width 1, for #' the plausibility test results and their respective acceptability rates. #' #' @seealso [mw_plausibility_check_mfaz()] [mw_plausibility_check_muac()] @@ -122,12 +122,13 @@ mw_plausibility_check_wfhz <- function(df, #' for improved clarity and readability. It converts scientific notations to standard #' notations, round values and rename columns to meaningful names. #' -#' @param df A data frame containing the summary table returned by this package's -#' WFHZ plausibility check function. Must be of class `data.frame`. +#' @param df An object of class `data.frame` returned by this package's +#' plausibility checker for WFHZ data, containing the summarized results to be +#' formatted. #' #' @returns -#' A data frame of the same length and width as `df`, with column names and -#' values formatted for clarity. +#' A `data.frame` object of the same length and width as `df`, with column names and +#' values formatted for clarity and readability. #' #' @examples #' ## First wrangle age data ---- @@ -165,6 +166,9 @@ mw_plausibility_check_wfhz <- function(df, #' @export mw_neat_output_wfhz <- function(df) { + ## Check if `df` is grouped ---- + is_grouped <- is_grouped_df(df) + ## Format data frame ---- df <- df |> mutate( @@ -182,7 +186,8 @@ df <- df |> ) |> ## Rename columns ---- setNames( - c("Total children", "Flagged data (%)", "Class. of flagged data", + c( if (is_grouped) "Group" else NULL, + "Total children", "Flagged data (%)", "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", "Age ratio (p)", "Class. of age ratio", "DPS weight (#)", "Class. DPS weight", "DPS height (#)", "Class. DPS height", "Standard Dev* (#)", diff --git a/R/prev_define_wasting.R b/R/prev_define_wasting.R new file mode 100644 index 0000000..cdcb5ad --- /dev/null +++ b/R/prev_define_wasting.R @@ -0,0 +1,333 @@ +#' +#' +#' @keywords internal +#' +#' +define_wasting_muac <- function(muac, + edema = NULL, + .cases = c("gam", "sam", "mam")) { + ## Enforce options in `.cases` ---- + .cases <- match.arg(.cases) + + if (!is.null(edema)) { + switch( + ### Wasting by MUAC including MUAC ---- + .cases, + "gam" = { + gam <- ifelse(muac < 125 | edema == "y", 1, 0) + }, + "sam" = { + sam <- ifelse(muac < 115 | edema == "y", 1, 0) + }, + "mam" = { + mam <- ifelse((muac >= 115 & muac < 125 & edema == "n"), 1, 0) + } + ) + } else { + switch( + ### Wasting by MUAC without edema ---- + .cases, + "gam" = { + gam <- ifelse(muac < 125, 1, 0) + }, + "sam" = { + sam <- ifelse(muac < 115, 1, 0) + }, + "mam" = { + mam <- ifelse((muac >= 115 & muac < 125), 1, 0) + } + ) + } +} + +#' +#' +#' @keywords internal +#' +#' +define_wasting_zscores <- function(zscores, + edema = NULL, + .cases = c("gam", "sam", "mam")) { + ## Enforce options in `.cases` ---- + .cases <- match.arg(.cases) + + if (!is.null(edema)) { + switch( + ### Wasting by WFHZ including edema ---- + .cases, + "gam" = { + gam <- ifelse(zscores < -2 | edema == "y", 1, 0) + }, + "sam" = { + sam <- ifelse(zscores < -3 | edema == "y", 1, 0) + }, + "mam" = { + mam <- ifelse((zscores >= -3 & zscores < -2 & edema == "n"), 1, 0) + } + ) + } else { + switch( + ### Wasting by MFHZ sem edema ---- + .cases, + "gam" = { + gam <- ifelse(zscores < -2, 1, 0) + }, + "sam" = { + sam <- ifelse(zscores < -3, 1, 0) + }, + "mam" = { + mam <- ifelse(zscores >= -3 & zscores < -2, 1, 0) + } + ) + } +} + +#' +#' +#' @keywords internal +#' +#' +define_wasting_combined <- function(zscores, + muac, + edema = NULL, + .cases = c("cgam", "csam", "cmam")) { + ## Enforce options in `.cases` ---- + .cases <- match.arg(.cases) + + if (!is.null(edema)) { + switch( + ### Combined wasting including edema ---- + .cases, + "cgam" = { + cgam <- ifelse(zscores < -2 | muac < 125 | edema == "y", 1, 0) + }, + "csam" = { + csam <- ifelse(zscores < -3 | muac < 115 | edema == "y", 1, 0) + }, + "cmam" = { + cmam <- ifelse((zscores >= -3 & zscores < -2) | (muac >= 115 & muac < 125) & (edema == "n"), 1, 0) + } + ) + } else { + switch( + ### Combined wasting without edema ---- + .cases, + "cgam" = { + cgam <- ifelse(zscores < -2 | muac < 125, 1, 0) + }, + "csam" = { + csam <- ifelse(zscores < -3 | muac < 115, 1, 0) + }, + "cmam" = { + cmam <- ifelse((zscores >= -3 & zscores < -2) | (muac >= 115 & muac < 125), 1, 0) + } + ) + } +} + + +#' +#' Define wasting +#' +#' @description +#' Define if a given observation in the data set is wasted or not, and its +#' respective form of wasting (global, severe or moderate) on the basis of +#' z-scores of weight-for-height (WFHZ), muac-for-age (MFAZ), raw MUAC values and +#' combined case-definition. +#' +#' @param df A data set object of class `data.frame` to use. It must have been +#' wrangled using this package's wrangling functions for WFHZ or MUAC, or both +#' (for combined) as appropriate. +#' +#' @param zscores A vector of class `double` of WFHZ or MFAZ values. If the class +#' does not match the expected type, the function will stop execution and return +#' an error message indicating the type of mismatch. +#' +#' @param muac A vector of class `integer` or `numeric` of raw MUAC values in +#' millimeters. If the class does not match the expected type, the function will +#' stop execution and return an error message indicating the type of mismatch. +#' +#' @param edema A vector of class `character` of edema. Default is `NULL`. +#' If the class does not match the expected type, the function will stop execution +#' and return an error message indicating the type of mismatch. Code values should be +#' "y" for presence and "n" for absence of bilateral edema. If different, the +#' function will stop execution and return an error indicating the issue. +#' +#' @param .by A choice of the criterion by which the case-definition should done. +#' Choose `zscores` for WFHZ or MFAZ, `muac` for raw MUAC and `combined` for +#' combined. +#' +#' @returns Three vectors named `gam`, `sam` and `mam`, of class `numeric`, same +#' length as inputs, containing dummy values: 1 for case and 0 for not case. +#' This is added to `df`. When `combined` is selected, vector's names become +#' `cgam`, `csam` and `cmam`. +#' +#' @examples +#' ## Case-definition by z-scores ---- +#' z <- anthro.02 |> +#' define_wasting( +#' zscores = wfhz, +#' muac = NULL, +#' edema = edema, +#' .by = "zscores" +#' ) +#' head(z) +#' +#' ## Case-definition by MUAC ---- +#' m <- anthro.02 |> +#' define_wasting( +#' zscores = NULL, +#' muac = muac, +#' edema = edema, +#' .by = "muac" +#' ) +#' head(m) +#' +#' ## Case-definition by combined ---- +#' c <- anthro.02 |> +#' define_wasting( +#' zscores = wfhz, +#' muac = muac, +#' edema = edema, +#' .by = "combined" +#' ) +#' head(c) +#' +#' @export +#' +define_wasting <- function(df, + zscores = NULL, + muac = NULL, + edema = NULL, + .by = c("zscores", "muac", "combined")) { + + ## Difuse and evaluate arguments ---- + zscores <- eval_tidy(enquo(zscores), df) + muac <- eval_tidy(enquo(muac), df) + edema <- eval_tidy(enquo(edema), df) + + ## Enforce options in `.by` ---- + .by <- match.arg(.by) + + ## Enforce class of `zscores` ---- + if(!is.null(zscores)) { + if (!is.double(zscores)) { + stop("`zscores` must be of class 'double'; not ", shQuote(class(zscores)), ". Please try again.") + } + } + + ## Enforce class of `muac` ---- + if(!is.null(muac)) { + if (!(is.numeric(muac) | is.integer(muac))) { + stop("`muac` must be of class 'numeric' or 'integer'; not ", shQuote(class(muac)), ". Please try again.") + } + } + + ## Enforce class of `edema` ---- + if(!is.null(edema)) { + if (!is.character(edema)) { + stop("`edema` must be of class 'character'; not ", shQuote(class(edema)), ". Please try again.") + } + ## Enforce code values in `edema` ---- + if (!(all(levels(as.factor(as.character(edema))) %in% c("y", "n")))) { + stop("Values in `edema` should either be 'y' or 'n'. Please try again.") + } + } + + ## Define cases ---- + switch( + ### By WFHZ or MFAZ and add to the data frame ---- + .by, + "zscores" = { + df |> + mutate( + gam = define_wasting_zscores( + zscores = {{ zscores }}, + edema = {{ edema }}, + .cases = "gam" + ), + sam = define_wasting_zscores( + zscores = {{ zscores }}, + edema = {{ edema }}, + .cases = "sam" + ), + mam = define_wasting_zscores( + zscores = {{ zscores }}, + edema = {{ edema }}, + .cases = "mam" + ) + ) + }, + ### By MUAC and add to the data frame ---- + "muac" = { + df |> + mutate( + gam = define_wasting_muac( + muac = {{ muac }}, + edema = {{ edema }}, + .cases = "gam" + ), + sam = define_wasting_muac( + muac = {{ muac }}, + edema = {{ edema }}, + .cases = "sam" + ), + mam = define_wasting_muac( + muac = {{ muac }}, + edema = {{ edema }}, + .cases = "mam" + ) + ) + }, + ### By combined add to the data frame ---- + "combined" = { + df |> + mutate( + cgam = define_wasting_combined( + zscores = {{ zscores }}, + muac = {{ muac }}, + edema = {{ edema }}, + .cases = "cgam" + ), + csam = define_wasting_combined( + zscores = {{ zscores }}, + muac = {{ muac }}, + edema = {{ edema }}, + .cases = "csam" + ), + cmam = define_wasting_combined( + zscores = {{ zscores }}, + muac = {{ muac }}, + edema = {{ edema }}, + .cases = "cmam" + ) + ) + } + ) +} + + + +#' +#' +#' @keywords internal +#' +#' +smart_tool_case_definition <- function(muac, edema = NULL) { + if (!is.null(edema)) { + ## Define cases including edema ---- + x <- case_when( + muac < 115 | {{ edema }} == "y" ~ "sam", + muac >= 115 & muac < 125 & {{ edema }} == "n" ~ "mam", + .default = "not wasted" + ) + } else { + ## Define cases excluding edema ---- + x <- case_when( + muac < 115 ~ "sam", + muac >= 115 & muac < 125 ~ "mam", + .default = "not wasted" + ) + } + x +} diff --git a/R/prev_wasting_combined.R b/R/prev_wasting_combined.R new file mode 100644 index 0000000..0df7550 --- /dev/null +++ b/R/prev_wasting_combined.R @@ -0,0 +1,251 @@ +#' +#' +#' @keywords internal +#' +#' +complex_survey_estimates_combined <- function(df, + wt = NULL, + edema = NULL, + .by) { + ## Difuse arguments ---- + wt <- enquo(wt) + edema <- enquo(edema) + + ## Defines case based on the availability of edema ---- + if (!quo_is_null(edema)) { + ### Case definition when `edema` is not null ---- + df <- with( + df, + define_wasting(df, + zscore = .data$wfhz, + muac = .data$muac, + edema = !!edema, + .by = "combined" + ) |> + mutate( + cflags = ifelse(.data$flag_wfhz == 1 | .data$flag_mfaz == 1, 1, 0) + ) + ) + } else { + ### Case definition when `edema` is null ---- + df <- with( + df, + define_wasting(df, + zscore = .data$wfhz, + muac = .data$muac, + .by = "combined" + ) |> + mutate( + cflags = ifelse(.data$flag_wfhz == 1 | .data$flag_mfaz == 1, 1, 0) + ) + ) + } + + ## Create survey object ---- + if (!quo_is_null(wt)) { + srvy <- df |> + as_survey_design( + ids = .data$cluster, + pps = "brewer", + variance = "YG", + weights = !!wt + ) + } else { + srvy <- df |> + as_survey_design( + ids = .data$cluster, + pps = "brewer", + variance = "YG" + ) + } + ## Summarise prevalence ---- + p <- srvy |> + group_by({{ .by }}) |> + filter(.data$cflags == 0) |> + summarise( + across( + c(.data$cgam:.data$cmam), + list( + n = \(.)sum(., na.rm = TRUE), + p = \(.)survey_mean(., + vartype = "ci", + level = 0.95, + deff = TRUE, + na.rm = TRUE + ) + ) + ), + wt_pop = sum(srvyr::cur_svy_wts()) + ) + p +} + + + +#' +#' +#' Estimate the prevalence of combined wasting +#' +#' @description +#' Estimate the prevalence of wasting based on the combined case-definition of +#' weight-for-height z-scores (WFHZ), MUAC and/or edema. The function allows users to +#' get the prevalence estimates in accordance with the complex sample +#' design properties; this includes applying survey weights when needed or applicable. +#' Before estimating, the function evaluates the quality of data by calculating +#' and rating the standard deviation of WFHZ and MFAZ, as well as the p-value of +#' the age ratio test. +#' Prevalence will be calculated only when the rating of all test is as not +#' problematic concurrently. If either of them is problematic, it cancels out +#' the analysis and `NA`s get thrown. +#' +#' Outliers are detected in both WFHZ and in MUAC data set (through z-scores) +#' based on SMART flags get excluded prior being piped into the actual prevalence +#' analysis workflow. +#' +#' @param df A data set object of class `data.frame` to use. This must have been +#' wrangled using this package's wrangling functions for both WFHZ and MUAC data +#' sequentially. The order does not matter. Note that MUAC values should be +#' converted to millimeters after using the MUAC wrangler. If this is not done, +#' the function will stop execution and return an error message. Moreover, the +#' function uses a variable called `cluster` where the primary sampling unit IDs +#' are stored. Make sure to rename your cluster ID variable to `cluster`, otherwise +#' the function will error and terminate the execution. +#' +#' @param wt A vector of class `double` of the final survey weights. Default is +#' `NULL` assuming a self-weighted survey, as in the ENA for SMART software; +#' otherwise a weighted analysis is computed. +#' +#' @param edema A vector of class `character` of edema. Code will be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. +#' +#' @param .by A vector of class `character` or `numeric` of the geographical areas +#' or respective IDs for where the data was collected and for which the analysis +#' should be summarised at. +#' +#' @returns A summarised table of class `data.frame` for the descriptive +#' statistics about combined wasting. +#' +#' @details +#' A concept of "combined flags" is introduced in this function. It consists of +#' defining as flag any observation that is flagged in either `flag_wfhz` or +#' `flag_mfaz` vectors. A new column `cflags` for combined flags is created and +#' added to `df`. This ensures that all flagged observations from both WFHZ +#' and MFAZ data are excluded from the prevalence analysis. +#' +#' *A glimpse on how `cflags` are defined:* +#' | **flag_wfhz** | **flag_mfaz** | **cflags** | +#' | :---: | :---: | :---: | +#' | 1 | 0 | 1 | +#' | 0 | 1 | 1 | +#' | 0 | 0 | 0 | +#' +#' @examples +#' ## When .by and wt are set to NULL ---- +#' mw_estimate_prevalence_combined( +#' df = anthro.02, +#' wt = NULL, +#' edema = edema, +#' .by = NULL +#' ) +#' +#' ## When wt is not set to NULL ---- +#' mw_estimate_prevalence_combined( +#' df = anthro.02, +#' wt = wtfactor, +#' edema = edema, +#' .by = NULL +#' ) +#' +#' @export +#' +#' +mw_estimate_prevalence_combined <- function(df, + wt = NULL, + edema = NULL, + .by = NULL) { + ## Difuse argument `.by` ---- + .by <- enquo(.by) + + ## Enforce measuring unit is in "mm" ---- + x <- as.character(pull(df, .data$muac)) + if (any(grepl("\\.", x))) { + stop("MUAC values must be in millimeters. Please try again.") + } + + ## Empty vector to store results ---- + results <- list() + + if (!quo_is_null(.by)) { + ## Rate standard deviation and set MUAC analysis path ---- + x <- df |> + summarise( + std_wfhz = rate_std(sd(remove_flags(as.numeric(.data$wfhz), "zscores"), na.rm = TRUE)), + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std_mfaz = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), + muac_analysis_path = set_analysis_path(.data$age_ratio, .data$std_mfaz), + .by = !!.by + ) + } else { + ## Rate standard deviation and set MUAC analysis path ---- + x <- df |> + summarise( + std_wfhz = rate_std(sd(remove_flags(as.numeric(.data$wfhz), "zscores"), na.rm = TRUE)), + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std_mfaz = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), + muac_analysis_path = set_analysis_path(.data$age_ratio, .data$std_mfaz) + ) + } + + ## Iterate over data frame to compute prevalence according to the SD ---- + for (i in seq_len(nrow(x))) { + if (!quo_is_null(.by)) { + area <- pull(x, !!.by)[i] + data <- filter(df, !!sym(quo_name(.by)) == !!area) + } else { + data <- df + } + + std_wfhz <- x$std_wfhz[i] + muac_analysis_path <- x$muac_analysis_path[i] + + if (std_wfhz != "Problematic" && muac_analysis_path == "unweighted") { + ### Compute standard complex sample based prevalence analysis ---- + output <- data |> + complex_survey_estimates_combined( + wt = {{ wt }}, + edema = {{ edema }}, + .by = !!.by + ) + } else { + ## Add NA ---- + if (!quo_is_null(.by)) { + output <- data |> + summarise( + cgam_p = NA_real_, + csam_p = NA_real_, + cmam_p = NA_real_, + .by = !!.by + ) + } else { + ## Add NA ---- + output <- tibble( + cgam_p = NA_real_, + csam_p = NA_real_, + cmam_p = NA_real_ + ) + } + } + results[[i]] <- output + } + ### Ensure that all categories in `.by` get added to the tibble ---- + if (!quo_is_null(.by)) { + results <- bind_rows(results) |> + relocate(.data$cgam_p, .after = .data$cgam_n) |> + relocate(.data$csam_p, .after = .data$csam_n) |> + relocate(.data$cmam_p, .after = .data$cmam_n) + } else { + ## Ungrouped results + results <- bind_rows(results) + } + results +} diff --git a/R/prev_wasting_mfaz.R b/R/prev_wasting_mfaz.R new file mode 100644 index 0000000..fbe05a1 --- /dev/null +++ b/R/prev_wasting_mfaz.R @@ -0,0 +1,187 @@ +#' +#' +#' @keywords internal +#' +#' +complex_survey_estimates_mfaz <- function(df, + wt = NULL, + edema = NULL, + .by) { + ## Difuse arguments ---- + wt <- enquo(wt) + edema <- enquo(edema) + + ## Defines case based on the availability of edema ---- + if (!quo_is_null(edema)) { + ## When edema is available ---- + df <- with( + df, + define_wasting( + df, + zscores = .data$mfaz, + edema = !!edema, + .by = "zscores" + ) + ) + } else { + ## When edema is not available ---- + df <- with( + df, + define_wasting( + df, + zscores = .data$mfaz, + .by = "zscores" + ) + ) + } + + ## Create a survey object ---- + if (!is.null(wt)) { + srvy <- df |> + as_survey_design( + ids = .data$cluster, + pps = "brewer", + variance = "YG", + weights = {{ wt }} + ) + } else { + srvy <- df |> + as_survey_design( + ids = .data$cluster, + pps = "brewer", + variance = "YG" + ) + } + + ## Summarise prevalence ---- + p <- srvy |> + group_by({{ .by }}) |> + filter(.data$flag_mfaz == 0) |> + summarise( + across( + c(.data$gam:.data$mam), + list( + n = \(.)sum(., na.rm = TRUE), + p = \(.)survey_mean(., + vartype = "ci", + level = 0.95, + deff = TRUE, + na.rm = TRUE + ) + ) + ), + wt_pop = round(sum(srvyr::cur_svy_wts())) + ) + p +} + + + +#' +#' Estimate the prevalence of wasting based on z-scores of muac-for-age (MFAZ) +#' +#' @description +#' Calculate the prevalence estimates of wasting based on z-scores of +#' muac-for-age and/or bilateral edema. The function allows users to +#' get the prevalence estimates calculated in accordance with the complex sample +#' design properties; this includes applying survey weights when needed or applicable. +#' +#' Before estimating, the function evaluates the quality of data by calculating +#' and rating the standard deviation of z-scores of MFAZ. If rated as problematic, +#' the prevalence is estimated based on the PROBIT method. +#' +#' Outliers are detected based on SMART flags and get excluded prior prevalence analysis. +#' +#' @param df A data set object of class `data.frame` to use. This must have been +#' wrangled using this package's wrangling function for MUAC data. The function +#' uses a variable name called `cluster` where the primary sampling unit IDs +#' are stored. Make sure to rename your cluster ID variable to `cluster`, otherwise +#' the function will error and terminate the execution. +#' +#' @param wt A vector of class `double` of the final survey weights. Default is +#' `NULL` assuming a self weighted survey, as in the ENA for SMART software; +#' otherwise, when a vector of weights if supplied, weighted analysis is done. +#' +#' @param edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. +#' +#' @param .by A vector of class `character` or `numeric` of the geographical areas +#' or respective IDs for where the data was collected and for which the analysis +#' should be summarized at. +#' +#' @returns A summarized table of class `data.frame` of the descriptive +#' statistics about wasting. +#' +#' @examples +#' ## When .by = NULL ---- +#' mw_estimate_prevalence_mfaz( +#' df = anthro.04, +#' wt = NULL, +#' edema = edema, +#' .by = NULL +#' ) +#' +#' ## When .by is not set to NULL ---- +#' mw_estimate_prevalence_mfaz( +#' df = anthro.04, +#' wt = NULL, +#' edema = edema, +#' .by = province +#' ) +#' +#' @export +#' +mw_estimate_prevalence_mfaz <- function(df, + wt = NULL, + edema = NULL, + .by = NULL) { + ## Difuse argument .by ---- + .by <- enquo(.by) + + ## Empty vector ---- + results <- list() + + if (!quo_is_null(.by)) { + ## Check standard deviation ---- + x <- df |> + summarise( + std = rate_std(sd(remove_flags(.data$mfaz, "zscores"), na.rm = TRUE)), + .by = !!.by + ) + } else { + ## Check standard deviation ---- + x <- df |> + summarise( + std = rate_std(sd(remove_flags(.data$mfaz, "zscores"), na.rm = TRUE)) + ) + } + + ## Iterate over data frame to compute prevalence according to the SD ---- + for (i in seq_len(nrow(x))) { + if (!quo_is_null(.by)) { + area <- pull(x, !!.by)[i] + data <- filter(df, !!sym(quo_name(.by)) == !!area) + } else { + data <- df + } + + std <- x$std[i] + if (std != "Problematic") { + ### Compute standard complex sample based prevalence analysis ---- + result <- complex_survey_estimates_mfaz(data, {{ wt }}, {{ edema }}, !!.by) + } else { + ### Compute grouped PROBIT based prevalence ---- + if (!quo_is_null(.by)) { + result <- estimate_probit_prevalence(data, !!.by, .for = "mfaz") + } else { + ### Compute PROBIT based prevalence ---- + result <- estimate_probit_prevalence(data, .for = "mfaz") + } + } + results[[i]] <- result + } + bind_rows(results) |> + relocate(.data$gam_p, .after = .data$gam_n) |> + relocate(.data$sam_p, .after = .data$sam_n) |> + relocate(.data$mam_p, .after = .data$mam_n) +} diff --git a/R/prev_wasting_muac.R b/R/prev_wasting_muac.R new file mode 100644 index 0000000..ec23b8b --- /dev/null +++ b/R/prev_wasting_muac.R @@ -0,0 +1,353 @@ +#' +#' +#' @keywords internal +#' +#' +set_analysis_path <- function(ageratio_class, sd_class) { + ## Enforce class of both arguments ---- + ageratio_class <- as.character(ageratio_class) + sd_class <- as.character(sd_class) + + ## Set the analysis path ---- + case_when( + ageratio_class == "Problematic" & sd_class != "Problematic" ~ "weighted", + ageratio_class != "Problematic" & sd_class == "Problematic" ~ "missing", + ageratio_class == "Problematic" & sd_class == "Problematic" ~ "missing", + .default = "unweighted" + ) +} + + +#' +#' +#' @keywords internal +#' +#' +smart_age_weighting <- function(muac, + age, + edema = NULL, + .form = c("sam", "mam")) { + ## Match arguments ---- + .form <- match.arg(.form) + + if (!is.null(edema)) { + ### Define cases ---- + nut_status <- smart_tool_case_definition(muac = muac, edema = {{ edema }}) + + ### Estimate age-weighted prevalence as per SMART MUAC Tool ---- + age_group <- ifelse(age < 24, "under_2", "over_2") + nut_U2 <- ifelse(age_group == "under_2" & nut_status == .form, 1, 0) + nut_O2 <- ifelse(age_group == "over_2" & nut_status == .form, 1, 0) + p <- mean(nut_U2, na.rm = TRUE) + (2 * mean(nut_O2, na.rm = TRUE)) / 3 + } else { + ### Define cases ---- + nut_status <- smart_tool_case_definition(muac) + + ### Estimate age-weighted prevalence as per SMART MUAC Tool ---- + age_group <- ifelse(age < 24, "under_2", "over_2") + nut_U2 <- ifelse(age_group == "under_2" & nut_status == .form, 1, 0) + nut_O2 <- ifelse(age_group == "over_2" & nut_status == .form, 1, 0) + p <- mean(nut_U2, na.rm = TRUE) + (2 * mean(nut_O2, na.rm = TRUE)) / 3 + } + p +} + + +#' +#' +#' @keywords internal +#' +#' +complex_survey_estimates_muac <- function(df, + wt = NULL, + edema = NULL, + .by = NULL) { + ## Difuse arguments ---- + wt <- enquo(wt) + edema <- enquo(edema) + + ## Defines case based on the availability of edema ---- + if (!quo_is_null(edema)) { + df <- df |> + define_wasting( + muac = .data$muac, + edema = !!edema, + .by = "muac" + ) + } else { + df <- df |> + define_wasting( + muac = .data$muac, + .by = "muac" + ) + } + + ### Weighted survey analysis ---- + if (!is.null(wt)) { + srvy <- df |> + as_survey_design( + ids = .data$cluster, + pps = "brewer", + variance = "YG", + weights = !!wt + ) + } else { + ### Unweighted: typical SMART survey analysis ---- + srvy <- df |> + as_survey_design( + ids = .data$cluster, + pps = "brewer", + variance = "YG" + ) + } + #### Summarise prevalence ---- + p <- srvy |> + group_by({{ .by }}) |> + filter(.data$flag_mfaz == 0) |> + summarise( + across( + c(.data$gam:.data$mam), + list( + n = \(.)sum(., na.rm = TRUE), + p = \(.)survey_mean(., + vartype = "ci", + level = 0.95, + deff = TRUE, + na.rm = TRUE + ) + ) + ), + wt_pop = sum(srvyr::cur_svy_wts()) + ) + p +} + + +#' +#' Estimate the prevalence of wasting based on MUAC for survey data +#' +#' @description +#' Calculate the prevalence estimates of wasting based on MUAC and/or bilateral +#' edema. +#' Before estimating, the function evaluates the quality of data by calculating +#' and rating the standard deviation of z-scores of muac-for-age (MFAZ) and the +#' p-value of the age ratio test; then it sets the analysis path that best fits +#' the data: +#' + If all tests are rated as not problematic, a normal analysis is done. +#' + If standard deviation is not problematic and age ratio test is problematic, +#' prevalence is age-weighted. This is to fix the likely overestimation of wasting +#' when there are excess of younger children in the data set. +#' + If standard deviation is problematic and age ratio test is not, or both +#' are problematic, analysis gets cancelled out and `NA`s get thrown. +#' +#' Outliers are detected based on SMART flags on the MFAZ values and then +#' get excluded prior being piped into the actual prevalence analysis workflow. +#' +#' @param df A data set object of class `data.frame` to use. This must have been +#' wrangled using this package's wrangling function for MUAC data. Make sure +#' MUAC values are converted to millimeters after using the wrangler. +#' If this is not done, the function will stop execution and return an error message. +#' The function uses a variable name called `cluster` where the primary sampling unit IDs +#' are stored. Make sure the data set has this variable and its name has been +#' renamed to `cluster`, otherwise the function will error and terminate the execution. +#' +#' @param wt A vector of class `double` of the final survey weights. Default is +#' `NULL` assuming a self weighted survey, as in the ENA for SMART software; +#' otherwise, when a vector of weights if supplied, weighted analysis is done. +#' +#' @param edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. +#' +#' @param .by A vector of class `character` or `numeric` of the geographical areas +#' or respective IDs for where the data was collected and for which the analysis +#' should be summarized at. +#' +#' @returns A summarized table of class `data.frame` of the descriptive +#' statistics about wasting. +#' +#' @references +#' SMART Initiative (no date). *Updated MUAC data collection tool*. Available at: +#' +#' +#' +#' @seealso [mw_estimate_smart_age_wt()] [mw_estimate_prevalence_mfaz()] +#' [mw_estimate_prevalence_screening()] +#' +#' @examples +#' ## When .by = NULL ---- +#' mw_estimate_prevalence_muac( +#' df = anthro.04, +#' wt = NULL, +#' edema = edema, +#' .by = NULL +#' ) +#' +#' ## When .by is not set to NULL ---- +#' mw_estimate_prevalence_muac( +#' df = anthro.04, +#' wt = NULL, +#' edema = edema, +#' .by = province +#' ) +#' +#' @rdname prev-muac +#' +#' @export +#' +mw_estimate_prevalence_muac <- function(df, + wt = NULL, + edema = NULL, + .by = NULL) { + ## Difuse argument `.by` ---- + .by <- enquo(.by) + + + ## Enforce measuring unit is in "mm" ---- + x <- as.character(pull(df, .data$muac)) + if (any(grepl("\\.", x))) { + stop("MUAC values must be in millimeters. Please try again.") + } + + ## Empty vector type list to store results ---- + results <- list() + + if (!quo_is_null(.by)) { + ## Evaluate the analysis path by `.by` ---- + x <- df |> + group_by(!!.by) |> + summarise( + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), + analysis_approach = set_analysis_path(.data$age_ratio, .data$std), + .groups = "drop" + ) + } else { + ## Evaluate the analysis path ---- + x <- df |> + summarise( + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), + analysis_approach = set_analysis_path(.data$age_ratio, .data$std) + ) + } + + ## Iterate over a data frame and compute estimates as per analysis path ---- + for (i in seq_len(nrow(x))) { + if (!quo_is_null(.by)) { + area <- pull(x, !!.by)[i] + data <- filter(df, !!sym(quo_name(.by)) == area) + } else { + data <- df + } + + analysis_approach <- x$analysis_approach[i] + + if (analysis_approach == "unweighted") { + ### Estimate PPS-based prevalence ---- + output <- complex_survey_estimates_muac(data, {{ wt }}, {{ edema }}, !!.by) + } else if (analysis_approach == "weighted") { + ### Estimate age-weighted prevalence as per SMART MUAC tool ---- + if (!quo_is_null(.by)) { + output <- data |> + mw_estimate_smart_age_wt( + edema = {{ edema }}, + .by = !!.by + ) + } else { + ### Estimate age-weighted prevalence as per SMART MUAC tool ---- + output <- data |> + mw_estimate_smart_age_wt(edema = {{ edema }}) + } + } else { + ## Return NA's ---- + if (!quo_is_null(.by)) { + output <- data |> + summarise( + gam_p = NA_real_, + sam_p = NA_real_, + mam_p = NA_real_, + .by = !!.by + ) + } else { + ## Return NA's ---- + output <- tibble( + gam_p = NA_real_, + sam_p = NA_real_, + mam_p = NA_real_ + ) + } + } + results[[i]] <- output + } + ### Ensure that all categories in `.by` get added to the tibble ---- + if (!quo_is_null(.by)) { + results <- bind_rows(results) |> + relocate(.data$gam_p, .after = .data$gam_n) |> + relocate(.data$sam_p, .after = .data$sam_n) |> + relocate(.data$mam_p, .after = .data$mam_n) + } else { + ## Non-grouped results + results <- bind_rows(results) + } + results +} + +#' +#' +#' @rdname prev-muac +#' +#' @examples +#' ## An application of `mw_estimate_smart_age_wt()` ---- +#' .data <- anthro.04 |> +#' subset(province == "Province 2") +#' +#' mw_estimate_smart_age_wt( +#' df = .data, +#' edema = edema, +#' .by = NULL +#' ) +#' +#' @export +#' +#' +mw_estimate_smart_age_wt <- function(df, edema = NULL, .by = NULL) { + ## Difuse argument `.by` ---- + .by <- enquo(.by) + + ## Enforce measuring unit is in "mm" ---- + x <- as.character(pull(df, .data$muac)) + if (any(grepl("\\.", x))) { + stop("MUAC values must be in millimeters. Please try again.") + } + + if (!quo_is_null(.by)) { + df <- df |> + filter(.data$flag_mfaz == 0) |> + summarise( + sam = smart_age_weighting(.data$muac, .data$age, {{ edema }}, .form = "sam"), + mam = smart_age_weighting(.data$muac, .data$age, {{ edema }}, .form = "mam"), + gam = sum(.data$sam, .data$mam), + .by = !!.by + ) |> + rename( + gam_p = .data$gam, + sam_p = .data$sam, + mam_p = .data$mam + ) + } else { + df <- df |> + filter(.data$flag_mfaz == 0) |> + summarise( + sam = smart_age_weighting(.data$muac, .data$age, {{ edema }}, .form = "sam"), + mam = smart_age_weighting(.data$muac, .data$age, {{ edema }}, .form = "mam"), + gam = sum(.data$sam, .data$mam) + ) |> + rename( + gam_p = .data$gam, + sam_p = .data$sam, + mam_p = .data$mam + ) + } + df +} + diff --git a/R/prev_wasting_probit.R b/R/prev_wasting_probit.R new file mode 100644 index 0000000..74e13c2 --- /dev/null +++ b/R/prev_wasting_probit.R @@ -0,0 +1,118 @@ +#' +#' +#' +#' @keywords internal +#' +#' +apply_probit_method <- function(x, .status = c("gam", "sam")) { + + ## Enforce options in `.status` ---- + .status <- match.arg(.status) + + ## Calculate mean of zscores ---- + mean <- mean(remove_flags(x, "zscores"), na.rm = TRUE) + + ## Estimate prevalence based on probit method, with a SD = 1 ---- + switch( + .status, + "gam" = {pnorm(q = -2, mean = mean, sd = 1, lower.tail = TRUE, log.p = FALSE)}, + "sam" = {pnorm(q = -3, mean = mean, sd = 1, lower.tail = TRUE, log.p = FALSE)} + ) +} + + + +#' +#' +#' @keywords internal +#' +#' +estimate_probit_prevalence <- function(df, + .by = NULL, + .for = c("wfhz", "mfaz")) { + + ## Difuse argument ---- + .by <- enquo(.by) + + ## Enfornce options in `.for` ---- + .for <- match.arg(.for) + + ## Calculate probit-based prevalence ---- + switch( + .for, + "wfhz" = { + if(!is.null(.by)) { + df <- df |> + summarise( + gam = apply_probit_method(.data$wfhz, .status = "gam"), + sam = apply_probit_method(.data$wfhz, .status = "sam"), + mam = .data$gam - .data$sam, + .by = !!.by + ) |> + mutate( + gam_p = .data$gam, + sam_p = .data$sam, + mam_p = .data$mam, + gam = NA, + sam = NA, + mam = NA + ) |> + select(!2:4) # Make it fit in structure of the returned df in the main function + } else { + df <- df |> + summarise( + gam = apply_probit_method(.data$wfhz, .status = "gam"), + sam = apply_probit_method(.data$wfhz, .status = "sam"), + mam = .data$gam - .data$sam + ) |> + mutate( + gam_p = .data$gam, + sam_p = .data$sam, + mam_p = .data$mam, + gam = NA, + sam = NA, + mam = NA + ) |> + select(!2:4) # Make it fit in structure of the returned df in the main function + } + df + }, + "mfaz" = { + if(!is.null(.by)) { + df <- df |> + summarise( + gam = apply_probit_method(.data$mfaz, .status = "gam"), + sam = apply_probit_method(.data$mfaz, .status = "sam"), + mam = .data$gam - .data$sam, + .by = !!.by + ) |> + mutate( + gam_p = .data$gam, + sam_p = .data$sam, + mam_p = .data$mam, + gam = NA, + sam = NA, + mam = NA + ) |> + select(!2:4) # Make it fit in structure of the returned df in the main function + } else { + df <- df |> + summarise( + gam = apply_probit_method(.data$mfaz, .status = "gam"), + sam = apply_probit_method(.data$mfaz, .status = "sam"), + mam = .data$gam - .data$sam + ) |> + mutate( + gam_p = .data$gam, + sam_p = .data$sam, + mam_p = .data$mam, + gam = NA, + sam = NA, + mam = NA + ) |> + select(!2:4)## To make it fit in the structure of the df + } + df + } + ) +} diff --git a/R/prev_wasting_screening.R b/R/prev_wasting_screening.R new file mode 100644 index 0000000..0c42b03 --- /dev/null +++ b/R/prev_wasting_screening.R @@ -0,0 +1,233 @@ +#' +#' +#' @keywords internal +#' +#' +get_estimates <- function(df, muac, edema = NULL, .by = NULL) { + muac <- eval_tidy(enquo(muac), df) + edema <- eval_tidy(enquo(edema), df) + + ## Enforce class of `muac` ---- + if (!is.numeric(muac)) { + stop("`muac` should be of class numeric; not ", shQuote(class(muac)), ". Try again!") + } + + ### Enforce measuring unit is in "mm" ---- + if (any(grepl("\\.", as.character(pull(df, .data$muac))))) { + stop("MUAC values must be in millimeters. Try again!") + } + + + ## Wasting definition including `edema` ---- + if (!is.null(edema)) { + ### Enforce class of `edema` ---- + if (!is.character(edema)) { + stop("`edema` should be of class character; not ", shQuote(class(edema)), ". Try again!") + } + ### Enforce code values in `edema` ---- + if (!all(levels(as.factor(edema)) %in% c("y", "n"))) { + stop("Code values in `edema` must only be 'y' and 'n'. Try again!") + } + ## Wasting definition including `edema` ---- + x <- with( + df, + define_wasting( + df, + muac = muac, + edema = edema, + .by = "muac" + ) + ) + } else { + ## Wasting definition without `edema` ---- + x <- with( + df, + define_wasting( + df, + muac = muac, + .by = "muac" + ) + ) + } + ## Summarize results ---- + p <- x |> + group_by({{ .by }}) |> + filter(.data$flag_mfaz == 0) |> + summarise( + across( + c(.data$gam:.data$mam), + list( + n = \(.)sum(., na.rm = TRUE), + p = \(.)mean(., na.rm = TRUE) + ) + ) + ) + p +} + + +#' +#' +#' Estimate the prevalence of wasting based on MUAC for non survey data +#' +#' @description +#' It is common to estimate prevalence of wasting from non survey data, such +#' as screenings or any other community-based surveillance systems. In such +#' situations, the analysis usually consists only in estimating the point prevalence +#' and the counts of positive cases, without necessarily estimating the +#' uncertainty. This is the job of this function. +#' +#' Before estimating, it evaluates the quality of data by calculating and rating the +#' standard deviation of z-scores of muac-for-age (MFAZ) and the p-value of the +#' age ratio test; then it sets the analysis path that best fits the data. +#' +#' + If all tests are rated as not problematic, a normal analysis is done. +#' + If standard deviation is not problematic and age ratio test is problematic, +#' prevalence is age-weighted. This is to fix the likely overestimation of wasting +#' when there are excess of younger children in the data set. +#' + If standard deviation is problematic and age ratio test is not, or both +#' are problematic, analysis gets cancelled out and `NA`s get thrown. +#' +#' Outliers are detected based on SMART flags on the MFAZ values and then +#' get excluded prior being piped into the actual prevalence analysis workflow. +#' +#' @param df A data set object of class `data.frame` to use. This must have been +#' wrangled using this package's wrangling function for MUAC data. Make sure +#' MUAC values are converted to millimeters after using the wrangler. +#' If this is not done, the function will stop execution and return an error message +#' with the issue. +#' +#' @param muac A vector of raw MUAC values of class `numeric` or `integer`. +#' The measurement unit of the values should be millimeters. If any or all values +#' are in a different unit than the expected, the function will stop execution and +#' return an error message indicating the issue. +#' +#' @param edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. +#' If class, as well as, code values are different than expected, the function +#' will stop the execution and return an error message indicating the issue. +#' +#' @param .by A vector of class `character` or `numeric` of the geographical areas +#' or respective IDs for where the data was collected and for which the analysis +#' should be summarized at. +#' +#' @returns A summarized table of class `data.frame` of the descriptive +#' statistics about wasting. +#' +#' @references +#' SMART Initiative (no date). *Updated MUAC data collection tool*. Available at: +#' +#' +#' @seealso [mw_estimate_prevalence_muac()] [mw_estimate_smart_age_wt()] +#' +#' +#' @examples +#' mw_estimate_prevalence_screening( +#' df = anthro.02, +#' muac = muac, +#' edema = edema, +#' .by = province +#' ) +#' +#' ## With `edema` set to `NULL` ---- +#' mw_estimate_prevalence_screening( +#' df = anthro.02, +#' muac = muac, +#' edema = NULL, +#' .by = province +#' ) +#' +#' ## With `.by` set to `NULL` ---- +#' mw_estimate_prevalence_screening( +#' df = anthro.02, +#' muac = muac, +#' edema = NULL, +#' .by = NULL +#' ) +#' +#' @export +#' +mw_estimate_prevalence_screening <- function(df, + muac, + edema = NULL, + .by = NULL) { + ## Difuse argument `.by` ---- + .by <- enquo(.by) + + ## Empty vector type list to store results ---- + results <- list() + + ## Determine the analysis path that fits the data ---- + if (!quo_is_null(.by)) { + path <- df |> + group_by(!!.by) |> + summarise( + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), + analysis_approach = set_analysis_path(.data$age_ratio, .data$std), + .groups = "drop" + ) + } else { + path <- df |> + summarise( + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), + analysis_approach = set_analysis_path(.data$age_ratio, .data$std) + ) + } + + ## Iterate over a data frame and compute estimates as per analysis path ---- + for (i in seq_len(nrow(path))) { + if (!quo_is_null(.by)) { + area <- pull(path, !!.by)[i] + data <- filter(df, !!sym(quo_name(.by)) == area) + } else { + data <- df + } + + analysis_approach <- path$analysis_approach[i] + if (analysis_approach == "unweighted") { + if (!quo_is_null(.by)) { + output <- get_estimates(df = data, muac = {{ muac }}, edema = {{ edema }}, .by = !!.by) + } else { + output <- get_estimates(df = data, muac = {{ muac }}, edema = {{ edema }}) + } + } else if (analysis_approach == "weighted") { + if (!quo_is_null(.by)) { + output <- mw_estimate_smart_age_wt(df = data, edema = {{ edema }}, .by = !!.by) + } else { + output <- mw_estimate_smart_age_wt(df = data, edema = {{ edema }}) + } + } else { + ## Return NA's ---- + if (!quo_is_null(.by)) { + output <- summarise( + data, + gam_p = NA_real_, + sam_p = NA_real_, + mam_p = NA_real_, + .by = !!.by + ) + } else { + ## Return NA's ---- + output <- tibble( + gam_p = NA_real_, + sam_p = NA_real_, + mam_p = NA_real_ + ) + } + } + results[[i]] <- output + } + ### Ensure that all categories in `.by` get added to the tibble ---- + if (!quo_is_null(.by)) { + results <- bind_rows(results) |> + relocate(.data$gam_p, .after = .data$gam_n) |> + relocate(.data$sam_p, .after = .data$sam_n) |> + relocate(.data$mam_p, .after = .data$mam_n) + } else { + ## Non-grouped results + results <- bind_rows(results) + } + results +} diff --git a/R/prev_wasting_wfhz.R b/R/prev_wasting_wfhz.R new file mode 100644 index 0000000..254be8a --- /dev/null +++ b/R/prev_wasting_wfhz.R @@ -0,0 +1,213 @@ +#' +#' +#' @keywords internal +#' +#' +complex_survey_estimates_wfhz <- function(df, + wt = NULL, + edema = NULL, + .by) { + ## Difuse arguments ---- + wt <- enquo(wt) + edema <- enquo(edema) + + ## Defines case based on the availability of edema ---- + if (!quo_is_null(edema)) { + ## When edema is available ---- + df <- df |> + define_wasting( + zscores = .data$wfhz, + edema = !!edema, + .by = "zscores" + ) + } else { + ## When edema is not available ---- + df <- df |> + define_wasting( + zscores = .data$wfhz, + .by = "zscores" + ) + } + + ## Create a survey object ---- + if (!quo_is_null(wt)) { + srvy <- df |> + as_survey_design( + ids = .data$cluster, + pps = "brewer", + variance = "YG", + weights = !!wt + ) + } else { + ## Create survey object ---- + srvy <- df |> + mutate(wt = 1) |> + as_survey_design( + ids = .data$cluster, + pps = "brewer", + variance = "YG", + weights = .data$wt + ) + } + + ## Summarise prevalence ---- + p <- srvy |> + group_by({{ .by }}) |> + filter(.data$flag_wfhz == 0) |> + summarise( + across( + c(.data$gam:.data$mam), + list( + n = \(.)sum(., na.rm = TRUE), + p = \(.)survey_mean(., + vartype = "ci", + level = 0.95, + deff = TRUE, + na.rm = TRUE + ) + ) + ), + wt_pop = round(sum(srvyr::cur_svy_wts())) + ) + p +} + + + +#' +#' +#' Estimate the prevalence of wasting based on z-scores of weight-for-height (WFHZ) +#' +#' @description +#' Calculate the prevalence estimates of wasting based on z-scores of +#' weight-for-height and/or bilateral edema. The function allows users to +#' get the prevalence estimates calculated in accordance with the complex sample +#' design properties; this includes applying survey weights when needed or applicable. +#' +#' Before estimating, the function evaluates the quality of data by calculating +#' and rating the standard deviation of z-scores of WFHZ. If rated as problematic, +#' the prevalence is estimated based on the PROBIT method. +#' +#' Outliers are detected based on SMART flags and get excluded prior being piped +#' into the actual prevalence analysis workflow. +#' +#' @param df A data set object of class `data.frame` to use. This must have been +#' wrangled using this package's wrangling function for WFHZ data. The function +#' uses a variable name called `cluster` where the primary sampling unit IDs +#' are stored. Make sure to rename your cluster ID variable to `cluster`, otherwise +#' the function will error and terminate the execution. +#' +#' @param wt A vector of class `double` of the final survey weights. Default is +#' `NULL` assuming a self weighted survey, as in the ENA for SMART software; +#' otherwise, when a vector of weights if supplied, weighted analysis is done. +#' +#' @param edema A vector of class `character` of edema. Code should be +#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. +#' +#' @param .by A vector of class `character` or `numeric` of the geographical areas +#' or respective IDs for where the data was collected and for which the analysis +#' should be summarised at. +#' +#' @returns A summarised table of class `data.frame` of the descriptive +#' statistics about wasting. +#' +#' @examples +#' ## When .by = NULL ---- +#' ### Start off by wrangling the data ---- +#' data <- mw_wrangle_wfhz( +#' df = anthro.03, +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) +#' +#' ### Now run the prevalence function ---- +#' mw_estimate_prevalence_wfhz( +#' df = data, +#' wt = NULL, +#' edema = edema, +#' .by = NULL +#' ) +#' +#' ## Now when .by is not set to NULL ---- +#' mw_estimate_prevalence_wfhz( +#' df = data, +#' wt = NULL, +#' edema = edema, +#' .by = district +#' ) +#' +#' ## When a weighted analysis is needed ---- +#' mw_estimate_prevalence_wfhz( +#' df = anthro.02, +#' wt = wtfactor, +#' edema = edema, +#' .by = province +#' ) +#' +#' @export +#' +mw_estimate_prevalence_wfhz <- function(df, + wt = NULL, + edema = NULL, + .by = NULL) { + ## Difuse argument `.by` ---- + .by <- enquo(.by) + + ## Empty vector type list ---- + results <- list() + + if (!quo_is_null(.by)) { + ## Rate standard deviation ---- + x <- df |> + summarise( + std = rate_std(sd(remove_flags(.data$wfhz, "zscores"), na.rm = TRUE)), + .by = !!.by + ) + } else { + ## Rate standard deviation ---- + x <- df |> + summarise( + std = rate_std(sd(remove_flags(.data$wfhz, "zscores"), na.rm = TRUE)) + ) + } + + ## Compute prevalence based on the rate of the SD ---- + for (i in seq_len(nrow(x))) { + if (!quo_is_null(.by)) { + area <- pull(x, !!.by)[i] + data <- filter(df, !!sym(quo_name(.by)) == !!area) + } else { + data <- df + } + + std <- x$std[i] + if (std != "Problematic") { + ### Compute complex sample-based prevalence estimates ---- + result <- data |> + complex_survey_estimates_wfhz( + wt = {{ wt }}, + edema = {{ edema }}, + .by = !!.by + ) + } else { + ### Compute PROBIT-based prevalence estimates---- + if (!quo_is_null(.by)) { + result <- data |> + estimate_probit_prevalence( + .by = !!.by, + .for = "wfhz" + ) + } else { + ### Compute PROBIT-based prevalence estimates ---- + result <- estimate_probit_prevalence(data, .for = "wfhz") + } + } + results[[i]] <- result + } + bind_rows(results) |> + relocate(.data$gam_p, .after = .data$gam_n) |> + relocate(.data$sam_p, .after = .data$sam_n) |> + relocate(.data$mam_p, .after = .data$mam_n) +} diff --git a/R/prevalence_combined.R b/R/prevalence_combined.R deleted file mode 100644 index 3b658c2..0000000 --- a/R/prevalence_combined.R +++ /dev/null @@ -1,244 +0,0 @@ -#' -#' Compute combined prevalence of wasting -#' -#' @rdname combined_prevalence -#' -compute_pps_based_combined_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by) { - ## Case definition ---- - df <- with( - df, - define_wasting(df, - zscore = .data$wfhz, - muac = .data$muac, - edema = {{ .edema }}, - base = "combined" - ) |> - mutate( - cflags = ifelse(.data$flag_wfhz == 1 | .data$flag_mfaz == 1, 1, 0) - ) - ) - ## Create survey object ---- - if (!is.null(.wt)) { - srvy <- df |> - as_survey_design( - ids = .data$cluster, - pps = "brewer", - variance = "YG", - weights = {{ .wt }} - ) - } else { - srvy <- df |> - as_survey_design( - ids = .data$cluster, - pps = "brewer", - variance = "YG" - ) - } - ## Summarise prevalence ---- - p <- srvy |> - group_by({{ .summary_by }}) |> - filter(.data$cflags == 0) |> - summarise( - across( - c(.data$cgam:.data$cmam), - list( - n = \(.)sum(., na.rm = TRUE), - p = \(.)survey_mean(., - vartype = "ci", - level = 0.95, - deff = TRUE, - na.rm = TRUE - ) - ) - ), - wt_pop = sum(srvyr::cur_svy_wts()) - ) - p -} - - - -#' -#' -#' Compute the prevalence of combined wasting -#' -#' @description -#' The prevalence is calculated in accordance with the complex sample design -#' properties inherent to surveys. This includes weighting of survey data where -#' applicable. When either the acceptability of the standard deviation of WFHZ or -#' of the age ratio test is problematic, prevalence is not calculated. -#' -#' @param df An already wrangled dataset of class `data.frame` to use. Both -#' wranglers (of WFHZ and of MUAC) need to be used sequentially, regardless of the -#' order. Note that MUAC values should be converted to millimeters after using -#' the MUAC wrangler. -#' -#' @param .wt A vector of class `double` of the final survey weights. Default is -#' `NULL` assuming a self-weighted survey, as in the ENA for SMART software; -#' otherwise a weighted analysis is computed. -#' -#' @param .edema A vector of class `character` of edema. Code should be -#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. -#' -#' @param .summary_by A vector of class `character` of the geographical areas -#' where the data was collected and for which the analysis should be performed. -#' -#' @returns A summarised table of class `data.frame` for the descriptive -#' statistics about combined wasting. -#' -#' @details -#' A concept of "combined flags" is introduced in this function. It consists of -#' defining as flag any observation that is flagged in either `flag_wfhz` or -#' `flag_mfaz` vectors. A new column `cflags` for combined flags is created and -#' added to `df`. This ensures that all flagged observations from both WFHZ -#' and MFAZ data are excluded from the combined prevalence analysis. -#' -#' *The table below shows an overview of how `cflags` are defined* -#' | **flag_wfhz** | **flag_mfaz** | **cflags** | -#' | :---: | :---: | :---: | -#' | 1 | 0 | 1 | -#' | 0 | 1 | 1 | -#' | 0 | 0 | 0 | -#' -#' @examples -#' -#' ## When .summary_by and .wt are set to NULL ---- -#' p <- compute_combined_prevalence( -#' df = anthro.02, -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = NULL -#' ) -#' -#' print(p) -#' -#' ## When .wt is not set to NULL ---- -#' x <- compute_combined_prevalence( -#' df = anthro.02, -#' .wt = "wtfactor", -#' .edema = edema, -#' .summary_by = NULL -#' ) -#' -#' print(x) -#' -#' ## When working on data frame with multiple survey areas ---- -#' s <- anthro.03 |> -#' mw_wrangle_age( -#' dos = NULL, -#' dob = NULL, -#' age = age, -#' .decimals = 2 -#' ) |> -#' mw_wrangle_muac( -#' sex = sex, -#' muac = muac, -#' age = "age", -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' .to = "cm" -#' ) |> -#' dplyr::mutate(muac = recode_muac(muac, .to = "mm")) |> -#' mw_wrangle_wfhz( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE) |> -#' compute_combined_prevalence( -#' .edema = edema, -#' .summary_by = district -#' ) -#' -#' print(s) -#' -#' @export -#' -#' @rdname combined_prevalence -#' -compute_combined_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL) { - ## Difuse argument .summary_by ---- - .summary_by <- rlang::enquo(.summary_by) - - ## An empty vector type list ---- - results <- list() - - if (!rlang::quo_is_null(.summary_by)) { - ## Grouped summary of standard deviation classification ---- - x <- summarise( - df, - std_wfhz = rate_std(sd(remove_flags(as.numeric(.data$wfhz), "zscores"), na.rm = TRUE)), - age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), - std_mfaz = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), - muac_analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std_mfaz), - .by = !!.summary_by - ) - } else { - ## Non-grouped summary ---- - x <- summarise( - df, - std_wfhz = rate_std(sd(remove_flags(as.numeric(.data$wfhz), "zscores"), na.rm = TRUE)), - age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), - std_mfaz = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), - muac_analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std_mfaz), - ) - } - - ## Iterate over data frame to compute prevalence according to the SD ---- - for (i in seq_len(nrow(x))) { - if (!rlang::quo_is_null(.summary_by)) { - area <- dplyr::pull(x, !!.summary_by)[i] - data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == !!area) - } else { - data <- df - } - - std_wfhz <- x$std_wfhz[i] - muac_analysis_approach <- x$muac_analysis_approach[i] - - if (std_wfhz != "Problematic" && muac_analysis_approach == "unweighted") { - ### Compute standard complex sample based prevalence analysis ---- - output <- compute_pps_based_combined_prevalence( - data, - .wt = {{ .wt }}, - .edema = {{ .edema }}, - .summary_by = !!.summary_by - ) - } else { - ## Add grouped NA's ---- - if (!rlang::quo_is_null(.summary_by)) { - output <- summarise( - data, - cgam_p = NA_real_, - csam_p = NA_real_, - cmam_p = NA_real_, - .by = !!.summary_by - ) - } else { - ## Add non-grouped NA's ---- - output <- tibble::tibble( - cgam_p = NA_real_, - csam_p = NA_real_, - cmam_p = NA_real_ - ) - } - } - results[[i]] <- output - } - ### Ensure that all geographical areas are added to the tibble ---- - if (!rlang::quo_is_null(.summary_by)) { - results <- dplyr::bind_rows(results) |> - dplyr::relocate(.data$cgam_p, .after = .data$cgam_n) |> - dplyr::relocate(.data$csam_p, .after = .data$csam_n) |> - dplyr::relocate(.data$cmam_p, .after = .data$cmam_n) - } else { - ## Non-grouped results - results <- dplyr::bind_rows(results) - } - results -} diff --git a/R/prevalence_mfaz.R b/R/prevalence_mfaz.R deleted file mode 100644 index 1371002..0000000 --- a/R/prevalence_mfaz.R +++ /dev/null @@ -1,119 +0,0 @@ -#' -#' -#' -compute_pps_based_mfaz_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by) { - - ## Add acute malnutrition case-definitions to the data frame ---- - df <- with( - df, - define_wasting( - df, - zscore = .data$mfaz, - edema = {{ .edema }}, - base = "wfhz" - ) - ) - ## Create a survey object ---- - if (!is.null(.wt)) { - srvy <- df |> - as_survey_design( - ids = .data$cluster, - pps = "brewer", - variance = "YG", - weights = {{ .wt }} - ) - } else { - srvy <- df |> - as_survey_design( - ids = .data$cluster, - pps = "brewer", - variance = "YG" - ) - } - - ## Summarise prevalence ---- - p <- srvy |> - group_by({{ .summary_by }}) |> - filter(.data$flag_mfaz == 0) |> - summarise( - across( - c(.data$gam:.data$mam), - list( - n = \(.)sum(., na.rm = TRUE), - p = \(.)survey_mean(., - vartype = "ci", - level = 0.95, - deff = TRUE, - na.rm = TRUE - ) - ) - ), - wt_pop = round(sum(srvyr::cur_svy_wts())) - ) - p -} - -#' -#' -#' @rdname prevalence -#' -#' -compute_mfaz_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL) { - - ## Difuse argument .summary_by ---- - .summary_by <- rlang::enquo(.summary_by) - - ## An empty vector type list ---- - results <- list() - - if (!rlang::quo_is_null(.summary_by)) { - ## Grouped summary of standard deviation classification ---- - x <- summarise( - df, - std = rate_std(sd(remove_flags(.data$mfaz, "zscores"), na.rm = TRUE)), - .by = !!.summary_by - ) - } else { - ## Non-grouped summary ---- - x <- summarise( - df, - std = rate_std(sd(remove_flags(.data$mfaz, "zscores"), na.rm = TRUE)) - ) - } - - ## Iterate over data frame to compute prevalence according to the SD ---- - for (i in seq_len(nrow(x))) { - if (!rlang::quo_is_null(.summary_by)) { - area <- dplyr::pull(x, !!.summary_by)[i] - data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == !!area) - } else { - data <- df - } - - std <- x$std[i] - if (std != "Problematic") { - ### Compute standard complex sample based prevalence analysis ---- - result <- compute_pps_based_mfaz_prevalence(data, {{ .wt }}, {{ .edema }}, !!.summary_by) - } else { - ### Compute grouped PROBIT based prevalence ---- - if (!rlang::quo_is_null(.summary_by)) { - result <- compute_probit_prevalence(data, !!.summary_by, .for = "mfaz") - } else { - ### Compute non-grouped PROBIT based prevalence ---- - result <- compute_probit_prevalence(data, .for = "mfaz") - } - } - results[[i]] <- result - } - dplyr::bind_rows(results) |> - dplyr::relocate(.data$gam_p, .after = .data$gam_n) |> - dplyr::relocate(.data$sam_p, .after = .data$sam_n) |> - dplyr::relocate(.data$mam_p, .after = .data$mam_n) -} - diff --git a/R/prevalence_muac.R b/R/prevalence_muac.R deleted file mode 100644 index b9ab9fd..0000000 --- a/R/prevalence_muac.R +++ /dev/null @@ -1,309 +0,0 @@ -#' -#' A helper function to determine the MUAC prevalence analysis approach to follow -#' -#' @description -#' It determines the analysis approach to follow for a given analysis area on -#' the basis of the rate of acceptability of the age ratio test and the standard -#' deviation analysis result. -#' -#' @param age_ratio_class A vector of class `character` of the acceptability -#' classification of the age ratio test result. -#' -#' @param sd_class A vector of class `character` of the acceptability -#' classification of the standard deviation analysis result. -#' -#' @returns A vector of class `character` of the same length as the input vectors, -#' containing values indicating the analysis approach for each analysis area: "weighted", -#' "unweighted" and "missing". -#' -#' @details -#' When "weighted", the CDC weighting approach is applied to correct for -#' age bias; when "unweighted" a normal complex sample analysis is applied; when -#' "missing" `NA` gets thrown. -#' -#' -#' -tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) { - case_when( - age_ratio_class == "Problematic" & sd_class != "Problematic" ~ "weighted", - age_ratio_class != "Problematic" & sd_class == "Problematic" ~ "missing", - age_ratio_class == "Problematic" & sd_class == "Problematic" ~ "missing", - .default = "unweighted" - ) -} - - -#' -#' -#' Apply the CDC/SMART prevalence weighting approach on MUAC data -#' -#' @description -#' Calculate a weighted prevalence estimate of MUAC by adding the proportion of -#' children under 2 years to twice the proportion of children over 2 and then -#' dividing by 3. -#' -#' @param muac A vector of class `integer` of MUAC values (in mm). -#' -#' @param age A vector of class `double` of child's age in months. -#' -#' @param .edema A vector of class `character` of edema. Code should be -#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. -#' -#' @param status A choice of the form of wasting to be defined. -#' -#' @returns A vector of class `numeric` of length and size 1. -#' -#' @details -#' This function is informed by the output of [mw_stattest_ageratio()]. -#' -#' -apply_cdc_age_weighting <- function(muac, age, - .edema = NULL, status = c("sam", "mam")) { - - ## Match arguments ---- - status <- match.arg(status) - - if (!is.null(.edema)) { - ### Define cases ---- - nut_status <- classify_wasting_for_cdc_approach(muac = muac, .edema = {{ .edema }}) - - ### Compute age weighted prevalence ---- - age_group <- ifelse(age < 24, "under_2", "over_2") - nut_U2 <- ifelse(age_group == "under_2" & nut_status == status, 1, 0) - nut_O2 <- ifelse(age_group == "over_2" & nut_status == status, 1, 0) - p <- mean(nut_U2, na.rm = TRUE) + (2 * mean(nut_O2, na.rm = TRUE)) / 3 - - } else { - ### Define cases ---- - nut_status <- classify_wasting_for_cdc_approach(muac) - - ### Compute age weighted prevalence ---- - age_group <- ifelse(age < 24, "under_2", "over_2") - nut_U2 <- ifelse(age_group == "under_2" & nut_status == status, 1, 0) - nut_O2 <- ifelse(age_group == "over_2" & nut_status == status, 1, 0) - p <- mean(nut_U2, na.rm = TRUE) + (2 * mean(nut_O2, na.rm = TRUE)) / 3 - } - p -} - - - -#' -#' Apply the CDC/SMART prevalence weighting approach on MUAC data -#' -#' @param df An already wrangled dataset object of class `data.frame` to use. -#' -#' @param .edema A vector of class `character` of edema. Code should be -#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. -#' -#' @param .summary_by A vector of class `character` of the geographical areas -#' where the data was collected and for which the analysis should be performed. -#' -#' @returns A table of class `data.frame` of dimensions that vary based on -#' `.summary_by`, containing the results. -#' -#' -compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) { - .summary_by <- rlang::enquo(.summary_by) - - if (!is.null(.summary_by)) { - df <- df |> - filter(.data$flag_mfaz == 0) |> - #mutate(muac = recode_muac(.data$muac, unit = "cm")) |> - summarise( - sam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "sam"), - mam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "mam"), - gam = sum(.data$sam, .data$mam), - .by = !!.summary_by - ) |> - dplyr::rename( - gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam - ) - } else { - df <- df |> - filter(.data$flag_mfaz == 0) |> - mutate(muac = recode_muac(.data$muac, .to = "mm")) |> - summarise( - sam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "sam"), - mam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "mam"), - gam = sum(.data$sam, .data$mam) - ) |> - dplyr::rename( - gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam - ) - } - df -} - - - -#' -#' -#' -#' -compute_pps_based_muac_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL) { - df <- df |> - define_wasting(muac = .data$muac, edema = {{ .edema }}, base = "muac") - - ### Weighted survey analysis ---- - if (!is.null(.wt)) { - srvy <- df |> - as_survey_design( - ids = .data$cluster, - pps = "brewer", - variance = "YG", - weights = {{ .wt }} - ) - } else { - ### Unweighted: typical SMART survey analysis ---- - srvy <- df |> - as_survey_design( - ids = .data$cluster, - pps = "brewer", - variance = "YG" - ) - } - #### Summarise prevalence ---- - p <- srvy |> - group_by({{ .summary_by }}) |> - filter(.data$flag_mfaz == 0) |> - summarise( - across( - c(.data$gam:.data$mam), - list( - n = \(.)sum(., na.rm = TRUE), - p = \(.)survey_mean(., - vartype = "ci", - level = 0.95, - deff = TRUE, - na.rm = TRUE - ) - ) - ), - wt_pop = sum(srvyr::cur_svy_wts()) - ) - p -} - - -#' -#' -#' @rdname prevalence -#' -#' @examples -#' -#' ## An example of application of `compute_muac_prevalence()` ---- -#' -#' ### When .summary.by = NULL ---- -#' -#' x <- compute_muac_prevalence( -#' df = anthro.04, -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = NULL -#' ) -#' -#' print(x) -#' -#' ### When .summary_by is not set to NULL ---- -#' -#' p <- compute_muac_prevalence( -#' df = anthro.04, -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = province -#' ) -#' -#' print(p) -#' -#' @export -#' -compute_muac_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL) { - - ## Difuse argument .summary_by ---- - .summary_by <- rlang::enquo(.summary_by) - - ## An empty vector type list ---- - results <- list() - - if (!rlang::quo_is_null(.summary_by)) { - ## Grouped summary of analysis approach ---- - x <- df |> - group_by(!!.summary_by) |> - summarise( - age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), - std = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), - analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std), - .groups = "drop" - ) - } else { - ## Non-grouped summary of analysis approach ---- - x <- df |> - summarise( - age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), - std = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), - analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std) - ) - } - - ## Iterate over data frame to compute prevalence according to analysis_approach ---- - for (i in seq_len(nrow(x))) { - if (!rlang::quo_is_null(.summary_by)) { - area <- dplyr::pull(x, !!.summary_by)[i] - data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == area) - } else { - data <- df - } - - analysis_approach <- x$analysis_approach[i] - - if (analysis_approach == "unweighted") { - ### Compute standard complex sample based prevalence analysis ---- - output <- compute_pps_based_muac_prevalence(data, {{ .wt }}, {{ .edema }}, !!.summary_by) - } else if (analysis_approach == "weighted") { - ### Compute grouped weighted prevalence ---- - if (!rlang::quo_is_null(.summary_by)) { - output <- compute_weighted_prevalence(data, .edema = {{ .edema }}, !!.summary_by) - } else { - ### Compute grouped weighted prevalence ---- - output <- compute_weighted_prevalence(data, .edema = {{ .edema }}) - } - } else { - ## Add grouped NA's ---- - if (!rlang::quo_is_null(.summary_by)) { - output <- summarise( - data, - gam_p = NA_real_, - sam_p = NA_real_, - mam_p = NA_real_, - .by = !!.summary_by - ) - } else { - ## Add non-grouped NA's ---- - output <- tibble::tibble( - gam_p = NA_real_, - sam_p = NA_real_, - mam_p = NA_real_ - ) - } - } - results[[i]] <- output - } - ### Ensure that all geographical areas are added to the tibble ---- - if (!rlang::quo_is_null(.summary_by)) { - results <- dplyr::bind_rows(results) |> - dplyr::relocate(.data$gam_p, .after = .data$gam_n) |> - dplyr::relocate(.data$sam_p, .after = .data$sam_n) |> - dplyr::relocate(.data$mam_p, .after = .data$mam_n) - } else { - ## Non-grouped results - results <- dplyr::bind_rows(results) - } - results -} diff --git a/R/prevalence_wfhz.R b/R/prevalence_wfhz.R deleted file mode 100644 index 964b596..0000000 --- a/R/prevalence_wfhz.R +++ /dev/null @@ -1,303 +0,0 @@ -#' -#' Compute the prevalence estimates of wasting on the basis of WFHZ, MFAZ or MUAC -#' -#' @description -#' The prevalence is calculated in accordance with the complex sample design -#' properties inherent to surveys. This includes weighting the survey data where -#' applicable and applying PROBIT method estimation (for WFHZ) when the standard -#' deviation is problematic. This is as in the SMART Methodology. -#' -#' @param df An already wrangled dataset object of class `data.frame` to use. -#' -#' @param .wt A vector of class `double` of the final survey weights. Default is -#' `NULL` assuming a self weighted survey, as in the ENA for SMART software; -#' otherwise, when a vector of weights if supplied, weighted analysis is computed. -#' -#' @param .edema A vector of class `character` of edema. Code should be -#' "y" for presence and "n" for absence of bilateral edema. Default is `NULL`. -#' -#' @param .summary_by A vector of class `character` of the geographical areas -#' where the data was collected and for which the analysis should be performed. -#' -#' @returns A summarised table of class `data.frame` of the descriptive -#' statistics about wasting. -#' -#' @examples -#' ## An example of application of `compute_wfhz_prevalence()` ---- -#' -#' ### When .summary_by = NULL ---- -#' anthro.03 |> -#' mw_wrangle_wfhz( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) |> -#' compute_wfhz_prevalence( -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = NULL -#' ) -#' -#' ### When .summary_by is not set to NULL ---- -#' -#' anthro.03 |> -#' mw_wrangle_wfhz( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) |> -#' compute_wfhz_prevalence( -#' .wt = NULL, -#' .edema = edema, -#' .summary_by = district -#' ) -#' -#' ### When a weighted analysis is needed ---- -#' -#' anthro.02 |> -#' compute_wfhz_prevalence( -#' .wt = "wtfactor", -#' .edema = edema, -#' .summary_by = province -#' ) -#' -#' @rdname prevalence -#' -#' @export -#' -compute_wfhz_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by = NULL) { - - ## Difuse argument .summary_by ---- - .summary_by <- rlang::enquo(.summary_by) - - ## An empty vector type list ---- - results <- list() - - if (!rlang::quo_is_null(.summary_by)) { - ## Grouped summary of standard deviation classification ---- - x <- summarise( - df, - std = rate_std(sd(remove_flags(.data$wfhz, "zscores"), na.rm = TRUE)), - .by = !!.summary_by - ) - } else { - ## Non-grouped summary ---- - x <- summarise( - df, - std = rate_std(sd(remove_flags(.data$wfhz, "zscores"), na.rm = TRUE)) - ) - } - - ## Iterate over data frame to compute prevalence according to the SD ---- - for (i in seq_len(nrow(x))) { - if (!rlang::quo_is_null(.summary_by)) { - area <- dplyr::pull(x, !!.summary_by)[i] - data <- filter(df, !!sym(rlang::quo_name(.summary_by)) == !!area) - } else { - data <- df - } - - std <- x$std[i] - if (std != "Problematic") { - ### Compute standard complex sample based prevalence analysis ---- - result <- compute_pps_based_wfhz_prevalence(data, {{ .wt }}, {{ .edema }}, !!.summary_by) - } else { - ### Compute grouped PROBIT based prevalence ---- - if (!rlang::quo_is_null(.summary_by)) { - result <- compute_probit_prevalence(data, !!.summary_by, .for = "wfhz") - } else { - ### Compute non-grouped PROBIT based prevalence ---- - result <- compute_probit_prevalence(data, .for = "wfhz") - } - } - results[[i]] <- result - } - dplyr::bind_rows(results) |> - dplyr::relocate(.data$gam_p, .after = .data$gam_n) |> - dplyr::relocate(.data$sam_p, .after = .data$sam_n) |> - dplyr::relocate(.data$mam_p, .after = .data$mam_n) -} - - - -#' -#' -#' -#' -compute_pps_based_wfhz_prevalence <- function(df, - .wt = NULL, - .edema = NULL, - .summary_by) { - - ## Add acute malnutrition case-definitions to the data frame ---- - df <- with( - df, - define_wasting( - df, - zscore = .data$wfhz, - edema = {{ .edema }}, - base = "wfhz" - ) - ) - ## Create a survey object ---- - if (!is.null(.wt)) { - srvy <- df |> - as_survey_design( - ids = .data$cluster, - pps = "brewer", - variance = "YG", - weights = {{ .wt }} - ) - } else { - srvy <- df |> - as_survey_design( - ids = .data$cluster, - pps = "brewer", - variance = "YG" - ) - } - - ## Summarise prevalence ---- - p <- srvy |> - group_by({{ .summary_by }}) |> - filter(.data$flag_wfhz == 0) |> - summarise( - across( - c(.data$gam:.data$mam), - list( - n = \(.)sum(., na.rm = TRUE), - p = \(.)survey_mean(., - vartype = "ci", - level = 0.95, - deff = TRUE, - na.rm = TRUE - ) - ) - ), - wt_pop = round(sum(srvyr::cur_svy_wts())) - ) - p -} - - -#' -#' -#' Compute the prevalence estimates of wasting on the basis of the PROBIT method. -#' -#' @description -#' This approach is applied when the standard deviation of WFHZ is problematic. -#' The PROBIT method estimates the prevalence of wasting indirectly by calculating -#' the area under the tail of the curve, from negative infinitive to -#' the given threshold, using the cumulative normal distribution function with -#' the mean and standard deviation as inputs. -#' -#' @param df An already wrangled dataset object of class `data.frame` to use. -#' -#' @param x A vector of class `double` of WFHZ or MFAZ values. -#' -#' @param .status A choice of the form of wasting for which the prevalence should -#' be estimated. -#' -#' @param .summary_by A vector of class `character` of the geographical areas -#' where the data was collected and for which the analysis should be performed. -#' -#' @param .for A choice between "wfhz" and "mfaz" for the anthropometric index. -#' -#' @returns A summarised table of class `data.frame` of the prevalence estimates. -#' No confidence intervals are yielded. -#' -#' @rdname probit-method -#' -#' -apply_probit_approach <- function(x, .status = c("gam", "sam")) { - .status <- match.arg(.status) - mean <- mean(remove_flags(x, "zscores"), na.rm = TRUE) - ## Return GAM and SAM prevalence with a SD = 1 - switch( - .status, - "gam" = {pnorm(q = -2, mean = mean, sd = 1, lower.tail = TRUE, log.p = FALSE)}, - "sam" = {pnorm(q = -3, mean = mean, sd = 1, lower.tail = TRUE, log.p = FALSE)} - ) -} - - - -#' -#' -#' @rdname probit-method -#' -compute_probit_prevalence <- function(df, - .summary_by = NULL, - .for = c("wfhz", "mfaz")) { - ## Difuse argument ---- - .summary_by <- rlang::enquo(.summary_by) - ## Match argument ---- - .for <- match.arg(.for) - - switch( - .for, - "wfhz" = { - if(!is.null(.summary_by)) { - df <- summarise( - df, - gam = apply_probit_approach(.data$wfhz, .status = "gam"), - sam = apply_probit_approach(.data$wfhz, .status = "sam"), - mam = .data$gam - .data$sam, - .by = !!.summary_by - ) |> - mutate( - gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam, - gam = NA, sam = NA, mam = NA - ) |> - dplyr::select(!2:4) ## To make it fit in the tibble structure from the main function - } else { - df <- summarise( - df, - gam = apply_probit_approach(.data$wfhz, .status = "gam"), - sam = apply_probit_approach(.data$wfhz, .status = "sam"), - mam = .data$gam - .data$sam - ) |> - mutate( - gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam, - gam = NA, sam = NA, mam = NA - ) |> - dplyr::select(!2:4) ## To make it fit in the tibble structure from the main function - } - df - }, - "mfaz" = { - if(!is.null(.summary_by)) { - df <- summarise( - df, - gam = apply_probit_approach(.data$mfaz, .status = "gam"), - sam = apply_probit_approach(.data$mfaz, .status = "sam"), - mam = .data$gam - .data$sam, - .by = !!.summary_by - ) |> - mutate( - gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam, - gam = NA, sam = NA, mam = NA - ) |> - dplyr::select(!2:4) ## To make it fit in the tibble structure from the main function - } else { - df <- summarise( - df, - gam = apply_probit_approach(.data$mfaz, .status = "gam"), - sam = apply_probit_approach(.data$mfaz, .status = "sam"), - mam = .data$gam - .data$sam - ) |> - mutate( - gam_p = .data$gam, sam_p = .data$sam, mam_p = .data$mam, - gam = NA, sam = NA, mam = NA - ) |> - dplyr::select(!2:4) ## To make it fit in the tibble structure from the main function - } - df - } - ) -} diff --git a/R/quality_raters.R b/R/quality_raters.R index aeb493b..e75068f 100644 --- a/R/quality_raters.R +++ b/R/quality_raters.R @@ -7,11 +7,11 @@ #' and raw MUAC data following the SMART methodology criteria. #' #' @param p A vector of class `double`, containing the proportions of flagged -#' records in the dataset. If the class does not match the expected type, the +#' records in the data set. If the class does not match the expected type, the #' function will stop execution and return an error message indicating the type #' of mismatch. #' -#' @param .in Specifies the dataset where the rating should be done, +#' @param .in Specifies the data set where the rating should be done, #' with options: "wfhz", "mfaz", or "raw_muac". #' #' @returns A vector of class `factor` of the same length as input, for the @@ -63,10 +63,10 @@ rate_propof_flagged <- function(p, .in = c("mfaz", "wfhz", "raw_muac")) { #' Rating follows the SMART methodology criteria. #' #' @param sd A vector of class `double`, containing values of the standard deviation -#' from the dataset. If the class does not match the expected type, the function +#' from the data set. If the class does not match the expected type, the function #' will stop execution and return an error message indicating the type of mismatch. #' -#' @param .of Specifies the dataset where the rating should be done, with options: +#' @param .of Specifies the data set where the rating should be done, with options: #' "wfhz", "mfaz", or "raw_muac". #' #' @returns A vector of class `factor` of the same length as input, for the diff --git a/R/utils.R b/R/utils.R index 340c718..b597e1d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -72,7 +72,7 @@ get_age_months <- function(dos, dob) { #' millimeters or greater than 200 millimeters. #' #' Removing outliers consist in setting the outlier record to `NA` and not necessarily -#' to delete it from the dataset. This is useful in the analysis procedures +#' to delete it from the data set. This is useful in the analysis procedures #' where outliers must be removed, such as the analysis of the standard deviation. #' #' @param x A vector of class `numeric` of WFHZ, MFAZ, HFAZ, WFAZ or raw MUAC values. @@ -111,13 +111,15 @@ get_age_months <- function(dos, dob) { #' x <- anthro.01$muac #' #' ## Apply the function with `.from` set to "raw_muac" ---- -#' flag_outliers(x, .from = "raw_muac") +#' m <- flag_outliers(x, .from = "raw_muac") +#' head(m) #' #' ## Sample data of z-scores (be it WFHZ, MFAZ, HFAZ or WFAZ) ---- #' x <- anthro.02$mfaz #' #' # Apply the function with `.from` set to "zscores" ---- -#' flag_outliers(x, .from = "zscores") +#' z <- flag_outliers(x, .from = "zscores") +#' tail(z) #' #' @rdname outliers #' @export @@ -153,17 +155,21 @@ flag_outliers <- function(x, .from = c("zscores", "raw_muac")) { #' #' @examples #' ## With `.from` set to "zscores" ---- -#' remove_flags( +#' z <- remove_flags( #' x = wfhz.01$wfhz, #' .from = "zscores" #' ) #' +#' head(z) +#' #' ## With `.from` set to "raw_muac" ---- -#' remove_flags( +#' m <- remove_flags( #' x = mfaz.01$muac, #' .from = "raw_muac" #' ) #' +#' tail(m) +#' #' @rdname outliers #' #' @export @@ -205,7 +211,7 @@ remove_flags <- function(x, .from = c("zscores", "raw_muac")) { #' values are in the opposite unit of the intended conversion. If not, #' execution stops and an error message is returned. #' -#' @param x A vector of the raw MUAC values. The class can either be +#' @param x A vector of raw MUAC values. The class can either be #' `double` or `numeric` or `integer`. If different than expected, the function #' will stop execution and return an error message indicating the type of mismatch. #' @@ -224,12 +230,14 @@ remove_flags <- function(x, .from = c("zscores", "raw_muac")) { #' x = anthro.01$muac, #' .to = "cm" #' ) +#' head(muac_cm) #' #' ## Using the `muac_cm` object to recode it back to "mm" ---- #' muac_mm <- recode_muac( #' x = muac_cm, #' .to = "mm" #' ) +#' tail(muac_mm) #' #' @export #' @@ -248,7 +256,7 @@ recode_muac <- function(x, .to = c("cm", "mm")) { switch(.to, ### Recode to centimeters ---- "cm" = { - #### Enforce measuring unit is in "cm" ---- + #### Enforce measuring unit is in "mm" ---- if (any(grepl("\\.", as.character(x)))) { stop("MUAC values are not in millimeters. Please try again.") } diff --git a/R/wrangle_age.R b/R/wrangle_age.R index 1f360bf..d699c38 100644 --- a/R/wrangle_age.R +++ b/R/wrangle_age.R @@ -8,7 +8,7 @@ #' setting to `NA` the age values that are less than 6.0 and greater than or equal #' to 60.0 months old. #' -#' @param df A dataset of class `data.frame` to wrangle age from. +#' @param df A data set of class `data.frame` to wrangle age from. #' #' @param dos A vector of class `Date` for date of data collection from the #' `df`. Default is `NULL`. diff --git a/R/wrangle_muac.R b/R/wrangle_muac.R index 9fd6d44..9c75226 100644 --- a/R/wrangle_muac.R +++ b/R/wrangle_muac.R @@ -7,7 +7,7 @@ #' in detecting outliers from the raw MUAC values. The function only works after #' the age has been wrangled. #' -#' @param df A dataset object of class `data.frame` to wrangle data from. +#' @param df A data set object of class `data.frame` to wrangle data from. #' #' @param sex A `numeric` or `character` vector of child's sex. Code values should #' only be 1 or "m" for males and 2 or "f" for females. Make sure sex values diff --git a/R/wrangle_wfhz.R b/R/wrangle_wfhz.R index 74740e0..0889d04 100644 --- a/R/wrangle_wfhz.R +++ b/R/wrangle_wfhz.R @@ -5,7 +5,7 @@ #' Calculate z-scores for weight-for-height (WFHZ) and identify outliers based on #' the SMART methodology. #' -#' @param df A dataset object of class `data.frame` to wrangle data from. +#' @param df A data set object of class `data.frame` to wrangle data from. #' #' @param sex A `numeric` or `character` vector of child's sex. Code values should #' only be 1 or "m" for males and 2 or "f" for females. Make sure sex values @@ -31,8 +31,8 @@ #' `flag_wfhz`, of child's WFHZ and detected outliers, will be created. #' #' @references -#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief -#' and Transition*. Manual 2.0. Available at: . +#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief* +#' *and Transition*. Manual 2.0. Available at: . #' #' @seealso #' [flag_outliers()] [remove_flags()] diff --git a/README.md b/README.md index 59c2b21..0eecfb9 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ -# `mwana`: Utilities for analysing children’s nutritional status +# `mwana`: An efficient workflow for plausibility checks and prevalence analysis of wasting in R @@ -19,129 +19,121 @@ experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](h [![DOI](https://zenodo.org/badge/867609177.svg)](https://zenodo.org/badge/latestdoi/867609177) -Child anthropometric assessments, implemented routinely in most -countries worldwide, are the cornerstones of child nutrition and food -security surveillance around the world. Ensuring the quality of child -anthropometric data, the accuracy of child undernutrition prevalence -estimates, and the timeliness of reporting is therefore critical in -establishing accurate, robust, and up-to-date child undernutrition -status globally. +Child anthropometric assessments are the cornerstones of child nutrition +and food security surveillance around the world. Ensuring the quality of +data from these assessments is paramount to obtaining accurate child +undernutrition prevalence estimates. Additionally, the timeliness of +reporting is, as well, critical to allowing timely situation analyses +and responses to tackle the needs of the affected population. `mwana`, term for *child* in *Elómwè*, a local language spoken in the -central-northern regions of Mozambique, and also a word with a similar -meaning across other Bantu languages (such as Swahili) spoken in many -parts of Africa, is a package that streamlines child anthropometry data -quality checks and undernutrition prevalence estimation for children -6-59 months old through comprehensive implementation of the SMART -Methodology guidelines in R. +central-northern regions of Mozambique, with a similar meaning across +other Bantu languages, such as Swahili, spoken in many parts of Africa, +is a package that streamlines data quality checks and wasting prevalence +estimation from anthropometric data of children aged 6 to 59 months old +through a comprehensive implementation of the SMART Methodology +guidelines in R. ## Motivation `mwana` was borne out of the author’s own experience of having to work -with multiple child anthropometric datasets to conduct data quality -appraisal and prevalence estimation as part of the data quality -assurance team of the Integrated Phase Classification (IPC). The current -standard child anthropometric data appraisal workflow is extremely -cumbersome requiring significant time and effort utilising different -software tools (SPSS, Excel, Emergency Nutrition Assessment or ENA -software) for each step of the process for a single dataset. This -process is repeated for every dataset needing to be processed and often -needing to be implemented in a relatively short period time. This manual -and repetitive process, by its nature, is extremely error-prone. - -`mwana`, which is primarily an R-based implementation of the ENA for -SMART software, simplifies this cumbersome workflow into a programmable -process particularly when handling large multiple datasets. +with multiple child anthropometric data sets to conduct data quality +appraisal and prevalence estimation as part of the analysis Quality +Assurance Team of the Integrated Phase Classification (IPC) Global +Support Unit. The current standard child anthropometric data appraisal +workflow is extremely cumbersome, requiring significant time and effort +utilizing different software tools - SPSS, Excel, Emergency Nutrition +Assessment or ENA software - for each step of the process for a single +data set. This process is repeated for every data set needing to be +processed and often needing to be implemented in a relatively short +period of time. This manual and repetitive process, by its nature, is +extremely error-prone. + +`mwana` simplifies this cumbersome workflow into a programmable process +particularly when handling multiple-area data set. > [!NOTE] > > `mwana` was made possible thanks to the state-of-the-art work in > nutrition survey guidance led by the [SMART > initiative](https://smartmethodology.org). Under the hood, `mwana` -> bundles the SMART guidance through the use of the National Information -> Platforms for Nutrition Anthropometric Data Toolkit (nipnTK) -> functionalities in `R` to build its handy function around plausibility -> checks. Click [here](https://github.com/nutriverse/nipnTK) to learn -> more about the `nipnTK` package. +> bundles the SMART Methodology guidance, for both survey and non survey +> data, through the use of the National Information Platforms for +> Nutrition Anthropometric Data Toolkit (nipnTK) functionalities in `R` +> to build its handy function around plausibility checks and wasting +> prevalence estimation. Click +> [here](https://github.com/nutriverse/nipnTK) to learn more about the +> {`nipnTK`} package. ## What does `mwana` do? -It automates plausibility checks and prevalence analyses and respective -summaries of the outputs. +It automates plausibility checks, prevalence analyses, and summary +outputs, providing particular advantages when handling data sets with +multiple areas. ### Plausibility checks. - `mwana` performs plausibility checks on weight-for-height z-score - (WFHZ)-based data by mimicking the SMART plausibility checkers in ENA - for SMART software, their scoring and classification criterion. Read - guide + (WFHZ) data by mimicking the SMART plausibility checkers in ENA for + SMART software, their scoring and classification criterion. Read guide [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-wfhz-data). - It performs, as well, plausibility checks on MUAC data. For this, - `mwana` integrates recent advances in using MUAC-for-age z-score - (MFAZ) for assessing the plausibility and the acceptability of MUAC + `mwana` integrates recent advances in using muac-for-age z-score + (MFAZ) for checking the plausibility and the acceptability of MUAC data. In this way, when the variable age is available: `mwana` performs plausibility checks similar to those in WFHZ, with a few - differences in the scoring criteria for flagged data. Otherwise, when - the variables age is missing, a similar test suit used in the current - version of ENA is performed. Read guide + differences in the scoring criteria for the percent of flagged data. + Otherwise, when the variables age is missing, a similar test suit used + in the current version of ENA is performed. Read guide [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-mfaz-data). -#### A useful workflow for plausibility check using `mwana` +#### A tidy workflow for plausibility check using `mwana` -### Prevalence analysis +### Prevalence estimation -`mwana` prevalence calculators were built to take decisions on the +`mwana` prevalence estimators were built to take decisions on the appropriate analysis procedure to follow based on the quality of the -data, as per the SMART rules. It returns an output table with the -appropriate results based on the data quality test results. -Fundamentally, the calculators loop over the survey areas in the dataset -whilst performing quality appraisal and taking decisions on the -appropriate prevalence analysis procedure to follow on the basis of the -result. +data, as per the SMART rules. They return output tables with summarized +results based on the data quality test results. Fundamentally, the +functions loop over the survey areas in the data set whilst doing +quality checks and taking decisions on the appropriate prevalence +analysis path that best fits the data. -`mwana` computes prevalence for: +`mwana` estimates wasting prevalence on the basis of: -- Wasting on the basis of WFHZ and/edema; read the guide +- WFHZ and/or edema. Read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-wfhz) -- Wasting on the basis of raw MUAC values and/or edema: here, when - variable age is available, mwana applies MFAZ flags, otherwise it - applies the flagging criteria around the raw MUAC values, to exclude - outliers before computing prevalence, but the actual prevalence is - done on the raw values; read the guide +- Raw MUAC values and/or edema. When variable age is available, + detection and removal of outliers is based on MFAZ, otherwise based on + the raw MUAC values. This is simply to exclude outliers; the actual + prevalence estimation is based on the raw MUAC values. Read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-muac). -- Wasting on the basis of MFAZ and/edema: outliers excluded using MFAZ - flags; read guide +- MFAZ and/or edema. Read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-prevalence-of-wasting-based-on-mfaz). -- Combined prevalence of wasting: here a concept of combined flags is - used to streamline the flags removed in WFHZ and those in MUAC; read - guide +- Combined prevalence. A concept of combined flags is used to streamline + the flags removed in WFHZ and those in MUAC. Read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-combined-prevalence-of-wasting). -`mwana` provides weighted prevalence analysis, if needed. And this is -controlled by the user. This is possible in all calculators, including -for MUAC, combined, which is not currently available in ENA for SMART. - In the context of IPC Acute Malnutrition (IPC AMN) analysis workflow, -`mwana` provides a handy function for checking if the minimum sample -size requirements in a given area were met on the basis of the -methodology used to collect the data: survey, screening or sentinel -sites. Read guide +`mwana` provides a handy function for checking whether the minimum +sample size requirements of a given area were met, on the basis of the +methodology used to collect the data, be it a survey, a screening or a +sentinel site data. Read the guide [here](https://nutriverse.io/mwana/articles/sample_size.html). > [!TIP] > > If you are undertaking a research and you want to wrangle your data -> before including in your statistical models, etc, `mwana` is a great -> helper. +> before using it in your statistical models, `mwana` is a great helper. > [!WARNING] > > Please note that `mwana` is still highly experimental and is > undergoing a lot of development. Hence, any functionalities described -> below have a high likelihood of changing interface or approach as we +> above have a high likelihood of changing interface or approach as we > aim for a stable working version. ## Installation @@ -172,17 +164,17 @@ function as follows: citation("mwana") #> To cite mwana: in publications use: #> -#> Tomás Zaba, Ernest Guevarra (2024). _mwana: Utilities for Analysing -#> Children's Nutritional Status_. R package version 0.0.0.9000, -#> . +#> Tomás Zaba, Ernest Guevarra (2024). _mwana: An Efficient Workflow for +#> Plausibility Checks and Prevalence Analysis of Wasting in R_. R +#> package version 0.1.0, . #> #> A BibTeX entry for LaTeX users is #> #> @Manual{, -#> title = {mwana: Utilities for Analysing Children's Nutritional Status}, +#> title = {mwana: An Efficient Workflow for Plausibility Checks and Prevalence Analysis of Wasting in R}, #> author = {{Tomás Zaba} and {Ernest Guevarra}}, #> year = {2024}, -#> note = {R package version 0.0.0.9000}, +#> note = {R package version 0.1.0}, #> url = {https://github.com/nutriverse/mwana}, #> } ``` diff --git a/README.qmd b/README.qmd index d6ee535..917cb42 100644 --- a/README.qmd +++ b/README.qmd @@ -15,7 +15,7 @@ knitr: library(mwana) ``` -# `mwana`: Utilities for analysing children's nutritional status +# `mwana`: An efficient workflow for plausibility checks and prevalence analysis of wasting in R [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) @@ -28,31 +28,31 @@ library(mwana) [![DOI](https://zenodo.org/badge/867609177.svg)](https://zenodo.org/badge/latestdoi/867609177) -Child anthropometric assessments, implemented routinely in most countries worldwide, are the cornerstones of child nutrition and food security surveillance around the world. Ensuring the quality of child anthropometric data, the accuracy of child undernutrition prevalence estimates, and the timeliness of reporting is therefore critical in establishing accurate, robust, and up-to-date child undernutrition status globally. +Child anthropometric assessments are the cornerstones of child nutrition and food security surveillance around the world. Ensuring the quality of data from these assessments is paramount to obtaining accurate child undernutrition prevalence estimates. Additionally, the timeliness of reporting is, as well, critical to allowing timely situation analyses and responses to tackle the needs of the affected population. -`mwana`, term for *child* in *Elómwè*, a local language spoken in the central-northern regions of Mozambique, and also a word with a similar meaning across other Bantu languages (such as Swahili) spoken in many parts of Africa, is a package that streamlines child anthropometry data quality checks and undernutrition prevalence estimation for children 6-59 months old through comprehensive implementation of the SMART Methodology guidelines in R. +`mwana`, term for *child* in *Elómwè*, a local language spoken in the central-northern regions of Mozambique, with a similar meaning across other Bantu languages, such as Swahili, spoken in many parts of Africa, is a package that streamlines data quality checks and wasting prevalence estimation from anthropometric data of children aged 6 to 59 months old through a comprehensive implementation of the SMART Methodology guidelines in R. ## Motivation -`mwana` was borne out of the author’s own experience of having to work with multiple child anthropometric datasets to conduct data quality appraisal and prevalence estimation as part of the data quality assurance team of the Integrated Phase Classification (IPC). The current standard child anthropometric data appraisal workflow is extremely cumbersome requiring significant time and effort utilising different software tools (SPSS, Excel, Emergency Nutrition Assessment or ENA software) for each step of the process for a single dataset. This process is repeated for every dataset needing to be processed and often needing to be implemented in a relatively short period time. This manual and repetitive process, by its nature, is extremely error-prone. +`mwana` was borne out of the author’s own experience of having to work with multiple child anthropometric data sets to conduct data quality appraisal and prevalence estimation as part of the analysis Quality Assurance Team of the Integrated Phase Classification (IPC) Global Support Unit. The current standard child anthropometric data appraisal workflow is extremely cumbersome, requiring significant time and effort utilizing different software tools - SPSS, Excel, Emergency Nutrition Assessment or ENA software - for each step of the process for a single data set. This process is repeated for every data set needing to be processed and often needing to be implemented in a relatively short period of time. This manual and repetitive process, by its nature, is extremely error-prone. -`mwana`, which is primarily an R-based implementation of the ENA for SMART software, simplifies this cumbersome workflow into a programmable process particularly when handling large multiple datasets. +`mwana` simplifies this cumbersome workflow into a programmable process particularly when handling multiple-area data set. :::{.callout-note} -`mwana` was made possible thanks to the state-of-the-art work in nutrition survey guidance led by the [SMART initiative](https://smartmethodology.org). Under the hood, `mwana` bundles the SMART guidance through the use of the National Information Platforms for Nutrition Anthropometric Data Toolkit (nipnTK) functionalities in `R` to build its handy function around plausibility checks. Click [here](https://github.com/nutriverse/nipnTK) to learn more about the `nipnTK` package. +`mwana` was made possible thanks to the state-of-the-art work in nutrition survey guidance led by the [SMART initiative](https://smartmethodology.org). Under the hood, `mwana` bundles the SMART Methodology guidance, for both survey and non survey data, through the use of the National Information Platforms for Nutrition Anthropometric Data Toolkit (nipnTK) functionalities in `R` to build its handy function around plausibility checks and wasting prevalence estimation. Click [here](https://github.com/nutriverse/nipnTK) to learn more about the {`nipnTK`} package. ::: ## What does `mwana` do? -It automates plausibility checks and prevalence analyses and respective summaries of the outputs. +It automates plausibility checks, prevalence analyses, and summary outputs, providing particular advantages when handling data sets with multiple areas. ### Plausibility checks. - + `mwana` performs plausibility checks on weight-for-height z-score (WFHZ)-based data by mimicking the SMART plausibility checkers in ENA for SMART software, their scoring and classification criterion. Read guide [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-wfhz-data). + + `mwana` performs plausibility checks on weight-for-height z-score (WFHZ) data by mimicking the SMART plausibility checkers in ENA for SMART software, their scoring and classification criterion. Read guide [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-wfhz-data). - + It performs, as well, plausibility checks on MUAC data. For this, `mwana` integrates recent advances in using MUAC-for-age z-score (MFAZ) for assessing the plausibility and the acceptability of MUAC data. In this way, when the variable age is available: `mwana` performs plausibility checks similar to those in WFHZ, with a few differences in the scoring criteria for flagged data. Otherwise, when the variables age is missing, a similar test suit used in the current version of ENA is performed. Read guide [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-mfaz-data). + + It performs, as well, plausibility checks on MUAC data. For this, `mwana` integrates recent advances in using muac-for-age z-score (MFAZ) for checking the plausibility and the acceptability of MUAC data. In this way, when the variable age is available: `mwana` performs plausibility checks similar to those in WFHZ, with a few differences in the scoring criteria for the percent of flagged data. Otherwise, when the variables age is missing, a similar test suit used in the current version of ENA is performed. Read guide [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-mfaz-data). -#### A useful workflow for plausibility check using `mwana` +#### A tidy workflow for plausibility check using `mwana` ```{r} #| label: workflow #| echo: false @@ -103,27 +103,25 @@ digraph mwana { ", width = 400, height = 450) ``` -### Prevalence analysis +### Prevalence estimation -`mwana` prevalence calculators were built to take decisions on the appropriate analysis procedure to follow based on the quality of the data, as per the SMART rules. It returns an output table with the appropriate results based on the data quality test results. Fundamentally, the calculators loop over the survey areas in the dataset whilst performing quality appraisal and taking decisions on the appropriate prevalence analysis procedure to follow on the basis of the result. +`mwana` prevalence estimators were built to take decisions on the appropriate analysis procedure to follow based on the quality of the data, as per the SMART rules. They return output tables with summarized results based on the data quality test results. Fundamentally, the functions loop over the survey areas in the data set whilst doing quality checks and taking decisions on the appropriate prevalence analysis path that best fits the data. -`mwana` computes prevalence for: +`mwana` estimates wasting prevalence on the basis of: - + Wasting on the basis of WFHZ and/edema; read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-wfhz) - + Wasting on the basis of raw MUAC values and/or edema: here, when variable age is available, mwana applies MFAZ flags, otherwise it applies the flagging criteria around the raw MUAC values, to exclude outliers before computing prevalence, but the actual prevalence is done on the raw values; read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-muac). - + Wasting on the basis of MFAZ and/edema: outliers excluded using MFAZ flags; read guide [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-prevalence-of-wasting-based-on-mfaz). - + Combined prevalence of wasting: here a concept of combined flags is used to streamline the flags removed in WFHZ and those in MUAC; read guide [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-combined-prevalence-of-wasting). + + WFHZ and/or edema. Read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-wfhz) + + Raw MUAC values and/or edema. When variable age is available, detection and removal of outliers is based on MFAZ, otherwise based on the raw MUAC values. This is simply to exclude outliers; the actual prevalence estimation is based on the raw MUAC values. Read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-muac). + + MFAZ and/or edema. Read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-prevalence-of-wasting-based-on-mfaz). + + Combined prevalence. A concept of combined flags is used to streamline the flags removed in WFHZ and those in MUAC. Read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-combined-prevalence-of-wasting). -`mwana` provides weighted prevalence analysis, if needed. And this is controlled by the user. This is possible in all calculators, including for MUAC, combined, which is not currently available in ENA for SMART. - -In the context of IPC Acute Malnutrition (IPC AMN) analysis workflow, `mwana` provides a handy function for checking if the minimum sample size requirements in a given area were met on the basis of the methodology used to collect the data: survey, screening or sentinel sites. Read guide [here](https://nutriverse.io/mwana/articles/sample_size.html). +In the context of IPC Acute Malnutrition (IPC AMN) analysis workflow, `mwana` provides a handy function for checking whether the minimum sample size requirements of a given area were met, on the basis of the methodology used to collect the data, be it a survey, a screening or a sentinel site data. Read the guide [here](https://nutriverse.io/mwana/articles/sample_size.html). :::{.callout-tip} -If you are undertaking a research and you want to wrangle your data before including in your statistical models, etc, `mwana` is a great helper. +If you are undertaking a research and you want to wrangle your data before using it in your statistical models, `mwana` is a great helper. ::: :::{.callout-warning} -Please note that `mwana` is still highly experimental and is undergoing a lot of development. Hence, any functionalities described below have a high likelihood of changing interface or approach as we aim for a stable working version. +Please note that `mwana` is still highly experimental and is undergoing a lot of development. Hence, any functionalities described above have a high likelihood of changing interface or approach as we aim for a stable working version. ::: ## Installation diff --git a/inst/CITATION b/inst/CITATION index 3ec7bfa..2a972d9 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,9 +1,9 @@ bibentry( bibtype = "Manual", header = "To cite mwana: in publications use:", - title = "mwana: Utilities for Analysing Children's Nutritional Status", + title = "mwana: An Efficient Workflow for Plausibility Checks and Prevalence Analysis of Wasting in R", author = c(person("Tomás Zaba"), person("Ernest Guevarra")), year = 2024, - note = "R package version 0.0.0.9000", + note = "R package version 0.2.0", url = "https://github.com/nutriverse/mwana", ) diff --git a/man/anthro.03.Rd b/man/anthro.03.Rd index 1d5414e..a32ca8c 100644 --- a/man/anthro.03.Rd +++ b/man/anthro.03.Rd @@ -25,13 +25,13 @@ Anonymous anthro.03 } \description{ -\code{anthro.03} contains survey data of four districts. Each district dataset +\code{anthro.03} contains survey data of four districts. Each district data set presents distinct data quality scenarios that requires tailored prevalence analysis approach: two districts show a problematic WFHZ standard deviation whilst the remaining are all within range. This sample data is useful to demonstrate the use of the prevalence functions on -a multi-area survey data where there can be variations in the rating of +a multiple-area survey data where there can be variations in the rating of acceptability of the standard deviation, hence require different analyses approaches for each area to ensure accurate estimation. } diff --git a/man/anthro.04.Rd b/man/anthro.04.Rd index 5cfae6e..69f4ed1 100644 --- a/man/anthro.04.Rd +++ b/man/anthro.04.Rd @@ -25,7 +25,7 @@ anthro.04 } \description{ Data was generated through a community-based sentinel site conducted -across three provinces. Each province's dataset presents distinct +across three provinces. Each province's data set presents distinct data quality scenarios, requiring tailored prevalence analysis: \itemize{ \item "Province 1" has MFAZ's standard deviation and age ratio test rating of @@ -36,7 +36,7 @@ standard deviation of MFAZ; } This sample data is useful to demonstrate the use of prevalence functions on -a multi-area survey data where variations in the rating of acceptability of the +a multiple-area survey data where variations in the rating of acceptability of the standard deviation exist, hence require different analyses approaches for each area to ensure accurate estimation. } diff --git a/man/apply_cdc_age_weighting.Rd b/man/apply_cdc_age_weighting.Rd deleted file mode 100644 index b29f4de..0000000 --- a/man/apply_cdc_age_weighting.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_muac.R -\name{apply_cdc_age_weighting} -\alias{apply_cdc_age_weighting} -\title{Apply the CDC/SMART prevalence weighting approach on MUAC data} -\usage{ -apply_cdc_age_weighting(muac, age, .edema = NULL, status = c("sam", "mam")) -} -\arguments{ -\item{muac}{A vector of class \code{integer} of MUAC values (in mm).} - -\item{age}{A vector of class \code{double} of child's age in months.} - -\item{.edema}{A vector of class \code{character} of edema. Code should be -"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} - -\item{status}{A choice of the form of wasting to be defined.} -} -\value{ -A vector of class \code{numeric} of length and size 1. -} -\description{ -Calculate a weighted prevalence estimate of MUAC by adding the proportion of -children under 2 years to twice the proportion of children over 2 and then -dividing by 3. -} -\details{ -This function is informed by the output of \code{\link[=mw_stattest_ageratio]{mw_stattest_ageratio()}}. -} diff --git a/man/case_definition.Rd b/man/case_definition.Rd deleted file mode 100644 index 4de9245..0000000 --- a/man/case_definition.Rd +++ /dev/null @@ -1,84 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/case_definitions.R -\name{define_wasting_cases_muac} -\alias{define_wasting_cases_muac} -\alias{define_wasting_cases_whz} -\alias{define_wasting_cases_combined} -\alias{define_wasting} -\title{Define wasting based on WFHZ, MFAZ, MUAC and Combined criteria} -\usage{ -define_wasting_cases_muac(muac, edema = NULL, cases = c("gam", "sam", "mam")) - -define_wasting_cases_whz(zscore, edema = NULL, cases = c("gam", "sam", "mam")) - -define_wasting_cases_combined( - zscore, - muac, - edema = NULL, - cases = c("cgam", "csam", "cmam") -) - -define_wasting( - df, - zscore = NULL, - muac = NULL, - edema = NULL, - base = c("wfhz", "muac", "combined") -) -} -\arguments{ -\item{muac}{A vector of class \code{integer} of MUAC values in millimeters.} - -\item{edema}{A vector of class \code{character} of edema. Code should be -"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} - -\item{cases}{A choice of the form of wasting to be defined.} - -\item{zscore}{A vector of class \code{double} of WFHZ values (with 3 decimal places).} - -\item{df}{A dataset object of class \code{data.frame} to use.} - -\item{base}{A choice of the criterion on which the case-definition should be based.} -} -\value{ -A vector of class \code{numeric} of dummy values: 1 for case and 0 -for not case. -} -\description{ -Define if a given observation in the dataset is wasted or not, on the basis of -WFHZ, MFAZ, MUAC and the combined criteria. -} -\details{ -Use \code{define_wasting()} to add the case-definitions to data frame. -} -\examples{ - -## Weight-for-height based case-definition ---- -x <- anthro.02 |> -define_wasting( -zscore = wfhz, -edema = edema, -base = "wfhz" -) -head(x) - -## MUAC-based case-definition ---- -x <- anthro.02 |> -define_wasting( -muac = muac, -edema = edema, -base = "muac" -) -head(x) - -## Combined case-definition ---- -x <- anthro.02 |> -define_wasting( -zscore = wfhz, -muac = muac, -edema = edema, -base = "combined" -) -head(x) - -} diff --git a/man/classify_wasting_for_cdc_approach.Rd b/man/classify_wasting_for_cdc_approach.Rd deleted file mode 100644 index 26f11d4..0000000 --- a/man/classify_wasting_for_cdc_approach.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/case_definitions.R -\name{classify_wasting_for_cdc_approach} -\alias{classify_wasting_for_cdc_approach} -\title{Classify wasting into severe or moderate wasting to be used in the -SMART MUAC tool weighting approach} -\usage{ -classify_wasting_for_cdc_approach(muac, .edema = NULL) -} -\arguments{ -\item{muac}{A vector of class \code{integer} of MUAC values in millimeters.} - -\item{.edema}{A vector of class \code{character} of edema. Code should be -"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} -} -\value{ -A vector of class \code{character} of the same length as \code{muac} and \code{.edema} -indicating if a child is severe or moderately wasted or not wasted. -} -\description{ -Classify wasting into severe or moderate wasting to be used in the -SMART MUAC tool weighting approach -} diff --git a/man/combined_prevalence.Rd b/man/combined_prevalence.Rd deleted file mode 100644 index 9f6f540..0000000 --- a/man/combined_prevalence.Rd +++ /dev/null @@ -1,108 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_combined.R -\name{compute_pps_based_combined_prevalence} -\alias{compute_pps_based_combined_prevalence} -\alias{compute_combined_prevalence} -\title{Compute combined prevalence of wasting} -\usage{ -compute_pps_based_combined_prevalence( - df, - .wt = NULL, - .edema = NULL, - .summary_by -) - -compute_combined_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{An already wrangled dataset of class \code{data.frame} to use. Both -wranglers (of WFHZ and of MUAC) need to be used sequentially, regardless of the -order. Note that MUAC values should be converted to millimeters after using -the MUAC wrangler.} - -\item{.wt}{A vector of class \code{double} of the final survey weights. Default is -\code{NULL} assuming a self-weighted survey, as in the ENA for SMART software; -otherwise a weighted analysis is computed.} - -\item{.edema}{A vector of class \code{character} of edema. Code should be -"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} - -\item{.summary_by}{A vector of class \code{character} of the geographical areas -where the data was collected and for which the analysis should be performed.} -} -\value{ -A summarised table of class \code{data.frame} for the descriptive -statistics about combined wasting. -} -\description{ -The prevalence is calculated in accordance with the complex sample design -properties inherent to surveys. This includes weighting of survey data where -applicable. When either the acceptability of the standard deviation of WFHZ or -of the age ratio test is problematic, prevalence is not calculated. -} -\details{ -A concept of "combined flags" is introduced in this function. It consists of -defining as flag any observation that is flagged in either \code{flag_wfhz} or -\code{flag_mfaz} vectors. A new column \code{cflags} for combined flags is created and -added to \code{df}. This ensures that all flagged observations from both WFHZ -and MFAZ data are excluded from the combined prevalence analysis. - -\emph{The table below shows an overview of how \code{cflags} are defined}\tabular{ccc}{ - \strong{flag_wfhz} \tab \strong{flag_mfaz} \tab \strong{cflags} \cr - 1 \tab 0 \tab 1 \cr - 0 \tab 1 \tab 1 \cr - 0 \tab 0 \tab 0 \cr -} -} -\examples{ - -## When .summary_by and .wt are set to NULL ---- -p <- compute_combined_prevalence( -df = anthro.02, -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -print(p) - -## When .wt is not set to NULL ---- -x <- compute_combined_prevalence( -df = anthro.02, -.wt = "wtfactor", -.edema = edema, -.summary_by = NULL -) - -print(x) - -## When working on data frame with multiple survey areas ---- -s <- anthro.03 |> -mw_wrangle_age( -dos = NULL, -dob = NULL, -age = age, -.decimals = 2 -) |> -mw_wrangle_muac( -sex = sex, -muac = muac, -age = "age", -.recode_sex = TRUE, -.recode_muac = TRUE, -.to = "cm" -) |> -dplyr::mutate(muac = recode_muac(muac, .to = "mm")) |> -mw_wrangle_wfhz( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE) |> -compute_combined_prevalence( -.edema = edema, -.summary_by = district -) - -print(s) - -} diff --git a/man/compute_weighted_prevalence.Rd b/man/compute_weighted_prevalence.Rd deleted file mode 100644 index e55d415..0000000 --- a/man/compute_weighted_prevalence.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_muac.R -\name{compute_weighted_prevalence} -\alias{compute_weighted_prevalence} -\title{Apply the CDC/SMART prevalence weighting approach on MUAC data} -\usage{ -compute_weighted_prevalence(df, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{An already wrangled dataset object of class \code{data.frame} to use.} - -\item{.edema}{A vector of class \code{character} of edema. Code should be -"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} - -\item{.summary_by}{A vector of class \code{character} of the geographical areas -where the data was collected and for which the analysis should be performed.} -} -\value{ -A table of class \code{data.frame} of dimensions that vary based on -\code{.summary_by}, containing the results. -} -\description{ -Apply the CDC/SMART prevalence weighting approach on MUAC data -} diff --git a/man/define_wasting.Rd b/man/define_wasting.Rd new file mode 100644 index 0000000..e6592f0 --- /dev/null +++ b/man/define_wasting.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prev_define_wasting.R +\name{define_wasting} +\alias{define_wasting} +\title{Define wasting} +\usage{ +define_wasting( + df, + zscores = NULL, + muac = NULL, + edema = NULL, + .by = c("zscores", "muac", "combined") +) +} +\arguments{ +\item{df}{A data set object of class \code{data.frame} to use. It must have been +wrangled using this package's wrangling functions for WFHZ or MUAC, or both +(for combined) as appropriate.} + +\item{zscores}{A vector of class \code{double} of WFHZ or MFAZ values. If the class +does not match the expected type, the function will stop execution and return +an error message indicating the type of mismatch.} + +\item{muac}{A vector of class \code{integer} or \code{numeric} of raw MUAC values in +millimeters. If the class does not match the expected type, the function will +stop execution and return an error message indicating the type of mismatch.} + +\item{edema}{A vector of class \code{character} of edema. Default is \code{NULL}. +If the class does not match the expected type, the function will stop execution +and return an error message indicating the type of mismatch. Code values should be +"y" for presence and "n" for absence of bilateral edema. If different, the +function will stop execution and return an error indicating the issue.} + +\item{.by}{A choice of the criterion by which the case-definition should done. +Choose \code{zscores} for WFHZ or MFAZ, \code{muac} for raw MUAC and \code{combined} for +combined.} +} +\value{ +Three vectors named \code{gam}, \code{sam} and \code{mam}, of class \code{numeric}, same +length as inputs, containing dummy values: 1 for case and 0 for not case. +This is added to \code{df}. When \code{combined} is selected, vector's names become +\code{cgam}, \code{csam} and \code{cmam}. +} +\description{ +Define if a given observation in the data set is wasted or not, and its +respective form of wasting (global, severe or moderate) on the basis of +z-scores of weight-for-height (WFHZ), muac-for-age (MFAZ), raw MUAC values and +combined case-definition. +} +\examples{ +## Case-definition by z-scores ---- +z <- anthro.02 |> + define_wasting( + zscores = wfhz, + muac = NULL, + edema = edema, + .by = "zscores" + ) +head(z) + +## Case-definition by MUAC ---- +m <- anthro.02 |> + define_wasting( + zscores = NULL, + muac = muac, + edema = edema, + .by = "muac" + ) +head(m) + +## Case-definition by combined ---- +c <- anthro.02 |> + define_wasting( + zscores = wfhz, + muac = muac, + edema = edema, + .by = "combined" + ) +head(c) + +} diff --git a/man/mw_check_ipcamn_ssreq.Rd b/man/mw_check_ipcamn_ssreq.Rd index 96c218a..233f05f 100644 --- a/man/mw_check_ipcamn_ssreq.Rd +++ b/man/mw_check_ipcamn_ssreq.Rd @@ -7,7 +7,7 @@ mw_check_ipcamn_ssreq(df, cluster, .source = c("survey", "screening", "ssite")) } \arguments{ -\item{df}{A dataset object of class \code{data.frame} to check.} +\item{df}{A data set object of class \code{data.frame} to check.} \item{cluster}{A vector of class \code{integer} or \code{character} of unique cluster or screening or sentinel site IDs. If a \code{character} vector, ensure that names are @@ -23,13 +23,13 @@ screening data; "ssite" for community-based sentinel site data.} A summary table of class \code{data.frame}, of length 3 and width 1, for the check results. \code{n_clusters} is for the total number of unique clusters or screening or site IDs; \code{n_obs} for the correspondent total number of children -in the dataset; and \code{meet_ipc} for whether the IPC AMN requirements were met. +in the data set; and \code{meet_ipc} for whether the IPC AMN requirements were met. } \description{ Evidence on the prevalence of acute malnutrition used in the IPC AMN can come from different sources: surveys, screenings or community-based surveillance system. The IPC set minimum sample size requirements -for each source. This function helps in verifying whether the requirements +for each source. This function helps in verifying whether those requirements were met or not depending on the source. } \examples{ diff --git a/man/mw_estimate_prevalence_combined.Rd b/man/mw_estimate_prevalence_combined.Rd new file mode 100644 index 0000000..5ce3b05 --- /dev/null +++ b/man/mw_estimate_prevalence_combined.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prev_wasting_combined.R +\name{mw_estimate_prevalence_combined} +\alias{mw_estimate_prevalence_combined} +\title{Estimate the prevalence of combined wasting} +\usage{ +mw_estimate_prevalence_combined(df, wt = NULL, edema = NULL, .by = NULL) +} +\arguments{ +\item{df}{A data set object of class \code{data.frame} to use. This must have been +wrangled using this package's wrangling functions for both WFHZ and MUAC data +sequentially. The order does not matter. Note that MUAC values should be +converted to millimeters after using the MUAC wrangler. If this is not done, +the function will stop execution and return an error message. Moreover, the +function uses a variable called \code{cluster} where the primary sampling unit IDs +are stored. Make sure to rename your cluster ID variable to \code{cluster}, otherwise +the function will error and terminate the execution.} + +\item{wt}{A vector of class \code{double} of the final survey weights. Default is +\code{NULL} assuming a self-weighted survey, as in the ENA for SMART software; +otherwise a weighted analysis is computed.} + +\item{edema}{A vector of class \code{character} of edema. Code will be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} + +\item{.by}{A vector of class \code{character} or \code{numeric} of the geographical areas +or respective IDs for where the data was collected and for which the analysis +should be summarised at.} +} +\value{ +A summarised table of class \code{data.frame} for the descriptive +statistics about combined wasting. +} +\description{ +Estimate the prevalence of wasting based on the combined case-definition of +weight-for-height z-scores (WFHZ), MUAC and/or edema. The function allows users to +get the prevalence estimates in accordance with the complex sample +design properties; this includes applying survey weights when needed or applicable. +Before estimating, the function evaluates the quality of data by calculating +and rating the standard deviation of WFHZ and MFAZ, as well as the p-value of +the age ratio test. +Prevalence will be calculated only when the rating of all test is as not +problematic concurrently. If either of them is problematic, it cancels out +the analysis and \code{NA}s get thrown. + +Outliers are detected in both WFHZ and in MUAC data set (through z-scores) +based on SMART flags get excluded prior being piped into the actual prevalence +analysis workflow. +} +\details{ +A concept of "combined flags" is introduced in this function. It consists of +defining as flag any observation that is flagged in either \code{flag_wfhz} or +\code{flag_mfaz} vectors. A new column \code{cflags} for combined flags is created and +added to \code{df}. This ensures that all flagged observations from both WFHZ +and MFAZ data are excluded from the prevalence analysis. + +\emph{A glimpse on how \code{cflags} are defined:}\tabular{ccc}{ + \strong{flag_wfhz} \tab \strong{flag_mfaz} \tab \strong{cflags} \cr + 1 \tab 0 \tab 1 \cr + 0 \tab 1 \tab 1 \cr + 0 \tab 0 \tab 0 \cr +} +} +\examples{ +## When .by and wt are set to NULL ---- +mw_estimate_prevalence_combined( + df = anthro.02, + wt = NULL, + edema = edema, + .by = NULL +) + +## When wt is not set to NULL ---- +mw_estimate_prevalence_combined( + df = anthro.02, + wt = wtfactor, + edema = edema, + .by = NULL +) + +} diff --git a/man/mw_estimate_prevalence_mfaz.Rd b/man/mw_estimate_prevalence_mfaz.Rd new file mode 100644 index 0000000..0214fc1 --- /dev/null +++ b/man/mw_estimate_prevalence_mfaz.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prev_wasting_mfaz.R +\name{mw_estimate_prevalence_mfaz} +\alias{mw_estimate_prevalence_mfaz} +\title{Estimate the prevalence of wasting based on z-scores of muac-for-age (MFAZ)} +\usage{ +mw_estimate_prevalence_mfaz(df, wt = NULL, edema = NULL, .by = NULL) +} +\arguments{ +\item{df}{A data set object of class \code{data.frame} to use. This must have been +wrangled using this package's wrangling function for MUAC data. The function +uses a variable name called \code{cluster} where the primary sampling unit IDs +are stored. Make sure to rename your cluster ID variable to \code{cluster}, otherwise +the function will error and terminate the execution.} + +\item{wt}{A vector of class \code{double} of the final survey weights. Default is +\code{NULL} assuming a self weighted survey, as in the ENA for SMART software; +otherwise, when a vector of weights if supplied, weighted analysis is done.} + +\item{edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} + +\item{.by}{A vector of class \code{character} or \code{numeric} of the geographical areas +or respective IDs for where the data was collected and for which the analysis +should be summarized at.} +} +\value{ +A summarized table of class \code{data.frame} of the descriptive +statistics about wasting. +} +\description{ +Calculate the prevalence estimates of wasting based on z-scores of +muac-for-age and/or bilateral edema. The function allows users to +get the prevalence estimates calculated in accordance with the complex sample +design properties; this includes applying survey weights when needed or applicable. + +Before estimating, the function evaluates the quality of data by calculating +and rating the standard deviation of z-scores of MFAZ. If rated as problematic, +the prevalence is estimated based on the PROBIT method. + +Outliers are detected based on SMART flags and get excluded prior prevalence analysis. +} +\examples{ +## When .by = NULL ---- +mw_estimate_prevalence_mfaz( + df = anthro.04, + wt = NULL, + edema = edema, + .by = NULL +) + +## When .by is not set to NULL ---- +mw_estimate_prevalence_mfaz( + df = anthro.04, + wt = NULL, + edema = edema, + .by = province +) + +} diff --git a/man/mw_estimate_prevalence_screening.Rd b/man/mw_estimate_prevalence_screening.Rd new file mode 100644 index 0000000..a904a9b --- /dev/null +++ b/man/mw_estimate_prevalence_screening.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prev_wasting_screening.R +\name{mw_estimate_prevalence_screening} +\alias{mw_estimate_prevalence_screening} +\title{Estimate the prevalence of wasting based on MUAC for non survey data} +\usage{ +mw_estimate_prevalence_screening(df, muac, edema = NULL, .by = NULL) +} +\arguments{ +\item{df}{A data set object of class \code{data.frame} to use. This must have been +wrangled using this package's wrangling function for MUAC data. Make sure +MUAC values are converted to millimeters after using the wrangler. +If this is not done, the function will stop execution and return an error message +with the issue.} + +\item{muac}{A vector of raw MUAC values of class \code{numeric} or \code{integer}. +The measurement unit of the values should be millimeters. If any or all values +are in a different unit than the expected, the function will stop execution and +return an error message indicating the issue.} + +\item{edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}. +If class, as well as, code values are different than expected, the function +will stop the execution and return an error message indicating the issue.} + +\item{.by}{A vector of class \code{character} or \code{numeric} of the geographical areas +or respective IDs for where the data was collected and for which the analysis +should be summarized at.} +} +\value{ +A summarized table of class \code{data.frame} of the descriptive +statistics about wasting. +} +\description{ +It is common to estimate prevalence of wasting from non survey data, such +as screenings or any other community-based surveillance systems. In such +situations, the analysis usually consists only in estimating the point prevalence +and the counts of positive cases, without necessarily estimating the +uncertainty. This is the job of this function. + +Before estimating, it evaluates the quality of data by calculating and rating the +standard deviation of z-scores of muac-for-age (MFAZ) and the p-value of the +age ratio test; then it sets the analysis path that best fits the data. +\itemize{ +\item If all tests are rated as not problematic, a normal analysis is done. +\item If standard deviation is not problematic and age ratio test is problematic, +prevalence is age-weighted. This is to fix the likely overestimation of wasting +when there are excess of younger children in the data set. +\item If standard deviation is problematic and age ratio test is not, or both +are problematic, analysis gets cancelled out and \code{NA}s get thrown. +} + +Outliers are detected based on SMART flags on the MFAZ values and then +get excluded prior being piped into the actual prevalence analysis workflow. +} +\examples{ +mw_estimate_prevalence_screening( + df = anthro.02, + muac = muac, + edema = edema, + .by = province +) + +## With `edema` set to `NULL` ---- +mw_estimate_prevalence_screening( + df = anthro.02, + muac = muac, + edema = NULL, + .by = province +) + +## With `.by` set to `NULL` ---- +mw_estimate_prevalence_screening( + df = anthro.02, + muac = muac, + edema = NULL, + .by = NULL +) + +} +\references{ +SMART Initiative (no date). \emph{Updated MUAC data collection tool}. Available at: +\url{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/} +} +\seealso{ +\code{\link[=mw_estimate_prevalence_muac]{mw_estimate_prevalence_muac()}} \code{\link[=mw_estimate_smart_age_wt]{mw_estimate_smart_age_wt()}} +} diff --git a/man/mw_estimate_prevalence_wfhz.Rd b/man/mw_estimate_prevalence_wfhz.Rd new file mode 100644 index 0000000..9905c6e --- /dev/null +++ b/man/mw_estimate_prevalence_wfhz.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prev_wasting_wfhz.R +\name{mw_estimate_prevalence_wfhz} +\alias{mw_estimate_prevalence_wfhz} +\title{Estimate the prevalence of wasting based on z-scores of weight-for-height (WFHZ)} +\usage{ +mw_estimate_prevalence_wfhz(df, wt = NULL, edema = NULL, .by = NULL) +} +\arguments{ +\item{df}{A data set object of class \code{data.frame} to use. This must have been +wrangled using this package's wrangling function for WFHZ data. The function +uses a variable name called \code{cluster} where the primary sampling unit IDs +are stored. Make sure to rename your cluster ID variable to \code{cluster}, otherwise +the function will error and terminate the execution.} + +\item{wt}{A vector of class \code{double} of the final survey weights. Default is +\code{NULL} assuming a self weighted survey, as in the ENA for SMART software; +otherwise, when a vector of weights if supplied, weighted analysis is done.} + +\item{edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} + +\item{.by}{A vector of class \code{character} or \code{numeric} of the geographical areas +or respective IDs for where the data was collected and for which the analysis +should be summarised at.} +} +\value{ +A summarised table of class \code{data.frame} of the descriptive +statistics about wasting. +} +\description{ +Calculate the prevalence estimates of wasting based on z-scores of +weight-for-height and/or bilateral edema. The function allows users to +get the prevalence estimates calculated in accordance with the complex sample +design properties; this includes applying survey weights when needed or applicable. + +Before estimating, the function evaluates the quality of data by calculating +and rating the standard deviation of z-scores of WFHZ. If rated as problematic, +the prevalence is estimated based on the PROBIT method. + +Outliers are detected based on SMART flags and get excluded prior being piped +into the actual prevalence analysis workflow. +} +\examples{ +## When .by = NULL ---- +### Start off by wrangling the data ---- +data <- mw_wrangle_wfhz( + df = anthro.03, + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE +) + +### Now run the prevalence function ---- +mw_estimate_prevalence_wfhz( + df = data, + wt = NULL, + edema = edema, + .by = NULL +) + +## Now when .by is not set to NULL ---- +mw_estimate_prevalence_wfhz( + df = data, + wt = NULL, + edema = edema, + .by = district +) + +## When a weighted analysis is needed ---- +mw_estimate_prevalence_wfhz( + df = anthro.02, + wt = wtfactor, + edema = edema, + .by = province +) + +} diff --git a/man/mw_neat_output_mfaz.Rd b/man/mw_neat_output_mfaz.Rd index 0690f6d..ed75ee6 100644 --- a/man/mw_neat_output_mfaz.Rd +++ b/man/mw_neat_output_mfaz.Rd @@ -8,12 +8,13 @@ for improved clarity and readability} mw_neat_output_mfaz(df) } \arguments{ -\item{df}{A data frame containing the summary table returned by this package's -MFAZ plausibility check function. Must be of class \code{data.frame}.} +\item{df}{An object of class \code{data.frame} returned by this package's +plausibility checker for MFAZ data, containing the summarized results to be +formatted.} } \value{ -A data frame of the same length and width as \code{df}, with column names and -values formatted for clarity. +A \code{data.frame} object of the same length and width as \code{df}, with column names and +values formatted for clarity and readability. } \description{ Clean and format the output table returned from the MFAZ plausibility check diff --git a/man/mw_neat_output_muac.Rd b/man/mw_neat_output_muac.Rd index c6a1396..b73db99 100644 --- a/man/mw_neat_output_muac.Rd +++ b/man/mw_neat_output_muac.Rd @@ -8,12 +8,13 @@ for improved clarity and readability.} mw_neat_output_muac(df) } \arguments{ -\item{df}{A data frame containing the summary table returned by this package's -plausibility check function for raw MUAC data. Must be of class \code{data.frame}.} +\item{df}{An object of class \code{data.frame} returned by this package's +plausibility checker for raw MUAC data, containing the summarized results to be +formatted.} } \value{ -A data frame of the same length and width as \code{df}, with column names and -values formatted for clarity. +A \code{data.frame} object of the same length and width as \code{df}, with column names and +values formatted for clarity and readability. } \description{ Clean and format the output table returned from the plausibility check of raw diff --git a/man/mw_neat_output_wfhz.Rd b/man/mw_neat_output_wfhz.Rd index d8b3c0b..5606a19 100644 --- a/man/mw_neat_output_wfhz.Rd +++ b/man/mw_neat_output_wfhz.Rd @@ -8,12 +8,13 @@ for improved clarity and readability} mw_neat_output_wfhz(df) } \arguments{ -\item{df}{A data frame containing the summary table returned by this package's -WFHZ plausibility check function. Must be of class \code{data.frame}.} +\item{df}{An object of class \code{data.frame} returned by this package's +plausibility checker for WFHZ data, containing the summarized results to be +formatted.} } \value{ -A data frame of the same length and width as \code{df}, with column names and -values formatted for clarity. +A \code{data.frame} object of the same length and width as \code{df}, with column names and +values formatted for clarity and readability. } \description{ Clean and format the output table returned from the WFHZ plausibility check diff --git a/man/mw_plausibility_check_mfaz.Rd b/man/mw_plausibility_check_mfaz.Rd index 8a4d59f..6ddc35e 100644 --- a/man/mw_plausibility_check_mfaz.Rd +++ b/man/mw_plausibility_check_mfaz.Rd @@ -7,7 +7,7 @@ mw_plausibility_check_mfaz(df, sex, muac, age, flags) } \arguments{ -\item{df}{A dataset object of class \code{data.frame} to check.} +\item{df}{A data set object of class \code{data.frame} to check.} \item{sex}{A vector of class \code{numeric} of child's sex.} @@ -18,20 +18,29 @@ mw_plausibility_check_mfaz(df, sex, muac, age, flags) \item{flags}{A vector of class \code{numeric} of flagged records.} } \value{ -A summarised table of class \code{data.frame}, of length 17 and width 1, for +A summarized table of class \code{data.frame}, of length 17 and width 1, for the plausibility test results and their respective acceptability ratings. } \description{ Check the overall plausibility and acceptability of MFAZ data through a structured test suite encompassing sampling and measurement-related biases checks -in the dataset. The test suite in this function follows the recommendation made +in the data set. The test suite in this function follows the recommendation made by Bilukha, O., & Kianian, B. (2023) on the plausibility of -constructing a comprehensive plausibility check similar to WFHZ to evaluate the -acceptability of MUAC data when the variable age exists in the dataset. +constructing a comprehensive plausibility check for MUAC data similar to WFHZ +to evaluate its acceptability when the variable age exists in the data set. The function works on a data frame returned from this package's wrangling function for age and for MFAZ data. } +\details{ +Whilst the function uses the same test checks and criteria as that of WFHZ +in the SMART plausibility check, the percent of flagged data is evaluated +using a different cut-off points, with a maximum acceptability of 2.0\%, +as shown below:\tabular{cccc}{ + \strong{Excellent} \tab \strong{Good} \tab \strong{Acceptable} \tab \strong{Problematic} \cr + 0.0 - 1.0 \tab >1.0 - 1.5 \tab >1.5 - 2.0 \tab >2.0 \cr +} +} \examples{ ## First wrangle age data ---- data <- mw_wrangle_age( diff --git a/man/mw_plausibility_check_muac.Rd b/man/mw_plausibility_check_muac.Rd index 9686240..ec051d1 100644 --- a/man/mw_plausibility_check_muac.Rd +++ b/man/mw_plausibility_check_muac.Rd @@ -7,7 +7,7 @@ mw_plausibility_check_muac(df, sex, muac, flags) } \arguments{ -\item{df}{A dataset object of class \code{data.frame} to check. It must have been +\item{df}{An object of class \code{data.frame} to check. It must have been wrangled using this package's wrangling function for MUAC.} \item{sex}{A vector of class \code{numeric} of child's sex.} @@ -17,15 +17,21 @@ wrangled using this package's wrangling function for MUAC.} \item{flags}{A vector of class \code{numeric} of flagged records.} } \value{ -A summarised table of class \code{data.frame}, of length 9 and width 1, for -the plausibility test results and their respective acceptability ratings.. +A summarized table of class \code{data.frame}, of length 9 and width 1, for +the plausibility test results and their respective acceptability ratings. } \description{ Check the overall plausibility and acceptability of raw MUAC data through a structured test suite encompassing sampling and measurement-related biases checks -in the dataset. The test suite in this function follows the recommendation made +in the data set. The test suite in this function follows the recommendation made by Bilukha, O., & Kianian, B. (2023). } +\details{ +Cut-off points used for the percent of flagged records:\tabular{cccc}{ + \strong{Excellent} \tab \strong{Good} \tab \strong{Acceptable} \tab \strong{Problematic} \cr + 0.0 - 1.0 \tab >1.0 - 1.5 \tab >1.5 - 2.0 \tab >2.0 \cr +} +} \examples{ ## First wranlge MUAC data ---- df_muac <- mw_wrangle_muac( diff --git a/man/mw_plausibility_check_wfhz.Rd b/man/mw_plausibility_check_wfhz.Rd index 455b57d..ba4430c 100644 --- a/man/mw_plausibility_check_wfhz.Rd +++ b/man/mw_plausibility_check_wfhz.Rd @@ -7,7 +7,7 @@ mw_plausibility_check_wfhz(df, sex, age, weight, height, flags) } \arguments{ -\item{df}{A dataset object of class \code{data.frame} to check.} +\item{df}{A data set object of class \code{data.frame} to check.} \item{sex}{A vector of class \code{numeric} of child's sex.} @@ -20,13 +20,13 @@ mw_plausibility_check_wfhz(df, sex, age, weight, height, flags) \item{flags}{A vector of class \code{numeric} of flagged records.} } \value{ -A summarised table of class \code{data.frame}, of length 19 and width 1, for +A summarized table of class \code{data.frame}, of length 19 and width 1, for the plausibility test results and their respective acceptability rates. } \description{ Check the overall plausibility and acceptability of WFHZ data through a structured test suite encompassing sampling and measurement-related biases checks -in the dataset. The test suite, including the criteria and corresponding rating of +in the data set. The test suite, including the criteria and corresponding rating of acceptability, follows the standards in the SMART plausibility check. The only exception is the exclusion of MUAC checks. MUAC is checked separately using more comprehensive test suite as well. diff --git a/man/mw_wrangle_age.Rd b/man/mw_wrangle_age.Rd index 7f8bc52..3092065 100644 --- a/man/mw_wrangle_age.Rd +++ b/man/mw_wrangle_age.Rd @@ -7,7 +7,7 @@ mw_wrangle_age(df, dos = NULL, dob = NULL, age, .decimals = 2) } \arguments{ -\item{df}{A dataset of class \code{data.frame} to wrangle age from.} +\item{df}{A data set of class \code{data.frame} to wrangle age from.} \item{dos}{A vector of class \code{Date} for date of data collection from the \code{df}. Default is \code{NULL}.} diff --git a/man/mw_wrangle_muac.Rd b/man/mw_wrangle_muac.Rd index ba26ae0..9d847e2 100644 --- a/man/mw_wrangle_muac.Rd +++ b/man/mw_wrangle_muac.Rd @@ -16,7 +16,7 @@ mw_wrangle_muac( ) } \arguments{ -\item{df}{A dataset object of class \code{data.frame} to wrangle data from.} +\item{df}{A data set object of class \code{data.frame} to wrangle data from.} \item{sex}{A \code{numeric} or \code{character} vector of child's sex. Code values should only be 1 or "m" for males and 2 or "f" for females. Make sure sex values diff --git a/man/mw_wrangle_wfhz.Rd b/man/mw_wrangle_wfhz.Rd index 9d95ab2..0f5546f 100644 --- a/man/mw_wrangle_wfhz.Rd +++ b/man/mw_wrangle_wfhz.Rd @@ -7,7 +7,7 @@ mw_wrangle_wfhz(df, sex, weight, height, .recode_sex = TRUE, .decimals = 3) } \arguments{ -\item{df}{A dataset object of class \code{data.frame} to wrangle data from.} +\item{df}{A data set object of class \code{data.frame} to wrangle data from.} \item{sex}{A \code{numeric} or \code{character} vector of child's sex. Code values should only be 1 or "m" for males and 2 or "f" for females. Make sure sex values @@ -49,8 +49,8 @@ mw_wrangle_wfhz( } \references{ -SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief -and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. +SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief} +\emph{and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. } \seealso{ \code{\link[=flag_outliers]{flag_outliers()}} \code{\link[=remove_flags]{remove_flags()}} diff --git a/man/mwana-package.Rd b/man/mwana-package.Rd index 54c6ba3..9f7eca6 100644 --- a/man/mwana-package.Rd +++ b/man/mwana-package.Rd @@ -4,11 +4,11 @@ \name{mwana-package} \alias{mwana} \alias{mwana-package} -\title{mwana: Utilities for Analysing Children's Nutritional Status} +\title{mwana: An Efficient Workflow for Plausibility Checks and Prevalence Analysis of Wasting in R} \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -A streamlined and comprehensive implementation of the Standardized Monitoring and Assessment of Relief and Transition (SMART) Methodology \url{https://smartmethodology.org/} guidelines for data quality checks and prevalence estimation, with enhanced programmable process particularly when handling multiple area datasets. +A simple and streamlined workflow for plausibility checks and prevalence analysis of wasting based on the Standardized Monitoring and Assessment of Relief and Transition (SMART) Methodology \url{https://smartmethodology.org/}, with application in R. } \seealso{ Useful links: diff --git a/man/outliers.Rd b/man/outliers.Rd index f7c1538..86b2ca0 100644 --- a/man/outliers.Rd +++ b/man/outliers.Rd @@ -32,7 +32,7 @@ For raw MUAC values, outliers constitute values that are less than 100 millimeters or greater than 200 millimeters. Removing outliers consist in setting the outlier record to \code{NA} and not necessarily -to delete it from the dataset. This is useful in the analysis procedures +to delete it from the data set. This is useful in the analysis procedures where outliers must be removed, such as the analysis of the standard deviation. } \details{ @@ -50,26 +50,32 @@ by Bilukha, O., & Kianian, B. (2023). x <- anthro.01$muac ## Apply the function with `.from` set to "raw_muac" ---- -flag_outliers(x, .from = "raw_muac") +m <- flag_outliers(x, .from = "raw_muac") +head(m) ## Sample data of z-scores (be it WFHZ, MFAZ, HFAZ or WFAZ) ---- x <- anthro.02$mfaz # Apply the function with `.from` set to "zscores" ---- -flag_outliers(x, .from = "zscores") +z <- flag_outliers(x, .from = "zscores") +tail(z) ## With `.from` set to "zscores" ---- -remove_flags( +z <- remove_flags( x = wfhz.01$wfhz, .from = "zscores" ) +head(z) + ## With `.from` set to "raw_muac" ---- -remove_flags( +m <- remove_flags( x = mfaz.01$muac, .from = "raw_muac" ) +tail(m) + } \references{ Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement diff --git a/man/prev-muac.Rd b/man/prev-muac.Rd new file mode 100644 index 0000000..f09138d --- /dev/null +++ b/man/prev-muac.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prev_wasting_muac.R +\name{mw_estimate_prevalence_muac} +\alias{mw_estimate_prevalence_muac} +\alias{mw_estimate_smart_age_wt} +\title{Estimate the prevalence of wasting based on MUAC for survey data} +\usage{ +mw_estimate_prevalence_muac(df, wt = NULL, edema = NULL, .by = NULL) + +mw_estimate_smart_age_wt(df, edema = NULL, .by = NULL) +} +\arguments{ +\item{df}{A data set object of class \code{data.frame} to use. This must have been +wrangled using this package's wrangling function for MUAC data. Make sure +MUAC values are converted to millimeters after using the wrangler. +If this is not done, the function will stop execution and return an error message. +The function uses a variable name called \code{cluster} where the primary sampling unit IDs +are stored. Make sure the data set has this variable and its name has been +renamed to \code{cluster}, otherwise the function will error and terminate the execution.} + +\item{wt}{A vector of class \code{double} of the final survey weights. Default is +\code{NULL} assuming a self weighted survey, as in the ENA for SMART software; +otherwise, when a vector of weights if supplied, weighted analysis is done.} + +\item{edema}{A vector of class \code{character} of edema. Code should be +"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} + +\item{.by}{A vector of class \code{character} or \code{numeric} of the geographical areas +or respective IDs for where the data was collected and for which the analysis +should be summarized at.} +} +\value{ +A summarized table of class \code{data.frame} of the descriptive +statistics about wasting. +} +\description{ +Calculate the prevalence estimates of wasting based on MUAC and/or bilateral +edema. +Before estimating, the function evaluates the quality of data by calculating +and rating the standard deviation of z-scores of muac-for-age (MFAZ) and the +p-value of the age ratio test; then it sets the analysis path that best fits +the data: +\itemize{ +\item If all tests are rated as not problematic, a normal analysis is done. +\item If standard deviation is not problematic and age ratio test is problematic, +prevalence is age-weighted. This is to fix the likely overestimation of wasting +when there are excess of younger children in the data set. +\item If standard deviation is problematic and age ratio test is not, or both +are problematic, analysis gets cancelled out and \code{NA}s get thrown. +} + +Outliers are detected based on SMART flags on the MFAZ values and then +get excluded prior being piped into the actual prevalence analysis workflow. +} +\examples{ +## When .by = NULL ---- +mw_estimate_prevalence_muac( + df = anthro.04, + wt = NULL, + edema = edema, + .by = NULL +) + +## When .by is not set to NULL ---- +mw_estimate_prevalence_muac( + df = anthro.04, + wt = NULL, + edema = edema, + .by = province +) + +## An application of `mw_estimate_smart_age_wt()` ---- +.data <- anthro.04 |> + subset(province == "Province 2") + +mw_estimate_smart_age_wt( + df = .data, + edema = edema, + .by = NULL +) + +} +\references{ +SMART Initiative (no date). \emph{Updated MUAC data collection tool}. Available at: +\url{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/} +} +\seealso{ +\code{\link[=mw_estimate_smart_age_wt]{mw_estimate_smart_age_wt()}} \code{\link[=mw_estimate_prevalence_mfaz]{mw_estimate_prevalence_mfaz()}} +\code{\link[=mw_estimate_prevalence_screening]{mw_estimate_prevalence_screening()}} +} diff --git a/man/prevalence.Rd b/man/prevalence.Rd deleted file mode 100644 index 06e74e1..0000000 --- a/man/prevalence.Rd +++ /dev/null @@ -1,105 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_mfaz.R, R/prevalence_muac.R, -% R/prevalence_wfhz.R -\name{compute_mfaz_prevalence} -\alias{compute_mfaz_prevalence} -\alias{compute_muac_prevalence} -\alias{compute_wfhz_prevalence} -\title{Compute the prevalence estimates of wasting on the basis of WFHZ, MFAZ or MUAC} -\usage{ -compute_mfaz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) - -compute_muac_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) - -compute_wfhz_prevalence(df, .wt = NULL, .edema = NULL, .summary_by = NULL) -} -\arguments{ -\item{df}{An already wrangled dataset object of class \code{data.frame} to use.} - -\item{.wt}{A vector of class \code{double} of the final survey weights. Default is -\code{NULL} assuming a self weighted survey, as in the ENA for SMART software; -otherwise, when a vector of weights if supplied, weighted analysis is computed.} - -\item{.edema}{A vector of class \code{character} of edema. Code should be -"y" for presence and "n" for absence of bilateral edema. Default is \code{NULL}.} - -\item{.summary_by}{A vector of class \code{character} of the geographical areas -where the data was collected and for which the analysis should be performed.} -} -\value{ -A summarised table of class \code{data.frame} of the descriptive -statistics about wasting. -} -\description{ -The prevalence is calculated in accordance with the complex sample design -properties inherent to surveys. This includes weighting the survey data where -applicable and applying PROBIT method estimation (for WFHZ) when the standard -deviation is problematic. This is as in the SMART Methodology. -} -\examples{ - -## An example of application of `compute_muac_prevalence()` ---- - -### When .summary.by = NULL ---- - -x <- compute_muac_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -print(x) - -### When .summary_by is not set to NULL ---- - -p <- compute_muac_prevalence( -df = anthro.04, -.wt = NULL, -.edema = edema, -.summary_by = province -) - -print(p) - -## An example of application of `compute_wfhz_prevalence()` ---- - -### When .summary_by = NULL ---- -anthro.03 |> -mw_wrangle_wfhz( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -compute_wfhz_prevalence( -.wt = NULL, -.edema = edema, -.summary_by = NULL -) - -### When .summary_by is not set to NULL ---- - -anthro.03 |> -mw_wrangle_wfhz( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -compute_wfhz_prevalence( -.wt = NULL, -.edema = edema, -.summary_by = district -) - -### When a weighted analysis is needed ---- - -anthro.02 |> -compute_wfhz_prevalence( -.wt = "wtfactor", -.edema = edema, -.summary_by = province -) - -} diff --git a/man/probit-method.Rd b/man/probit-method.Rd deleted file mode 100644 index 0b1b698..0000000 --- a/man/probit-method.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_wfhz.R -\name{apply_probit_approach} -\alias{apply_probit_approach} -\alias{compute_probit_prevalence} -\title{Compute the prevalence estimates of wasting on the basis of the PROBIT method.} -\usage{ -apply_probit_approach(x, .status = c("gam", "sam")) - -compute_probit_prevalence(df, .summary_by = NULL, .for = c("wfhz", "mfaz")) -} -\arguments{ -\item{x}{A vector of class \code{double} of WFHZ or MFAZ values.} - -\item{.status}{A choice of the form of wasting for which the prevalence should -be estimated.} - -\item{df}{An already wrangled dataset object of class \code{data.frame} to use.} - -\item{.summary_by}{A vector of class \code{character} of the geographical areas -where the data was collected and for which the analysis should be performed.} - -\item{.for}{A choice between "wfhz" and "mfaz" for the anthropometric index.} -} -\value{ -A summarised table of class \code{data.frame} of the prevalence estimates. -No confidence intervals are yielded. -} -\description{ -This approach is applied when the standard deviation of WFHZ is problematic. -The PROBIT method estimates the prevalence of wasting indirectly by calculating -the area under the tail of the curve, from negative infinitive to -the given threshold, using the cumulative normal distribution function with -the mean and standard deviation as inputs. -} diff --git a/man/rate_propof_flagged.Rd b/man/rate_propof_flagged.Rd index 7de21f4..a174210 100644 --- a/man/rate_propof_flagged.Rd +++ b/man/rate_propof_flagged.Rd @@ -8,11 +8,11 @@ rate_propof_flagged(p, .in = c("mfaz", "wfhz", "raw_muac")) } \arguments{ \item{p}{A vector of class \code{double}, containing the proportions of flagged -records in the dataset. If the class does not match the expected type, the +records in the data set. If the class does not match the expected type, the function will stop execution and return an error message indicating the type of mismatch.} -\item{.in}{Specifies the dataset where the rating should be done, +\item{.in}{Specifies the data set where the rating should be done, with options: "wfhz", "mfaz", or "raw_muac".} } \value{ diff --git a/man/rate_std.Rd b/man/rate_std.Rd index 54bca22..be04dd9 100644 --- a/man/rate_std.Rd +++ b/man/rate_std.Rd @@ -8,10 +8,10 @@ rate_std(sd, .of = c("zscores", "raw_muac")) } \arguments{ \item{sd}{A vector of class \code{double}, containing values of the standard deviation -from the dataset. If the class does not match the expected type, the function +from the data set. If the class does not match the expected type, the function will stop execution and return an error message indicating the type of mismatch.} -\item{.of}{Specifies the dataset where the rating should be done, with options: +\item{.of}{Specifies the data set where the rating should be done, with options: "wfhz", "mfaz", or "raw_muac".} } \value{ diff --git a/man/recode_muac.Rd b/man/recode_muac.Rd index a993a2e..8bc329e 100644 --- a/man/recode_muac.Rd +++ b/man/recode_muac.Rd @@ -7,7 +7,7 @@ recode_muac(x, .to = c("cm", "mm")) } \arguments{ -\item{x}{A vector of the raw MUAC values. The class can either be +\item{x}{A vector of raw MUAC values. The class can either be \code{double} or \code{numeric} or \code{integer}. If different than expected, the function will stop execution and return an error message indicating the type of mismatch.} @@ -33,11 +33,13 @@ muac_cm <- recode_muac( x = anthro.01$muac, .to = "cm" ) +head(muac_cm) ## Using the `muac_cm` object to recode it back to "mm" ---- muac_mm <- recode_muac( x = muac_cm, .to = "mm" ) +tail(muac_mm) } diff --git a/man/tell_muac_analysis_strategy.Rd b/man/tell_muac_analysis_strategy.Rd deleted file mode 100644 index 0e230fd..0000000 --- a/man/tell_muac_analysis_strategy.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/prevalence_muac.R -\name{tell_muac_analysis_strategy} -\alias{tell_muac_analysis_strategy} -\title{A helper function to determine the MUAC prevalence analysis approach to follow} -\usage{ -tell_muac_analysis_strategy(age_ratio_class, sd_class) -} -\arguments{ -\item{age_ratio_class}{A vector of class \code{character} of the acceptability -classification of the age ratio test result.} - -\item{sd_class}{A vector of class \code{character} of the acceptability -classification of the standard deviation analysis result.} -} -\value{ -A vector of class \code{character} of the same length as the input vectors, -containing values indicating the analysis approach for each analysis area: "weighted", -"unweighted" and "missing". -} -\description{ -It determines the analysis approach to follow for a given analysis area on -the basis of the rate of acceptability of the age ratio test and the standard -deviation analysis result. -} -\details{ -When "weighted", the CDC weighting approach is applied to correct for -age bias; when "unweighted" a normal complex sample analysis is applied; when -"missing" \code{NA} gets thrown. -} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 5d805d1..2c813de 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -44,52 +44,60 @@ home: - text: Read more about IPC href: https://www.ipcinfo.org/ -# reference: -# - title: Description -# contents: -# - mwana - -# - title: Built-in datasets -# contents: -# - anthro.01 -# - anthro.02 -# - anthro.03 -# - anthro.04 -# - mfaz.01 -# - mfaz.02 -# - wfhz.01 - -# - title: Wrangle data -# contents: -# - mw_wrangle_age -# - mw_wrangle_wfhz -# - mw_wrangle_muac - -# - title: Statistical tests -# contents: -# - mw_stattest_ageratio - -# - title: Check IPC AMN sample size requirements -# contents: -# - mw_check_ipcamn_ssreq - -# - title: Check plausibility -# contents: -# - mw_plausibility_check_wfhz -# - mw_plausibility_check_mfaz -# - mw_plausibility_check_muac - -# - title: Neat output tables -# contents: -# - mw_neat_output_wfhz -# - mw_neat_output_mfaz -# - mw_neat_output_muac - -# - title: Utilities -# contents: -# - get_age_months -# - recode_muac -# - flag_outliers -# - remove_flags - - +reference: + - title: Description + contents: + - mwana + + - title: Built-in data sets + contents: + - anthro.01 + - anthro.02 + - anthro.03 + - anthro.04 + - mfaz.01 + - mfaz.02 + - wfhz.01 + + - title: Wrangle data + contents: + - mw_wrangle_age + - mw_wrangle_wfhz + - mw_wrangle_muac + + - title: Statistical tests + contents: + - mw_stattest_ageratio + + - title: IPC Acute Malnutrition checks + contents: + - mw_check_ipcamn_ssreq + + - title: Plausibility check + contents: + - mw_plausibility_check_wfhz + - mw_plausibility_check_mfaz + - mw_plausibility_check_muac + + - title: Neat output tables + contents: + - mw_neat_output_wfhz + - mw_neat_output_mfaz + - mw_neat_output_muac + + - title: Estimate prevalence of wasting + contents: + - mw_estimate_prevalence_wfhz + - mw_estimate_prevalence_muac + - mw_estimate_prevalence_mfaz + - mw_estimate_prevalence_combined + - mw_estimate_prevalence_screening + - mw_estimate_smart_age_wt + + - title: Utilities + contents: + - get_age_months + - recode_muac + - flag_outliers + - remove_flags + - define_wasting diff --git a/tests/testthat/test-case_definitions.R b/tests/testthat/test-case_definitions.R deleted file mode 100644 index 079d603..0000000 --- a/tests/testthat/test-case_definitions.R +++ /dev/null @@ -1,311 +0,0 @@ -# Test checks for prevalence helpers used in complex sample based prevalence --- - -## Test check: define_wasting_cases_muac() ---- -### With edema set to NULL -local({ - #### Sample data ---- - muac_values <- c( - 123, 129, 126, 113, 130, 122, 112, 124, 128, - 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 - ) - - #### Expected results ---- - expected_gam <- c(1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1) - expected_sam <- c(0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0) - expected_mam <- c(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1) - - #### Observed results ---- - observed_gam <- define_wasting_cases_muac(muac_values, cases = "gam") - observed_sam <- define_wasting_cases_muac(muac_values, cases = "sam") - observed_mam <- define_wasting_cases_muac(muac_values, cases = "mam") - - #### The test ---- - testthat::test_that( - "define_wasting_cases_muac() defines cases properly", - { - testthat::expect_equal(observed_gam, expected_gam) - testthat::expect_equal(observed_sam, expected_sam) - testthat::expect_equal(observed_mam, expected_mam) - } - ) -}) - -## Test check: define_wasting_cases_muac() ---- -### With edema ---- -local({ - #### Sample data ---- - muac_values <- c( - 123, 129, 126, 113, 130, 122, 112, 124, 128, - 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 - ) - - edema <- c( - "n", "n", "y", "n", "n", "n", "n", "n", "n", "n", "n", "n", - "n", "n", "n", "n", "n", "y", "y", "n" - ) - - #### Expected results ---- - expected_gam <- c(1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1) - expected_sam <- c(0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0) - expected_mam <- c(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1) - - #### Observed results ---- - observed_gam <- define_wasting_cases_muac(muac_values, edema, cases = "gam") - observed_sam <- define_wasting_cases_muac(muac_values, edema, cases = "sam") - observed_mam <- define_wasting_cases_muac(muac_values, edema, cases = "mam") - - #### The test ---- - testthat::test_that( - "define_wasting_cases_muac() defines cases properly", - { - testthat::expect_equal(observed_gam, expected_gam) - testthat::expect_equal(observed_sam, expected_sam) - testthat::expect_equal(observed_mam, expected_mam) - } - ) -}) - -## Test check: define_wasting_cases_whz() ---- -### With edema ---- -local({ - #### Sample data ---- - whz <- c( - -0.958, -2.410, -0.232, -2.289, -3.015, -1.563, -2.773, -1.442, - -2.652, -3.257, -2.531, -2.894, -0.595, -3.378, -1.321, -2.047, - -0.353, -0.474, -1.200, -1.079 - ) - edema <- c( - "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", - "n", "n", "n", "n", "n", "y", "y", "n" - ) - - #### Expected results ---- - expected_gam <- c(0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0) - expected_sam <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0) - expected_mam <- c(0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0) - - #### Observed results ---- - observed_gam <- define_wasting_cases_whz(whz, edema, cases = "gam") - observed_sam <- define_wasting_cases_whz(whz, edema, cases = "sam") - observed_mam <- define_wasting_cases_whz(whz, edema, cases = "mam") - - #### The test ---- - testthat::test_that( - "define_wasting_cases_whz() defines cases properly", - { - testthat::expect_equal(observed_gam, expected_gam) - testthat::expect_equal(observed_sam, expected_sam) - testthat::expect_equal(observed_mam, expected_mam) - } - ) -}) - -## Test check: define_wasting_cases_whz() ---- -### With edema set to NULL ---- -local({ - #### Sample data ---- - whz <- c( - -0.958, -2.410, -0.232, -2.289, -3.015, -1.563, -2.773, -1.442, - -2.652, -3.257, -2.531, -2.894, -0.595, -3.378, -1.321, -2.047, - -0.353, -0.474, -1.200, -1.079 - ) - - edema <- NULL - - #### Expected results ---- - expected_gam <- c(0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0) - expected_sam <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0) - expected_mam <- c(0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0) - - #### Observed results ---- - observed_gam <- define_wasting_cases_whz(whz, edema, cases = "gam") - observed_sam <- define_wasting_cases_whz(whz, edema, cases = "sam") - observed_mam <- define_wasting_cases_whz(whz, edema, cases = "mam") - - #### The test ---- - testthat::test_that( - "define_wasting_cases_whz() defines cases properly", - { - testthat::expect_equal(observed_gam, expected_gam) - testthat::expect_equal(observed_sam, expected_sam) - testthat::expect_equal(observed_mam, expected_mam) - } - ) -}) - -## Test check: define_wasting_cases_combined() ---- -### With edema ---- - -local({ - #### Sample data ---- - whz <- c( - -0.958, -2.410, -0.232, -2.289, -3.015, -1.563, -2.773, -1.442, - -2.652, -3.257, -2.531, -2.894, -0.595, -3.378, -1.321, -2.047, - -0.353, -0.474, -1.200, -1.079 - ) - muac_values <- c( - 123, 129, 126, 113, 130, 122, 112, 124, 128, - 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 - ) - edema <- c( - "n", "n", "y", "n", "n", "n", "n", "n", "n", "n", "n", "n", - "n", "n", "n", "n", "n", "y", "y", "n" - ) - - #### Expected results ---- - expected_cgam <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) - expected_csam <- c(0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0) - expected_cmam <- c(1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1) - - #### Observed results ---- - observed_cgam <- define_wasting_cases_combined(whz, muac_values, edema, cases = "cgam") - observed_csam <- define_wasting_cases_combined(whz, muac_values, edema, cases = "csam") - observed_cmam <- define_wasting_cases_combined(whz, muac_values, edema, cases = "cmam") - - #### The test ---- - testthat::test_that( - "define_wasting_cases_combined() defines cases properly", - { - testthat::expect_equal(observed_cgam, expected_cgam) - testthat::expect_equal(observed_csam, expected_csam) - testthat::expect_equal(observed_cmam, expected_cmam) - } - ) -}) - -### With edema set to NULL ---- - -local({ - #### Sample data ---- - whz <- c( - -0.958, -2.410, -0.232, -2.289, -3.015, -1.563, -2.773, -1.442, - -2.652, -3.257, -2.531, -2.894, -0.595, -3.378, -1.321, -2.047, - -0.353, -0.474, -1.200, -1.079 - ) - muac_values <- c( - 123, 129, 126, 113, 130, 122, 112, 124, 128, - 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 - ) - edema <- NULL - - #### Expected results ---- - expected_cgam <- c(1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) - expected_csam <- c(0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0) - expected_cmam <- c(1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1) - - #### Observed results ---- - observed_cgam <- define_wasting_cases_combined(whz, muac_values, edema, cases = "cgam") - observed_csam <- define_wasting_cases_combined(whz, muac_values, edema, cases = "csam") - observed_cmam <- define_wasting_cases_combined(whz, muac_values, edema, cases = "cmam") - - #### The test ---- - testthat::test_that( - "define_wasting_cases_combined() defines cases properly", - { - testthat::expect_equal(observed_cgam, expected_cgam) - testthat::expect_equal(observed_csam, expected_csam) - testthat::expect_equal(observed_cmam, expected_cmam) - } - ) -}) - - -## Test check: define_wasting() ---- -### Type set to "wfhz" ---- -local( - { - #### Input data ---- - x <- wfhz.01 |> - define_wasting(wfhz, edema, base = "wfhz") |> - dplyr::select(gam, sam, mam) - - #### The test ---- - testthat::test_that( - "define_wasting() executes job as expected", - { - testthat::expect_s3_class(x, "data.frame") - testthat::expect_named(x, c("gam", "sam", "mam")) - testthat::expect_vector(x$gam, size = 303) - testthat::expect_vector(x$sam, size = 303) - testthat::expect_vector(x$mam, size = 303) - } - ) - } -) - -#### Type set to "muac ---- -local( - { - #### Input data ---- - x <- mfaz.02 |> - define_wasting(muac = muac, edema = edema, base = "muac") |> - dplyr::select(gam, sam, mam) - - #### The test ---- - testthat::test_that( - "define_wasting() executes job as expected", - { - testthat::expect_s3_class(x, "data.frame") - testthat::expect_named(x, c("gam", "sam", "mam")) - testthat::expect_vector(x$gam, size = 303) - testthat::expect_vector(x$sam, size = 303) - testthat::expect_vector(x$mam, size = 303) - } - ) - } -) - -#### Type set to "combined" ---- -local( - { - #### Input data ---- - x <- anthro.02 |> - define_wasting(wfhz, muac, edema, base = "combined") |> - dplyr::select(cgam, csam, cmam) - - #### The test ---- - testthat::test_that( - "define_wasting() executes job as expected", - { - testthat::expect_s3_class(x, "data.frame") - testthat::expect_named(x, c("cgam", "csam", "cmam")) - testthat::expect_vector(x$cgam, size = 2267) - testthat::expect_vector(x$csam, size = 2267) - testthat::expect_vector(x$cmam, size = 2267) - } - ) - } -) - -### Test check: classify_wasting_for_cdc_approach with edema available ---- - -local({ - #### Input data ---- - muac_values <- c( - 123, 129, 126, 113, 130, 122, 112, 124, 128, - 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 - ) - edema <- c( - "n", "n", "y", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n" - , "n", "n", "n", "y", "y", "n" - ) - - #### Expected results ---- - expected <- c( - "mam", "not wasted", "sam", "sam", "not wasted", "mam", "sam", "mam", - "not wasted", "mam", "mam", "sam", "sam", "not wasted", "mam", "not wasted", - "mam", "sam", "sam", "mam" - ) - - #### Observed results ---- - obs <- classify_wasting_for_cdc_approach(muac = muac_values, .edema = edema) - - #### The test ---- - testthat::test_that( - "classify_wasting_for_cdc_approach does his job well", - { - testthat::expect_vector(obs, ptype = "character", size = 20) - testthat::expect_equal(obs, expected) - } - ) -}) diff --git a/tests/testthat/test-plausibility_check_mfaz.R b/tests/testthat/test-plausibility_check_mfaz.R index 56976c0..f1ddc03 100644 --- a/tests/testthat/test-plausibility_check_mfaz.R +++ b/tests/testthat/test-plausibility_check_mfaz.R @@ -89,3 +89,51 @@ testthat::test_that( ) } ) + +# Test check: mw_neat_output_mfaz() ---- +testthat::test_that( + "mw_neat_output_mfaz() works OK when `df` is grouped", + { + ## Workflow ---- + quality <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + age = age, + muac = muac, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) |> + group_by(area) |> + mw_plausibility_check_mfaz( + flags = flag_mfaz, + sex = sex, + muac = muac, + age = age + ) |> + group_by(area) |> + mw_neat_output_mfaz() + + ## Tests ---- + testthat::expect_s3_class(quality, "tbl_df") + testthat::expect_equal(ncol(quality), 18) + testthat::expect_equal(nrow(quality), 2) + testthat::expect_true( + all(c("Group", "Total children", "Flagged data (%)", + "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", + "Age ratio (p)", "Class. of age ratio", "DPS (#)", + "Class. of DPS", "Standard Dev* (#)", "Class. of standard dev", + "Skewness* (#)", "Class. of skewness", "Kurtosis* (#)", + "Class. of kurtosis", "Overall score", "Overall quality" + ) %in% names(quality) + + ) + ) + } +) diff --git a/tests/testthat/test-plausibility_check_muac.R b/tests/testthat/test-plausibility_check_muac.R index d6fec16..990a02d 100644 --- a/tests/testthat/test-plausibility_check_muac.R +++ b/tests/testthat/test-plausibility_check_muac.R @@ -70,3 +70,42 @@ testthat::test_that( } ) + +# Test check: mw_neat_output_muac()---- +testthat::test_that( + "mw_neat_output_muac() works when `df` is grouped", + { + ## Workflow ---- + quality <- anthro.01 |> + mw_wrangle_muac( + sex = sex, + muac = muac, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) |> + group_by(area) |> + mw_plausibility_check_muac( + flags = flag_muac, + sex = sex, + muac = muac + )|> + group_by(area) |> + mw_neat_output_muac() + + ## Tests ---- + testthat::expect_s3_class(quality, "data.frame") + testthat::expect_equal(ncol(quality), 10) + testthat::expect_equal(nrow(quality), 2) + testthat::expect_true( + all(c( + "Group","Total children", "Flagged data (%)", "Class. of flagged data", + "Sex ratio (p)", "Class. of sex ratio", "DPS(#)", "Class. of DPS", + "Standard Dev* (#)", "Class. of standard dev" + ) %in% names(quality) + ) + ) + + } +) + diff --git a/tests/testthat/test-plausibility_check_wfhz.R b/tests/testthat/test-plausibility_check_wfhz.R index cb3cc3e..4dcc230 100644 --- a/tests/testthat/test-plausibility_check_wfhz.R +++ b/tests/testthat/test-plausibility_check_wfhz.R @@ -44,7 +44,7 @@ testthat::test_that( } ) -# Test check: ---- +# Test check: mw_neat_output_wfhz() ---- testthat::test_that( "mw_neat_output_wfhz() works", { @@ -88,3 +88,51 @@ testthat::test_that( ) } ) + +# Test check: mw_neat_output_wfhz() ---- +testthat::test_that( + "mw_neat_output_wfhz() works OK when `df` is grouped", + { + quality <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE + ) |> + group_by(area) |> + mw_plausibility_check_wfhz( + flags = flag_wfhz, + sex = sex, + age = age, + weight = weight, + height = height + ) |> + group_by(area) |> + mw_neat_output_wfhz() + + ## Tests ---- + testthat::expect_s3_class(quality, "tbl_df") + testthat::expect_equal(ncol(quality), 20) + testthat::expect_equal(nrow(quality), 2) + testthat::expect_true( + all(c("Group", "Total children", "Flagged data (%)", + "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", + "Age ratio (p)", "Class. of age ratio", "DPS weight (#)", + "Class. DPS weight", "DPS height (#)", "Class. DPS height", + "Standard Dev* (#)", "Class. of standard dev", + "Skewness* (#)", "Class. of skewness", "Kurtosis* (#)", + "Class. of kurtosis", "Overall score", "Overall quality" + ) %in% names(quality) + + ) + ) + } +) + diff --git a/tests/testthat/test-prev_define_wasting.R b/tests/testthat/test-prev_define_wasting.R new file mode 100644 index 0000000..26183ec --- /dev/null +++ b/tests/testthat/test-prev_define_wasting.R @@ -0,0 +1,471 @@ +# Test checks: define_wasting_muac() ---- +## With edema ---- +testthat::test_that( + "define_wasting_muac() defines cases as it should", + { + ### Sample data ---- + muac_values <- c( + 123, 129, 126, 113, 130, 122, 112, 124, 128, + 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 + ) + edema <- c( + "n", "n", "y", "n", "n", "n", "n", "n", "n", "n", "n", "n", + "n", "n", "n", "n", "n", "y", "y", "n" + ) + + ### Expected results ---- + exp_gam <- c(1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1) + exp_sam <- c(0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0) + exp_mam <- c(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1) + + ### Observed results ---- + obs_gam <- define_wasting_muac(muac_values, edema, .cases = "gam") + obs_sam <- define_wasting_muac(muac_values, edema, .cases = "sam") + obs_mam <- define_wasting_muac(muac_values, edema, .cases = "mam") + + ### Tests ---- + testthat::expect_equal(obs_gam, exp_gam) + testthat::expect_vector(obs_gam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_sam, exp_sam) + testthat::expect_vector(obs_sam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_mam, exp_mam) + testthat::expect_vector(obs_mam, ptype = numeric(), size = 20) + } +) + +## With edema set to NULL +testthat::test_that( + "define_wasting_muac() defines cases as it should when edema is set to NULL", + { + ### Sample data ---- + muac_values <- c( + 123, 129, 126, 113, 130, 122, 112, 124, 128, + 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 + ) + + ### Expected results ---- + exp_gam <- c(1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1) + exp_sam <- c(0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0) + exp_mam <- c(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1) + + ### Observed results ---- + obs_gam <- define_wasting_muac(muac_values, .cases = "gam") + obs_sam <- define_wasting_muac(muac_values, .cases = "sam") + obs_mam <- define_wasting_muac(muac_values, .cases = "mam") + + ### Tests ---- + testthat::expect_equal(obs_gam, exp_gam) + testthat::expect_vector(obs_gam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_sam, exp_sam) + testthat::expect_vector(obs_sam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_mam, exp_mam) + testthat::expect_vector(obs_mam, ptype = numeric(), size = 20) + } +) + + +# Test check: define_wasting_zscores() ---- +## With edema ---- +testthat::test_that( + "define_wasting_zscores() defines cases as it should", + { + ### Sample data ---- + wfhz <- c( + -0.958, -2.410, -0.232, -2.289, -3.015, -1.563, -2.773, -1.442, + -2.652, -3.257, -2.531, -2.894, -0.595, -3.378, -1.321, -2.047, + -0.353, -0.474, -1.200, -1.079 + ) + edema <- c( + "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", + "n", "n", "n", "n", "n", "y", "y", "n" + ) + + ### Expected results ---- + exp_gam <- c(0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0) + exp_sam <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0) + exp_mam <- c(0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0) + + ### Observed results ---- + obs_gam <- define_wasting_zscores(zscores = wfhz, edema = edema, .cases = "gam") + obs_sam <- define_wasting_zscores(zscores = wfhz, edema = edema, .cases = "sam") + obs_mam <- define_wasting_zscores(zscores = wfhz, edema = edema, .cases = "mam") + + ### Tests ---- + testthat::expect_equal(obs_gam, exp_gam) + testthat::expect_vector(obs_gam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_sam, exp_sam) + testthat::expect_vector(obs_sam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_mam, exp_mam) + testthat::expect_vector(obs_mam, ptype = numeric(), size = 20) + } +) + + +# Test check: define_wasting_cases_whz() ---- +## With edema set to NULL ---- +testthat::test_that( + "define_wasting_zscores() defines cases as it should when edema is set to NULL", + { + ### Sample data ---- + wfhz <- c( + -0.958, -2.410, -0.232, -2.289, -3.015, -1.563, -2.773, -1.442, + -2.652, -3.257, -2.531, -2.894, -0.595, -3.378, -1.321, -2.047, + -0.353, -0.474, -1.200, -1.079 + ) + + edema <- NULL + + ### Expected results ---- + exp_gam <- c(0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0) + exp_sam <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0) + exp_mam <- c(0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0) + + ### Observed results ---- + obs_gam <- define_wasting_zscores(zscores = wfhz, edema, .cases = "gam") + obs_sam <- define_wasting_zscores(zscores = wfhz, edema, .cases = "sam") + obs_mam <- define_wasting_zscores(zscores = wfhz, edema, .cases = "mam") + + ### Tests ---- + testthat::expect_equal(obs_gam, exp_gam) + testthat::expect_vector(obs_gam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_sam, exp_sam) + testthat::expect_vector(obs_sam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_mam, exp_mam) + testthat::expect_vector(obs_mam, ptype = numeric(), size = 20) + } +) + +# Test check: define_wasting_combined() ---- +## With edema ---- +testthat::test_that( + "define_wasting_combined() defines cases as it should", + { + ### Sample data ---- + wfhz <- c( + -0.958, -2.410, -0.232, -2.289, -3.015, -1.563, -2.773, -1.442, + -2.652, -3.257, -2.531, -2.894, -0.595, -3.378, -1.321, -2.047, + -0.353, -0.474, -1.200, -1.079 + ) + muac_values <- c( + 123, 129, 126, 113, 130, 122, 112, 124, 128, + 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 + ) + edema <- c( + "n", "n", "y", "n", "n", "n", "n", "n", "n", "n", "n", "n", + "n", "n", "n", "n", "n", "y", "y", "n" + ) + + ### Expected results ---- + exp_cgam <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) + exp_csam <- c(0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0) + exp_cmam <- c(1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1) + + ### Observed results ---- + obs_cgam <- define_wasting_combined( + zscores = wfhz, + muac = muac_values, + edema = edema, + .cases = "cgam" + ) + obs_csam <- define_wasting_combined( + zscores = wfhz, + muac = muac_values, + edema = edema, + .cases = "csam" + ) + obs_cmam <- define_wasting_combined( + zscores = wfhz, + muac = muac_values, + edema = edema, + .cases = "cmam" + ) + + ### The test ---- + testthat::expect_equal(obs_cgam, exp_cgam) + testthat::expect_vector(obs_cgam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_csam, exp_csam) + testthat::expect_vector(obs_csam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_cmam, exp_cmam) + testthat::expect_vector(obs_cmam, ptype = numeric(), size = 20) + } +) + +## With edema set to NULL ---- +testthat::test_that( + "define_wasting_combined() defines cases as it should when edema is set to NULL", + { + ### Sample data ---- + wfhz <- c( + -0.958, -2.410, -0.232, -2.289, -3.015, -1.563, -2.773, -1.442, + -2.652, -3.257, -2.531, -2.894, -0.595, -3.378, -1.321, -2.047, + -0.353, -0.474, -1.200, -1.079 + ) + muac_values <- c( + 123, 129, 126, 113, 130, 122, 112, 124, 128, + 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 + ) + edema <- NULL + + ### Expected results ---- + exp_cgam <- c(1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) + exp_csam <- c(0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0) + exp_cmam <- c(1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1) + + ### Observed results ---- + obs_cgam <- define_wasting_combined( + zscores = wfhz, + muac = muac_values, + edema = edema, + .cases = "cgam" + ) + obs_csam <- define_wasting_combined( + zscores = wfhz, + muac = muac_values, + edema = edema, + .cases = "csam" + ) + obs_cmam <- define_wasting_combined( + zscores = wfhz, + muac = muac_values, + edema = edema, + .cases = "cmam" + ) + + ### The test ---- + testthat::expect_equal(obs_cgam, exp_cgam) + testthat::expect_vector(obs_cgam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_csam, exp_csam) + testthat::expect_vector(obs_csam, ptype = numeric(), size = 20) + testthat::expect_equal(obs_cmam, exp_cmam) + testthat::expect_vector(obs_cmam, ptype = numeric(), size = 20) + } +) + + +# Test check: define_wasting() ---- +## `.by` set to "zscores" ---- +testthat::test_that( + "define_wasting() works as expected with zscores", + { + ### Input data ---- + df <- wfhz.01 |> + define_wasting( + zscores = wfhz, + muac = NULL, + edema = edema, + .by = "zscores" + ) |> + select(gam, sam, mam) + + ### Vectors of wrong class ---- + data <- wfhz.01 + data$x <- as.character(data$wfhz) + data$ed <- as.factor(data$edema) + + ### Tests ---- + testthat::expect_s3_class(df, "data.frame") + testthat::expect_named(df, c("gam", "sam", "mam")) + testthat::expect_vector(df$gam, size = 303) + testthat::expect_vector(df$sam, size = 303) + testthat::expect_vector(df$mam, size = 303) + testthat::expect_error( + define_wasting( + df = data, + zscores = x, + muac = NULL, + edema = edema, + .by = "zscores" + ), + regexp = paste0( + "`zscores` must be of class 'double'; not ", shQuote(class(data$x)), + ". Please try again." + ) + ) + testthat::expect_error( + define_wasting( + df = data, + zscores = wfhz, + muac = NULL, + edema = ed, + .by = "zscores" + ), + regexp = paste0( + "`edema` must be of class 'character'; not ", + shQuote(class(data$ed)), ". Please try again." + ) + ) + } +) + +## `.by` set to "muac" ---- +testthat::test_that( + "define_wasting() works as expected with muac", + { + ### Input data ---- + df <- mfaz.02 |> + define_wasting( + muac = muac, + edema = edema, + .by = "muac" + ) |> + select(gam, sam, mam) + + ### Vectors of wrong class ---- + data <- mfaz.02 + data$m <- as.character(data$muac) + data$ed <- as.factor(data$edema) + + ### Tests ---- + testthat::expect_s3_class(df, "data.frame") + testthat::expect_named(df, c("gam", "sam", "mam")) + testthat::expect_vector(df$gam, size = 303) + testthat::expect_vector(df$sam, size = 303) + testthat::expect_vector(df$mam, size = 303) + testthat::expect_error( + define_wasting( + df = data, + muac = m, + edema = edema, + .by = "muac" + ), + regexp = paste0( + "`muac` must be of class 'numeric' or 'integer'; not ", + shQuote(class(data$m)), ". Please try again." + ) + ) + testthat::expect_error( + define_wasting( + df = data, + muac = muac, + edema = ed, + .by = "muac" + ), + regexp = paste0( + "`edema` must be of class 'character'; not ", + shQuote(class(data$ed)), ". Please try again." + ) + ) + } +) + + +## `.by` set to "combined" ---- +testthat::test_that( + "define_wasting() works as expected with for combined", + { + ### Input data ---- + x <- anthro.02 |> + define_wasting( + zscores = wfhz, + muac = muac, + edema = edema, + .by = "combined" + ) |> + select(cgam, csam, cmam) + + ### Wrong vectors ---- + y <- anthro.02 + y$zs <- as.character(anthro.02$wfhz) + y$m <- as.character(anthro.02$muac) + y$ed <- ifelse(y$edema == "n", "p", "y") + ### Tests ---- + testthat::expect_s3_class(x, "data.frame") + testthat::expect_named(x, c("cgam", "csam", "cmam")) + testthat::expect_vector(x$cgam, size = 2267) + testthat::expect_vector(x$csam, size = 2267) + testthat::expect_vector(x$cmam, size = 2267) + testthat::expect_error( + define_wasting( + df = y, + zscores = zs, + muac = muac, + edema = edema, + .by = "combined" + ), + regexp = paste0( + "`zscores` must be of class 'double'; not ", shQuote(class(y$zs)), + ". Please try again." + ) + ) + testthat::expect_error( + define_wasting( + df = y, + zscores = wfhz, + muac = m, + edema = edema, + .by = "combined" + ), + regexp = paste0( + "`muac` must be of class 'numeric' or 'integer'; not ", + shQuote(class(y$m)), ". Please try again." + ) + ) + testthat::expect_error( + define_wasting( + df = y, + zscores = wfhz, + muac = muac, + edema = ed, + .by = "combined" + ), + regexp = paste0( + "Values in `edema` should either be 'y' or 'n'. Please try again." + ) + ) + } +) + +# Test check: def_wasting_smart_muac_tool() ---- +## Edema is not NULL ---- +testthat::test_that( + "The function works as designed for", + { + ### Input data ---- + muac_values <- c( + 123, 129, 126, 113, 130, 122, 112, 124, 128, + 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 + ) + edema <- c( + "n", "n", "y", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", + "n", "n", "n", "y", "y", "n" + ) + + expected <- c( + "mam", "not wasted", "sam", "sam", "not wasted", "mam", "sam", "mam", + "not wasted", "mam", "mam", "sam", "sam", "not wasted", "mam", "not wasted", + "mam", "sam", "sam", "mam" + ) + + ### Observed results ---- + obs <- smart_tool_case_definition(muac_values, edema) + + ## Tests ---- + testthat::expect_vector(obs, ptype = "character", size = 20) + testthat::expect_equal(obs, expected) + } +) + +## Edema is NULL ---- +testthat::test_that( + "The function works as designed for", + { + ### Input data ---- + muac_values <- c( + 123, 129, 126, 113, 130, 122, 112, 124, 128, + 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 + ) + + expected <- c( + "mam", "not wasted", "not wasted", "sam", "not wasted", "mam", "sam", "mam", + "not wasted", "mam", "mam", "sam", "sam", "not wasted", "mam", "not wasted", + "mam", "mam", "sam", "mam" + ) + + ### Observed results ---- + obs <- smart_tool_case_definition(muac_values) + + ## Tests ---- + testthat::expect_vector(obs, ptype = "character", size = 20) + testthat::expect_equal(obs, expected) + } +) diff --git a/tests/testthat/test-prev_wasting_combined.R b/tests/testthat/test-prev_wasting_combined.R new file mode 100644 index 0000000..48fc0c4 --- /dev/null +++ b/tests/testthat/test-prev_wasting_combined.R @@ -0,0 +1,311 @@ +# Test check: mw_estimate_prevalence_combined() ---- +## When std != problematic & MUAC analysis path is unweighted & !is.null(wt) ---- +testthat::test_that( + "mw_estimate_prevalence_combined() yields correct estimates when edema and + survey weights are supplied", + { + ### Get prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_combined(edema = edema, wt = wtfactor) + + ### Expected results ---- + #### combined GAM estimates and uncertainty ---- + n_cgam <- 199 + p_cgam <- 7.1 + p_cgam_lci <- 5.6 + p_cgam_uci <- 8.5 + deff <- 1.72 + + #### combined SAM estimates and uncertainty ---- + n_csam <- 68 + p_csam <- 1.5 + p_csam_lci <- 0.8 + p_csam_uci <- 2.3 + + #### combined MAM estimates and uncertainty ---- + n_cmam <- 145 + p_cmam <- 6.0 + p_cmam_lci <- 4.7 + p_cmam_uci <- 7.3 + + #### Sum of weights ----- + sum_wt <- 1738110 + + ### Tests ---- + testthat::expect_equal(p[[1]][1], n_cgam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_cgam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_cgam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_cgam_uci) + testthat::expect_equal(round(p[[5]][1], 2), deff) + testthat::expect_equal(p[[6]][1], n_csam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_csam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_csam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_csam_uci) + testthat::expect_equal(p[[11]][1], n_cmam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_cmam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_cmam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_cmam_uci) + testthat::expect_equal(round(p[[16]][1]), sum_wt) + } +) + +## When std != problematic & MUAC analysis path is unweighted & !is.null(wt) ---- +testthat::test_that( + "mw_estimate_prevalence_combined() yields correct estimates when edema is NULL", + { + ### Get prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_combined(edema = NULL, wt = wtfactor) + + ### Expected results ---- + #### Combined GAM estimates and uncertainty ---- + n_cgam <- 187 + p_cgam <- 6.4 + p_cgam_lci <- 5.0 + p_cgam_uci <- 7.8 + deff <- 1.67 + + #### combined SAM estimates and uncertainty ---- + n_csam <- 55 + p_csam <- 0.8 + p_csam_lci <- 0.3 + p_csam_uci <- 1.2 + + #### combined MAM estimates and uncertainty ---- + n_cmam <- 146 + p_cmam <- 6.1 + p_cmam_lci <- 4.8 + p_cmam_uci <- 7.4 + + ### Sum of weights ---- + sum_wt <- 1738110 + + ### Tests ---- + testthat::expect_equal(p[[1]][1], n_cgam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_cgam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_cgam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_cgam_uci) + testthat::expect_equal(p[[6]][1], n_csam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_csam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_csam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_csam_uci) + testthat::expect_equal(p[[11]][1], n_cmam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_cmam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_cmam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_cmam_uci) + testthat::expect_equal(round(p[[16]][1]), sum_wt) + } +) + +## When is.null(wt) ---- +testthat::test_that( + "mw_estimate_prevalence_combined() yields correct estimates when `wt` is NULL", + { + ### Get prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_combined(edema = edema) + + ### Expected results ---- + #### combined GAM estimates and uncertainty ---- + n_cgam <- 199 + p_cgam <- 6.8 + p_cgam_lci <- 5.7 + p_cgam_uci <- 8.0 + + #### combined SAM estimates and uncertainty ---- + n_csam <- 68 + p_csam <- 1.3 + p_csam_lci <- 0.8 + p_csam_uci <- 1.8 + + #### combined MAM estimates and uncertainty ---- + n_cmam <- 145 + p_cmam <- 5.9 + p_cmam_lci <- 4.8 + p_cmam_uci <- 7.0 + + ### Test ---- + testthat::expect_equal(p[[1]][1], n_cgam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_cgam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_cgam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_cgam_uci) + testthat::expect_equal(p[[6]][1], n_csam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_csam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_csam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_csam_uci) + testthat::expect_equal(p[[11]][1], n_cmam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_cmam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_cmam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_cmam_uci) + } +) + +## When !is.null(wt) with .by = province ---- +testthat::test_that( + "mw_estimate_prevalence_combined() yields correct estimates when `.by` is + used", + { + ### Get prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_combined( + edema = edema, + wt = wtfactor, + .by = province + ) + + ### Expected results for Nampula province ---- + #### GAM estimates and uncertainty ---- + n_cgam <- 121 + p_cgam <- 8.4 + p_cgam_lci <- 6.0 + p_cgam_uci <- 10.9 + deff <- 1.87 + + #### SAM estimates and uncertainty ---- + n_csam <- 47 + p_csam <- 2.0 + p_csam_lci <- 0.7 + p_csam_uci <- 3.3 + + #### MAM estimates and uncertainty ---- + n_cmam <- 80 + p_cmam <- 6.8 + p_cmam_lci <- 4.7 + p_cmam_uci <- 9.0 + + #### Sum of survey weights ----- + sum_wt <- 869504 + + ### Tests ---- + testthat::expect_equal(p[[2]][2], n_cgam) + testthat::expect_equal(round(p[[3]][2] * 100, 1), p_cgam) + testthat::expect_equal(round(p[[4]][2] * 100, 1), p_cgam_lci) + testthat::expect_equal(round(p[[5]][2] * 100, 1), p_cgam_uci) + testthat::expect_equal(round(p[[6]][2], 2), deff) + testthat::expect_equal(p[[7]][2], n_csam) + testthat::expect_equal(round(p[[8]][2] * 100, 1), p_csam) + testthat::expect_equal(round(p[[9]][2] * 100, 1), p_csam_lci) + testthat::expect_equal(round(p[[10]][2] * 100, 1), p_csam_uci) + testthat::expect_equal(p[[12]][2], n_cmam) + testthat::expect_equal(round(p[[13]][2] * 100, 1), p_cmam) + testthat::expect_equal(round(p[[14]][2] * 100, 1), p_cmam_lci) + testthat::expect_equal(round(p[[15]][2] * 100, 1), p_cmam_uci) + testthat::expect_equal(round(p[[17]][2]), sum_wt) + } +) + +## When !is.null(.by) and analysis approach has different categories ---- +testthat::test_that( + "mw_estimate_prevalence_combined() works well on a multiple area survey data set + where different analysis approaches are required", + { + ### Get the prevalence estimates ---- + p <- anthro.03 |> + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = "age", + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) |> + mutate(muac = recode_muac(muac, .to = "mm")) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE + ) |> + mw_estimate_prevalence_combined( + edema = edema, + .by = district + ) + + ### Subset a district where a normal analysis should be computed ---- + CB <- subset(p, district == "Cahora-Bassa") + + ### Subset a district where NA should be thrown ---- + M <- subset(p, district == "Maravia") |> + select(!district) + + ### Tests ---- + testthat::expect_vector(select(p, !district), size = 4, ncol(17)) + testthat::expect_s3_class(p, "tbl") + testthat::expect_false(all(sapply(CB[names(CB)], \(.) all(is.na(.))))) + testthat::expect_true(all(sapply(M[names(M)], \(.) all(is.na(.))))) + } +) + +## When !is.null(.by) and analysis approach has different categories ---- +testthat::test_that( + "mw_estimate_prevalence_combined() works as expected", + { + ### Get the prevalence estimates ---- + p <- anthro.03 |> + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = "age", + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) |> + mutate(muac = recode_muac(muac, .to = "mm")) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE + ) |> + subset(district == "Maravia") |> + mw_estimate_prevalence_combined( + edema = edema, + .by = NULL + ) + + ### Tests ---- + testthat::expect_vector(p, size = 1, ncol(3)) + testthat::expect_s3_class(p, "tbl") + testthat::expect_true(all(sapply(p[names(p)], \(.) all(is.na(.))))) + } +) + +## When MUAC is not in millimeters the function errors ---- +testthat::test_that( + "When MUAC is not in centimeters, the function stop execution", + { + testthat::expect_error( + x <- anthro.01 |> + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = age, + .recode_sex = FALSE, + .recode_muac = TRUE, + .to = "cm", + .decimals = 3 + ) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = F, + .decimals = 3 + ) |> + mw_estimate_prevalence_combined(edema = edema), + regexp = "MUAC values must be in millimeters. Please try again." + ) + } +) diff --git a/tests/testthat/test-prev_wasting_mfaz.R b/tests/testthat/test-prev_wasting_mfaz.R new file mode 100644 index 0000000..bfc2865 --- /dev/null +++ b/tests/testthat/test-prev_wasting_mfaz.R @@ -0,0 +1,117 @@ +# Test check: mw_estimate_prevalence_mfaz ---- +## When std != problematic & is.null(.wt) & !is.null(edema) ---- +testthat::test_that( + "mw_estimate_prevalence_mfaz() yields correct estimates", + { + ### Get the prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_mfaz(edema = edema, .by = NULL) + + ### Expected results ---- + #### GAM estimates and uncertainty ---- + n_gam <- 226 + p_gam <- 9.6 + p_gam_lci <- 8.1 + p_gam_uci <- 11.2 + + #### SAM estimates and uncertainty ---- + n_sam <- 75 + p_sam <- 2.7 + p_sam_lci <- 1.9 + p_sam_uci <- 3.5 + + #### MAM estimates and uncertainty ---- + n_mam <- 151 + p_mam <- 6.9 + p_mam_lci <- 5.6 + p_mam_uci <- 8.2 + + ### Tests ---- + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + } +) + +## When std != problematic & is.null(.wt) & is.null(edema) ---- +testthat::test_that( + "mw_estimate_prevalence_mfaz() yields correct estimates", + { + ### Get the prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_mfaz(edema = NULL, .by = NULL) + + ### Expected results ---- + #### GAM estimates and uncertainty ---- + n_gam <- 215 + p_gam <- 9.1 + p_gam_lci <- 7.6 + p_gam_uci <- 10.6 + + #### SAM estimates and uncertainty ---- + n_sam <- 62 + p_sam <- 2.1 + p_sam_lci <- 1.4 + p_sam_uci <- 2.8 + + #### MAM estimates and uncertainty ---- + n_mam <- 153 + p_mam <- 7.0 + p_mam_lci <- 5.7 + p_mam_uci <- 8.3 + + ### Tests ---- + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + } +) + + +## When standard deviation == problematic ---- +testthat::test_that( + "mw_estimate_prevalence_mfaz() works well on a dataframe with multiple survey areas with + different categories on analysis_approach", + { + ### Get the prevalence estimates ---- + p <- anthro.04 |> + mw_estimate_prevalence_mfaz(edema = edema, .by = province) + + ### Subset a province whose analysis approach is unweighted --- + province_1 <- subset(p, province == "Province 1") + + ### Subset a province whose analysis approach is weighted --- + province_3 <- subset(p, province == "Province 3") + + + columns_to_check <- c( + "gam_n", "gam_p_low", "gam_p_upp", "sam_n", + "sam_p_low", "sam_p_upp", "mam_n", "mam_p_low", + "mam_p_upp", "wt_pop" + ) + + ### Tests ---- + testthat::expect_vector(dplyr::select(p, !province), size = 3, ncol(17)) + testthat::expect_s3_class(p, "tbl") + testthat::expect_false(all(sapply(province_1[columns_to_check], \(.) all(is.na(.))))) + testthat::expect_true(all(sapply(province_3[columns_to_check], \(.) all(is.na(.))))) + } +) diff --git a/tests/testthat/test-prev_wasting_muac.R b/tests/testthat/test-prev_wasting_muac.R new file mode 100644 index 0000000..6482bf3 --- /dev/null +++ b/tests/testthat/test-prev_wasting_muac.R @@ -0,0 +1,562 @@ +# Test check: set_analysis_path() ---- +testthat::test_that( + "set_analysis_path() works", + { + ## Input data ---- + age_ratio_class_1 <- "Problematic" + age_ratio_class_2 <- "Good" + std_class_1 <- "Excellent" + std_class_2 <- "Problematic" + + ## Expected results ---- + expected_1 <- "weighted" + expected_2 <- "missing" + expected_3 <- "unweighted" + + ## Observed results ---- + obs_1 <- set_analysis_path(age_ratio_class_1, std_class_1) + obs_2 <- set_analysis_path(age_ratio_class_1, std_class_2) + obs_3 <- set_analysis_path(age_ratio_class_2, std_class_1) + + ## Tests ---- + testthat::expect_equal(obs_1, expected_1) + testthat::expect_equal(obs_2, expected_2) + testthat::expect_equal(obs_3, expected_3) + } +) + +# Test check: smart_tool_case_definition () with edema set o NULL ---- +testthat::test_that( + "smart_tool_case_definition() does its job well", + { + ## Input data ---- + muac_values <- c( + 123, 129, 126, 113, 130, 122, 112, 124, 128, + 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 + ) + + ## Expected results ---- + expected <- c( + "mam", "not wasted", "not wasted", "sam", "not wasted", "mam", "sam", "mam", + "not wasted", "mam", "mam", "sam", "sam", "not wasted", "mam", "not wasted", + "mam", "mam", "sam", "mam" + ) + + ## Observed results ---- + obs <- smart_tool_case_definition(muac = muac_values, edema = NULL) + + ## Tests ---- + testthat::expect_vector(obs, ptype = "character", size = 20) + testthat::expect_equal(obs, expected) + } +) + +# Test check: smart_tool_case_definition() with edema supplied ---- +testthat::test_that( + "smart_tool_case_definition() does its job well", + { + ## Input data ---- + muac_values <- c( + 123, 129, 126, 113, 130, 122, 112, 124, 128, + 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 + ) + edema <- c( + "n", "n", "n", "n", "y", "y", "y", "n", "y", "n", "n", "y", "n", + "y", "n", "n", "n", "n", "n", "n" + ) + + ## Expected results ---- + expected <- c( + "mam", "not wasted", "not wasted", "sam", "sam", "sam", "sam", "mam", + "sam", "mam", "mam", "sam", "sam", "sam", "mam", "not wasted", + "mam", "mam", "sam", "mam" + ) + + ## Observed results ---- + obs <- smart_tool_case_definition(muac = muac_values, edema = edema) + + ## Tests ---- + testthat::expect_vector(obs, ptype = "character", size = 20) + testthat::expect_equal(obs, expected) + } +) + +# Test check: smart_age_weighting() ---- +## Edema set to !NULL ---- +testthat::test_that( + "smart_age_weighting() works amazing", + { + ### Input data ---- + x <- mfaz.01 |> + mw_wrangle_age( + age = age + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = age, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) |> + subset(flag_mfaz == 0) |> + mutate(muac = recode_muac(muac, .to = "mm")) + + + #### Expected results calculated in the CDC/SMART MUAC tool ---- + expect_sam <- 0.021 + expect_mam <- 0.081 + + #### Observed results ---- + obs_sam <- with( + x, + smart_age_weighting( + muac = muac, + edema = edema, + age = age, + .form = "sam" + ) + ) + obs_mam <- with( + x, + smart_age_weighting( + muac = muac, + edema = edema, + age = age, + .form = "mam" + ) + ) + + ## Tests ---- + testthat::expect_vector(obs_sam, size = 1) + testthat::expect_vector(obs_mam, size = 1) + testthat::expect_equal(round(obs_sam, 3), expect_sam) + testthat::expect_equal(round(obs_mam, 3), expect_mam) + } +) + +## Edema set to NULL ---- +testthat::test_that( + "smart_age_weighting() works amazing", + { + ## Input data ---- + x <- mfaz.01 |> + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = age, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) |> + subset(flag_mfaz == 0) |> + mutate( + muac = recode_muac(muac, .to = "mm") + ) + + + #### Expected results calculated in the CDC/SMART MUAC tool ---- + expect_sam <- 0.014 + expect_mam <- 0.080 + + #### Observed results ---- + obs_sam <- with( + x, + smart_age_weighting( + muac = muac, + age = age, + .form = "sam" + ) + ) + obs_mam <- with( + x, + smart_age_weighting( + muac = muac, + age = age, + .form = "mam" + ) + ) + + ## Tests ---- + testthat::expect_vector(obs_sam, size = 1) + testthat::expect_vector(obs_mam, size = 1) + testthat::expect_equal(round(obs_sam, 3), expect_sam) + testthat::expect_equal(round(obs_mam, 2), expect_mam) + } +) + +# Test check: mw_estimate_prevalence_muac() ---- +## When age_ratio & std != problematic & !is.null(wt) & !is.null(edema) ---- +testthat::test_that( + "mw_estimate_prevalence_muac() yields correct estimates when edema and survey + weights are supplied", + { + ### Get the prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_muac(edema = edema, wt = wtfactor, .by = NULL) + + ### Expected results ---- + ### GAM estimates and uncertainty ---- + n_gam <- 135 + p_gam <- 5.6 + p_gam_lci <- 4.3 + p_gam_uci <- 6.9 + deff <- 1.86 + + ### SAM estimates and uncertainty ---- + n_sam <- 46 + p_sam <- 1.7 + p_sam_lci <- 0.9 + p_sam_uci <- 2.4 + + ### MAM estimates and uncertainty ---- + n_mam <- 89 + p_mam <- 4.0 + p_mam_lci <- 3.0 + p_mam_uci <- 4.9 + + ### Tests ---- + + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + } +) + +## When age_ratio & std != problematic & is.null(wt) & !is.null(edema) ---- +testthat::test_that( + "mw_estimate_prevalence_muac() yields correct estimates survey + weights are not supplied", + { + ### Get the prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_muac(wt = NULL) + + ### Expected results ---- + ### GAM estimates and uncertainty ---- + n_gam <- 123 + p_gam <- 4.9 + p_gam_lci <- 3.8 + p_gam_uci <- 5.9 + + ### SAM estimates and uncertainty ---- + n_sam <- 33 + p_sam <- 0.7 + p_sam_lci <- 0.4 + p_sam_uci <- 1.1 + + ### MAM estimates and uncertainty ---- + n_mam <- 90 + p_mam <- 4.1 + p_mam_lci <- 3.2 + p_mam_uci <- 5.1 + + ### Tests ---- + + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + } +) + +## When age_ratio & std != problematic & !is.null(wt) & !is.null(edema) ---- +testthat::test_that( + "mw_estimate_prevalence_muac() yields correct estimates when edema is not + supplied", + { + ### Get the prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_muac(edema = NULL, wt = wtfactor, .by = NULL) + + ### Expected results ---- + #### GAM estimates and uncertainty ---- + n_gam <- 123 + p_gam <- 5.0 + p_gam_lci <- 3.8 + p_gam_uci <- 6.2 + deff <- 1.75 + + #### SAM estimates and uncertainty ---- + n_sam <- 33 + p_sam <- 0.9 + p_sam_lci <- 0.4 + p_sam_uci <- 1.5 + + #### MAM estimates and uncertainty ---- + n_mam <- 90 + p_mam <- 4.0 + p_mam_lci <- 3.1 + p_mam_uci <- 5.0 + + ### Tests ---- + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + } +) + +## When age_ratio & std != problematic & is.null(wt) ---- +testthat::test_that( + "mw_estimate_prevalence_muac() yields correct estimates when edema is not supplied", + { + ### Get prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_muac(edema = NULL, .by = NULL) + + ### Expected results ---- + #### GAM estimates and uncertainty ---- + n_gam <- 123 + p_gam <- 4.9 + p_gam_lci <- 3.8 + p_gam_uci <- 5.9 + + #### SAM estimates and uncertainty ---- + n_sam <- 33 + p_sam <- 0.7 + p_sam_lci <- 0.4 + p_sam_uci <- 1.1 + + #### MAM estimates and uncertainty ---- + n_mam <- 90 + p_mam <- 4.1 + p_mam_lci <- 3.2 + p_mam_uci <- 5.1 + + ### The test ---- + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + } +) + + +## When age_ratio & std != problematic & !is.null(wt) with .by = province +testthat::test_that( + "mw_estimate_prevalence_muac() yields correct estimates when .by is + used", + { + ### Get prevalence estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_muac( + edema = edema, + wt = wtfactor, + .by = province + ) + + ### Expected results for Zambezia province ---- + #### GAM estimates and uncertainty ---- + n_gam <- 65 + p_gam <- 5.5 + p_gam_lci <- 3.8 + p_gam_uci <- 7.2 + deff <- 1.67 + + #### SAM estimates and uncertainty ---- + n_sam <- 18 + p_sam <- 1.3 + p_sam_lci <- 0.4 + p_sam_uci <- 2.2 + + #### MAM estimates and uncertainty ---- + n_mam <- 47 + p_mam <- 4.2 + p_mam_lci <- 3.0 + p_mam_uci <- 5.4 + + #### Sum of weigths ---- + sum_wt <- 880902 + + ### The test ---- + testthat::expect_equal(p[[2]][2], n_gam) + testthat::expect_equal(round(p[[3]][2] * 100, 1), p_gam) + testthat::expect_equal(round(p[[4]][2] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[5]][2] * 100, 1), p_gam_uci) + testthat::expect_equal(round(p[[6]][2], 2), deff) + testthat::expect_equal(p[[7]][2], n_sam) + testthat::expect_equal(round(p[[8]][2] * 100, 1), p_sam) + testthat::expect_equal(round(p[[9]][2] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[10]][2] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[12]][2], n_mam) + testthat::expect_equal(round(p[[13]][2] * 100, 1), p_mam) + testthat::expect_equal(round(p[[14]][2] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[15]][2] * 100, 1), p_mam_uci) + testthat::expect_equal(p[[17]][2], sum_wt) + } +) + + +## When !is.null(.by) and analysis approach has different categories ---- +testthat::test_that( + "mw_estimate_prevalence_muac() works well on a dataframe with multiple survey areas with + different categories of analysis_approach", + { + ### Get the prevalence estimates ---- + p <- anthro.04 |> + mw_estimate_prevalence_muac(edema = edema, .by = province) + + ### A Province whose analysis approach is unweighted --- + province_1 <- subset(p, province == "Province 1") + + ### A Province whose analysis approach is weighted --- + province_2 <- subset(p, province == "Province 2") + + ### A Province whose analysis approach is add missing (NA's) --- + province_3 <- subset(p, province == "Province 3") |> + select(!province) + + columns_to_check <- c( + "gam_n", "gam_p_low", "gam_p_upp", "sam_n", + "sam_p_low", "sam_p_upp", "mam_n", "mam_p_low", + "mam_p_upp", "wt_pop" + ) + + ### The test ---- + testthat::expect_vector(select(p, !province), size = 3, ncol(17)) + testthat::expect_s3_class(p, "tbl") + testthat::expect_false(all(sapply(province_1[columns_to_check], \(.) all(is.na(.))))) + testthat::expect_true(all(sapply(province_2[columns_to_check], \(.) all(is.na(.))))) + testthat::expect_true(all(sapply(province_3[names(province_3)], \(.) all(is.na(.))))) + } +) + +## When is.null(.by) and analysis pah is add NA's ---- +testthat::test_that( + "mw_estimate_prevalence_muac() works as expected", + { + ### Get the prevalence estimates ---- + p <- anthro.04 |> + subset(province == "Province 3") |> + mw_estimate_prevalence_muac(edema = edema, .by = NULL) + + ### The test ---- + testthat::expect_s3_class(p, "tbl") + testthat::expect_true(all(sapply(p[names(p)], \(.) all(is.na(.))))) + } +) + +## When MUAC is not in millimeters the function errors ---- +testthat::test_that( + "When MUAC is not in centimeters, the function stop execution", + { + testthat::expect_error( + x <- anthro.01 |> + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = age, + .recode_sex = FALSE, + .recode_muac = TRUE, + .to = "cm", + .decimals = 3 + ) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = F, + .decimals = 3 + ) |> + mw_estimate_prevalence_muac(edema = edema), + regexp = "MUAC values must be in millimeters. Please try again." + ) + } +) + + +# Test check: mw_estimate_smart_age_wt() ---- +testthat::test_that( + "mw_estimate_smart_age_wt() works well", + { + ## Observed results ---- + p <- anthro.04 |> + subset(province == "Province 2") |> + mw_estimate_smart_age_wt() + + ## Expected results ---- + gam <- 11.2 + sam <- 2.0 + mam <- 9.2 + + ## Tests ---- + testthat::expect_s3_class(p, "tbl") + testthat::expect_equal(round(p[[3]][1] * 100, 1), gam) + testthat::expect_equal(round(p[[1]][1] * 100, 1), sam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), mam) + + } +) +## When MUAC is not in millimeters the function errors ---- +testthat::test_that( + "When MUAC is not in centimeters, the function stop execution", + { + testthat::expect_error( + x <- anthro.01 |> + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = age, + .recode_sex = FALSE, + .recode_muac = TRUE, + .to = "cm", + .decimals = 3 + ) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = F, + .decimals = 3 + ) |> + mw_estimate_smart_age_wt(edema = edema), + regexp = "MUAC values must be in millimeters. Please try again." + ) + } +) diff --git a/tests/testthat/test-prev_wasting_probit.R b/tests/testthat/test-prev_wasting_probit.R new file mode 100644 index 0000000..dd82398 --- /dev/null +++ b/tests/testthat/test-prev_wasting_probit.R @@ -0,0 +1,66 @@ +# Test check: apply_probit_method() ---- +testthat::test_that( + "apply_probit_approach works", + { + ## Input data ---- + x <- anthro.03 |> + mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> + subset(district == "Metuge") + + ## Observed results ---- + p_gam <- apply_probit_method(x$wfhz, .status = "gam") + p_sam <- apply_probit_method(x$wfhz, .status = "sam") + + ## Tests ---- + testthat::expect_vector(c(p_gam, p_sam), ptype = double(), size = 2) + } +) + +# Test check: estimate_probit_prevalence() ---- +testthat::test_that( + "estimate_probit_prevalence works OK", + { + ## Input data ---- + p <- anthro.03 |> + mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> + subset(district == "Metuge") |> + estimate_probit_prevalence(.for = "wfhz", .by = NULL) + + ## Tests ---- + testthat::expect_s3_class(p, class = "tbl", exact = FALSE) + testthat::expect_length(p, 3) + testthat::expect_vector(p, nrow(1), ncol(3)) + } +) + +# Test check: estimate_probit_prevalence() ---- +## When `.by` is not NULL ---- +testthat::test_that( + "estimate_probit_prevalence works OK", + { + ## Input data ---- + p <- anthro.03 |> + mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> + subset(district == "Metuge" | district == "Maravia") |> + estimate_probit_prevalence(.by = district, .for = "wfhz") + + ## Tests ---- + testthat::expect_length(p, 4) + testthat::expect_vector(p, nrow(2)) + } +) + +# Test check: estimate_probit_prevalence() ---- +testthat::test_that( + "estimate_probit_prevalence works OK with `.for` set to 'mfaz'", + { + ## Input data ---- + p <- mfaz.01 |> + estimate_probit_prevalence(.for = "mfaz", .by = NULL) + + ## Tests ---- + testthat::expect_s3_class(p, class = "tbl", exact = FALSE) + testthat::expect_length(p, 3) + testthat::expect_vector(p, nrow(1)) + } +) diff --git a/tests/testthat/test-prev_wasting_screening.R b/tests/testthat/test-prev_wasting_screening.R new file mode 100644 index 0000000..4cbaf4e --- /dev/null +++ b/tests/testthat/test-prev_wasting_screening.R @@ -0,0 +1,301 @@ +# Test check: get_estimates() ---- +## When is.null(.by) ---- +testthat::test_that( + "get_estimates() works OK", + { + ## Wrangle data ---- + df <- anthro.02 |> + mutate( + muacx = as.character(muac), + edemax = as.factor(edema), + ede = ifelse(edema == "y", "yes", 0) + ) + + ### Get estimates ---- + p <- df |> + get_estimates( + muac = muac, + edema = edema, + .by = NULL + ) + + ### Observed estimates ---- + gam_n <- 118 + gam_p <- 5.4 + sam_n <- 29 + sam_p <- 1.3 + mam_n <- 89 + mam_p <- 4.1 + + ### Tests ---- + testthat::expect_s3_class(p, "tbl_df") + testthat::expect_equal(ncol(p), 6) + testthat::expect_equal(nrow(p), 1) + testthat::expect_true( + all(c("gam_n", "gam_p", "sam_n", "sam_p", "mam_n", "mam_p") %in% names(p)) + ) + testthat::expect_equal(p[[1]][1], gam_n) + testthat::expect_equal(round(p[[2]][1] * 100, 1), gam_p) + testthat::expect_equal(p[[3]][1], sam_n) + testthat::expect_equal(round(p[[4]][1] * 100, 1), sam_p) + testthat::expect_equal(p[[5]][1], mam_n) + testthat::expect_equal(round(p[[6]][1] * 100, 1), mam_p) + testthat::expect_error( + anthro.02 |> + mutate(muac = recode_muac(muac, .to = "cm")) |> + get_estimates( + muac = muac, + edema = edema, + .by = NULL + ), + regexp = "MUAC values must be in millimeters. Try again!" + ) + testthat::expect_error( + df |> + get_estimates( + muac = muacx, + edema = edema, + .by = NULL + ), + regexp = paste0( + "`muac` should be of class numeric; not ", + shQuote(class(df$muacx)), ". Try again!" + ) + ) + testthat::expect_error( + df |> + get_estimates( + muac = muac, + edema = edemax, + .by = NULL + ), + regexp = paste0( + "`edema` should be of class character; not ", + shQuote(class(df$edemax)), ". Try again!" + ) + ) + testthat::expect_error( + df |> + get_estimates( + muac = muac, + edema = ede, + .by = NULL + ), + regexp = "Code values in `edema` must only be 'y' and 'n'. Try again!" + ) + } +) + +## When is.null(edema) & is.null(.by)---- +testthat::test_that( + "get_estimates() works OK when edema and .by are both null", + { + ### Get estimates ---- + p <- anthro.02 |> + get_estimates( + muac = muac, + edema = NULL, + .by = NULL + ) + + ### Observed estimates ---- + gam_n <- 106 + gam_p <- 4.9 + sam_n <- 16 + sam_p <- 0.7 + mam_n <- 90 + mam_p <- 4.1 + + ### Tests ---- + testthat::expect_s3_class(p, "tbl_df") + testthat::expect_equal(ncol(p), 6) + testthat::expect_equal(nrow(p), 1) + testthat::expect_true( + all(c("gam_n", "gam_p", "sam_n", "sam_p", "mam_n", "mam_p") %in% names(p)) + ) + testthat::expect_equal(p[[1]][1], gam_n) + testthat::expect_equal(round(p[[2]][1] * 100, 1), gam_p) + testthat::expect_equal(p[[3]][1], sam_n) + testthat::expect_equal(round(p[[4]][1] * 100, 1), sam_p) + testthat::expect_equal(p[[5]][1], mam_n) + testthat::expect_equal(round(p[[6]][1] * 100, 1), mam_p) + } +) + +## When !is.null(.by) ---- +testthat::test_that( + "get_estimates() works OK when `.by` is not null", + { + ### Get estimates ---- + p <- anthro.02 |> + get_estimates( + muac = muac, + edema = edema, + .by = province + ) + + ### Observed estimates ---- + gam_n <- 61 + gam_p <- 5.9 + sam_n <- 19 + sam_p <- 1.8 + mam_n <- 42 + mam_p <- 4.1 + + ### Tests ---- + testthat::expect_s3_class(p, "tbl_df") + testthat::expect_equal(ncol(p), 7) + testthat::expect_equal(nrow(p), 2) + testthat::expect_true( + all(c("province", "gam_n", "gam_p", "sam_n", "sam_p", "mam_n", "mam_p") %in% names(p)) + ) + testthat::expect_equal(p[[2]][1], gam_n) + testthat::expect_equal(round(p[[3]][1] * 100, 1), gam_p) + testthat::expect_equal(p[[4]][1], sam_n) + testthat::expect_equal(round(p[[5]][1] * 100, 1), sam_p) + testthat::expect_equal(p[[6]][1], mam_n) + testthat::expect_equal(round(p[[7]][1] * 100, 1), mam_p) + } +) + +# Test check: mw_estimate_prevalence_screening() ---- +testthat::test_that( + "mw_estimate_prevalence_screening() works OK", + { + ### Get estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_screening( + muac = muac, + edema = edema, + .by = province + ) + + ### Observed estimates ---- + gam_n <- 61 + gam_p <- 5.9 + sam_n <- 19 + sam_p <- 1.8 + mam_n <- 42 + mam_p <- 4.1 + + ### Tests ---- + testthat::expect_s3_class(p, "tbl_df") + testthat::expect_equal(ncol(p), 7) + testthat::expect_equal(nrow(p), 2) + testthat::expect_true( + all(c("province", "gam_n", "gam_p", "sam_n", "sam_p", "mam_n", "mam_p") %in% names(p)) + ) + testthat::expect_equal(p[[2]][1], gam_n) + testthat::expect_equal(round(p[[3]][1] * 100, 1), gam_p) + testthat::expect_equal(p[[4]][1], sam_n) + testthat::expect_equal(round(p[[5]][1] * 100, 1), sam_p) + testthat::expect_equal(p[[6]][1], mam_n) + testthat::expect_equal(round(p[[7]][1] * 100, 1), mam_p) + } +) + +## When is.null(.by) +testthat::test_that( + "mw_estimate_prevalence_screening() works OK when `.by` is null", + { + ### Get estimates ---- + p <- anthro.02 |> + mw_estimate_prevalence_screening( + muac = muac, + edema = edema, + .by = NULL + ) + + ### Observed estimates ---- + gam_n <- 118 + gam_p <- 5.4 + sam_n <- 29 + sam_p <- 1.3 + mam_n <- 89 + mam_p <- 4.1 + + + ### Tests ---- + testthat::expect_s3_class(p, "tbl_df") + testthat::expect_equal(ncol(p), 6) + testthat::expect_equal(nrow(p), 1) + testthat::expect_true( + all(c("gam_n", "gam_p", "sam_n", "sam_p", "mam_n", "mam_p") %in% names(p)) + ) + testthat::expect_equal(p[[1]][1], gam_n) + testthat::expect_equal(round(p[[2]][1] * 100, 1), gam_p) + testthat::expect_equal(p[[3]][1], sam_n) + testthat::expect_equal(round(p[[4]][1] * 100, 1), sam_p) + testthat::expect_equal(p[[5]][1], mam_n) + testthat::expect_equal(round(p[[6]][1] * 100, 1), mam_p) + } +) + +## When an age-weighting approach is applied ---- +testthat::test_that( + "mw_estimate_prevalence_screening() applies age-weighting correctly", + { + ## Observed results ---- + p <- anthro.04 |> + subset(province == "Province 2") |> + mw_estimate_prevalence_screening() + + ## Expected results ---- + gam <- 11.2 + sam <- 2.0 + mam <- 9.2 + + ## Tests ---- + testthat::expect_s3_class(p, "tbl") + testthat::expect_equal(round(p[[3]][1] * 100, 1), gam) + testthat::expect_equal(round(p[[1]][1] * 100, 1), sam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), mam) + } +) + +## When used on a multiple-area data set ---- +testthat::test_that( + "mw_estimate_prevalence_screening() works well on a multiple-area dataset with + different categories of analysis_approach", + { + ### Get the prevalence estimates ---- + p <- anthro.04 |> + mw_estimate_prevalence_screening(muac = muac, edema = edema, .by = province) + + ### A Province whose analysis approach is unweighted --- + province_1 <- subset(p, province == "Province 1") + + ### A Province whose analysis approach is weighted --- + province_2 <- subset(p, province == "Province 2") + + ### A Province whose analysis approach is add missing (NA's) --- + province_3 <- subset(p, province == "Province 3") |> + select(!province) + + columns_to_check <- c("gam_n", "gam_p", "sam_n", "sam_p", "mam_n", "mam_p") + + ### test ---- + testthat::expect_vector(select(p, !province), size = 3, ncol(7)) + testthat::expect_s3_class(p, "tbl") + testthat::expect_true(all(sapply(province_3[columns_to_check], \(.) all(is.na(.))))) + testthat::expect_false(all(sapply(province_2[columns_to_check], \(.) all(is.na(.))))) + } +) + +## When the analysis path is to throw NA's ---- +testthat::test_that( + "mw_estimate_prevalence_screening() throws NA's", + { + ### Get the prevalence estimates ---- + p <- anthro.04 |> + subset(province == "Province 3") |> + mw_estimate_prevalence_screening(muac = muac, edema = edema) + + columns_to_check <- c("gam_p", "sam_p", "mam_p") + + ### test ---- + testthat::expect_vector(p, size = 1, ncol(3)) + testthat::expect_s3_class(p, "tbl") + testthat::expect_true(all(sapply(p[columns_to_check], \(.) all(is.na(.))))) + } +) diff --git a/tests/testthat/test-prev_wasting_wfhz.R b/tests/testthat/test-prev_wasting_wfhz.R new file mode 100644 index 0000000..1447685 --- /dev/null +++ b/tests/testthat/test-prev_wasting_wfhz.R @@ -0,0 +1,244 @@ +# Test check: mw_estimate_prevalence_wfhz() ---- +## When std =! problematic & !is.null(wt) & !is.null(edema) ---- +testthat::test_that( + "mw_estimate_prevalence_wfhz() yields correct estimates", + { + ### Get the prevalence estimates ---- + p <- mw_estimate_prevalence_wfhz( + df = anthro.02, + edema = edema, + wt = wtfactor, + .by = NULL + ) + + ### Expected results ---- + #### GAM estimates and uncertainty ---- + n_gam <- 121 + p_gam <- 4.3 + p_gam_lci <- 3.2 + p_gam_uci <- 5.4 + deff <- 1.58 + + #### SAM estimates and uncertainty ---- + n_sam <- 43 + p_sam <- 0.8 + p_sam_lci <- 0.2 + p_sam_uci <- 1.3 + + #### MAM estimates and uncertainty ---- + n_mam <- 78 + p_mam <- 3.5 + p_mam_lci <- 2.6 + p_mam_uci <- 4.5 + + #### Sum of weigths ---- + sum_wt <- 1752680 + + ### Tests ---- + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(round(p[[5]][1], 2), deff) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + testthat::expect_equal(p[[16]][1], sum_wt) + } +) + +## When std != problematic & is.null(wt) & !is.null(edema) ---- +testthat::test_that( + "mw_estimate_prevalence_wfhz() yields correct estimates when survey weights is + NULL", + { + ### Get the prevalence estimates ---- + p <- mw_estimate_prevalence_wfhz( + df = wfhz.01, + edema = edema, + .by = NULL + ) + + ### Expected results ---- + #### GAM estimates and uncertainty ---- + n_gam <- 25 + p_gam <- 7.4 + p_gam_lci <- 3.5 + p_gam_uci <- 11.3 + + #### SAM estimates and uncertainty ---- + n_sam <- 4 + p_sam <- 0.3 + p_sam_lci <- -0.3 + p_sam_uci <- 1.0 + + #### MAM estimates and uncertainty ---- + n_mam <- 21 + p_mam <- 7.0 + p_mam_lci <- 3.1 + p_mam_uci <- 11.0 + + ### Tests ---- + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + } +) + +## When std != problematic & is.null(wt) & is.null(edema) ---- +testthat::test_that( + "mw_estimate_prevalence_wfhz() yields correct estimates when edema is + NULL", + { + ### Get the prevalence estimates ---- + p <- mw_estimate_prevalence_wfhz( + df = anthro.02, + edema = NULL, + .by = NULL + ) + + ### Expected results ---- + #### GAM estimates and uncertainty ---- + n_gam <- 107 + p_gam <- 3.4 + p_gam_lci <- 2.6 + p_gam_uci <- 4.2 + + #### SAM estimates and uncertainty ---- + n_sam <- 29 + p_sam <- 0.0 + p_sam_lci <- 0.0 + p_sam_uci <- 0.0 + + #### MAM estimates and uncertainty ---- + n_mam <- 78 + p_mam <- 3.4 + p_mam_lci <- 2.6 + p_mam_uci <- 4.2 + + ### Tests ---- + testthat::expect_equal(p[[1]][1], n_gam) + testthat::expect_equal(round(p[[2]][1] * 100, 1), p_gam) + testthat::expect_equal(round(p[[3]][1] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[4]][1] * 100, 1), p_gam_uci) + testthat::expect_equal(p[[6]][1], n_sam) + testthat::expect_equal(round(p[[7]][1] * 100, 1), p_sam) + testthat::expect_equal(round(p[[8]][1] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[9]][1] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[11]][1], n_mam) + testthat::expect_equal(round(p[[12]][1] * 100, 1), p_mam) + testthat::expect_equal(round(p[[13]][1] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[14]][1] * 100, 1), p_mam_uci) + } +) + +## When std =! problematic & !is.null(wt) with .by = province ---- +testthat::test_that( + "mw_estimate_prevalence_wfhz() yields correct estimates when .by is + used", + { + ### Get the prevalence estimates ---- + p <- mw_estimate_prevalence_wfhz( + df = anthro.02, + edema = edema, + wt = wtfactor, + .by = province + ) + + ### Expected results for Nampula province ---- + #### GAM estimates and uncertainty ---- + n_gam <- 80 + p_gam <- 5.9 + p_gam_lci <- 4.1 + p_gam_uci <- 7.8 + deff <- 1.52 + + #### SAM estimates and uncertainty ---- + n_sam <- 33 + p_sam <- 1.3 + p_sam_lci <- 0.3 + p_sam_uci <- 2.3 + + #### MAM estimates and uncertainty ---- + n_mam <- 47 + p_mam <- 4.7 + p_mam_lci <- 3.1 + p_mam_uci <- 6.2 + + #### Sum of weigths ---- + sum_wt <- 878704 + + ### Tests ---- + testthat::expect_equal(p[[2]][2], n_gam) + testthat::expect_equal(round(p[[3]][2] * 100, 1), p_gam) + testthat::expect_equal(round(p[[4]][2] * 100, 1), p_gam_lci) + testthat::expect_equal(round(p[[5]][2] * 100, 1), p_gam_uci) + testthat::expect_equal(round(p[[6]][2], 2), deff) + testthat::expect_equal(p[[7]][2], n_sam) + testthat::expect_equal(round(p[[8]][2] * 100, 1), p_sam) + testthat::expect_equal(round(p[[9]][2] * 100, 1), p_sam_lci) + testthat::expect_equal(round(p[[10]][2] * 100, 1), p_sam_uci) + testthat::expect_equal(p[[12]][2], n_mam) + testthat::expect_equal(round(p[[13]][2] * 100, 1), p_mam) + testthat::expect_equal(round(p[[14]][2] * 100, 1), p_mam_lci) + testthat::expect_equal(round(p[[15]][2] * 100, 1), p_mam_uci) + testthat::expect_equal(p[[17]][2], sum_wt) + } +) + +## When std == problematic & is.null(wt) ---- +testthat::test_that( + "mw_estimate_prevalence_wfhz() works well on a multi-area dataset with + wfhz standard deviation taking different rates", + { + ### Get the prevalence estimates ---- + p <- anthro.03 |> + mw_wrangle_wfhz( + sex, + weight, + height, + .recode_sex = TRUE + ) |> + mw_estimate_prevalence_wfhz( + edema = edema, + wt = NULL, + .by = district + ) + + ### Select a district where standard deviation is rated as problematic ---- + metuge_df <- subset(p, district == "Metuge") + + ### Select a district where standard deviation is rated as problematic ---- + maravia_df <- subset(p, district == "Maravia") + + ### Select a district where standard deviation is rated as not problematic ---- + chiuta_df <- subset(p, district == "Chiuta") + + columns_to_check <- c( + "gam_n", "gam_p_low", "gam_p_upp", "sam_n", + "sam_p_low", "sam_p_upp", "mam_n", "mam_p_low", + "mam_p_upp", "wt_pop" + ) + + ### Tests ---- + testthat::expect_vector(dplyr::select(p, !district), size = 4, ncol(17)) + testthat::expect_s3_class(p, "tbl") + testthat::expect_true(all(sapply(metuge_df[columns_to_check], \(.) all(is.na(.))))) + testthat::expect_true(all(sapply(maravia_df[columns_to_check], \(.) all(is.na(.))))) + testthat::expect_false(all(sapply(chiuta_df[columns_to_check], \(.) all(is.na(.))))) + } +) diff --git a/tests/testthat/test-prevalence_combined.R b/tests/testthat/test-prevalence_combined.R deleted file mode 100644 index c3079c0..0000000 --- a/tests/testthat/test-prevalence_combined.R +++ /dev/null @@ -1,256 +0,0 @@ -## Test check: compute_combined_prevalence() ---- -### When std != problematic & muac_analysis == unweighted & !is.null(.wt) ---- -local({ - - #### Get prevalence estimates ---- - p <- anthro.02 |> - compute_combined_prevalence(.edema = edema, .wt = "wtfactor") - - #### Expected results ---- - ##### combined GAM estimates and uncertainty ---- - n_cgam <- 199 - p_cgam <- 7.1 - p_cgam_lci <- 5.6 - p_cgam_uci <- 8.5 - deff <- 1.72 - - ##### combined SAM estimates and uncertainty ---- - n_csam <- 68 - p_csam <- 1.5 - p_csam_lci <- 0.8 - p_csam_uci <- 2.3 - - ##### combined MAM estimates and uncertainty ---- - n_cmam <- 145 - p_cmam <- 6.0 - p_cmam_lci <- 4.7 - p_cmam_uci <- 7.3 - - ##### Sum of weights ----- - sum_wt <- 1738110 - - #### The test ---- - testthat::test_that( - "compute_combined_prevalence() yields correct estimates when edema and - survey weights are supplied", - { - testthat::expect_equal(p[[1]][1], n_cgam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_cgam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_cgam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_cgam_uci) - testthat::expect_equal(round(p[[5]][1], 2), deff) - testthat::expect_equal(p[[6]][1], n_csam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_csam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_csam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_csam_uci) - testthat::expect_equal(p[[11]][1], n_cmam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_cmam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_cmam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_cmam_uci) - testthat::expect_equal(round(p[[16]][1]), sum_wt) - } - ) -}) - -### When std != problematic & muac_analysis == unweighted & !is.null(.wt) ---- -local({ - - #### Get prevalence estimates ---- - p <- anthro.02 |> - compute_combined_prevalence(.edema = NULL, .wt = "wtfactor") - - #### Expected results ---- - ##### combined GAM estimates and uncertainty ---- - n_cgam <- 187 - p_cgam <- 6.4 - p_cgam_lci <- 5.0 - p_cgam_uci <- 7.8 - deff <- 1.67 - - ##### combined SAM estimates and uncertainty ---- - n_csam <- 55 - p_csam <- 0.8 - p_csam_lci <- 0.3 - p_csam_uci <- 1.2 - - ##### combined MAM estimates and uncertainty ---- - n_cmam <- 146 - p_cmam <- 6.1 - p_cmam_lci <- 4.8 - p_cmam_uci <- 7.4 - - #### Sum of weights ---- - sum_wt <- 1738110 - - #### The test ---- - testthat::test_that( - "compute_combined_prevalence() yields correct estimates when edema is NULL", - { - testthat::expect_equal(p[[1]][1], n_cgam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_cgam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_cgam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_cgam_uci) - testthat::expect_equal(p[[6]][1], n_csam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_csam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_csam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_csam_uci) - testthat::expect_equal(p[[11]][1], n_cmam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_cmam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_cmam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_cmam_uci) - testthat::expect_equal(round(p[[16]][1]), sum_wt) - } - ) -}) - - -### When is.null(.wt) ---- -local({ - - #### Get prevalence estimates ---- - p <- anthro.02 |> - compute_combined_prevalence(.edema = edema) - - #### Expected results ---- - ##### combined GAM estimates and uncertainty ---- - n_cgam <- 199 - p_cgam <- 6.8 - p_cgam_lci <- 5.7 - p_cgam_uci <- 8.0 - - ##### combined SAM estimates and uncertainty ---- - n_csam <- 68 - p_csam <- 1.3 - p_csam_lci <- 0.8 - p_csam_uci <- 1.8 - - ##### combined MAM estimates and uncertainty ---- - n_cmam <- 145 - p_cmam <- 5.9 - p_cmam_lci <- 4.8 - p_cmam_uci <- 7.0 - - #### The test ---- - testthat::test_that( - "compute_combined_prevalence() yields correct estimates", - { - testthat::expect_equal(p[[1]][1], n_cgam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_cgam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_cgam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_cgam_uci) - testthat::expect_equal(p[[6]][1], n_csam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_csam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_csam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_csam_uci) - testthat::expect_equal(p[[11]][1], n_cmam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_cmam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_cmam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_cmam_uci) - } - ) -}) - -### When !is.null(.wt) with .summary_by = province ---- -local({ - - #### Get prevalence estimates ---- - p <- anthro.02 |> - compute_combined_prevalence( - .edema = edema, - .wt = "wtfactor", - .summary_by = province - ) - - #### Expected results for Nampula province ---- - ##### GAM estimates and uncertainty ---- - n_cgam <- 121 - p_cgam <- 8.4 - p_cgam_lci <- 6.0 - p_cgam_uci <- 10.9 - deff <- 1.87 - - ##### SAM estimates and uncertainty ---- - n_csam <- 47 - p_csam <- 2.0 - p_csam_lci <- 0.7 - p_csam_uci <- 3.3 - - ##### MAM estimates and uncertainty ---- - n_cmam <- 80 - p_cmam <- 6.8 - p_cmam_lci <- 4.7 - p_cmam_uci <- 9.0 - - ##### Sum of survey weights ----- - sum_wt <- 869504 - #### The test ---- - testthat::test_that( - "compute_combined_prevalence() yields correct estimates when .summary_by is - used", - { - testthat::expect_equal(p[[2]][2], n_cgam) - testthat::expect_equal(round(p[[3]][2]*100, 1), p_cgam) - testthat::expect_equal(round(p[[4]][2]*100, 1), p_cgam_lci) - testthat::expect_equal(round(p[[5]][2]*100, 1), p_cgam_uci) - testthat::expect_equal(round(p[[6]][2], 2), deff) - testthat::expect_equal(p[[7]][2], n_csam) - testthat::expect_equal(round(p[[8]][2]*100, 1), p_csam) - testthat::expect_equal(round(p[[9]][2]*100, 1), p_csam_lci) - testthat::expect_equal(round(p[[10]][2]*100, 1), p_csam_uci) - testthat::expect_equal(p[[12]][2], n_cmam) - testthat::expect_equal(round(p[[13]][2]*100, 1), p_cmam) - testthat::expect_equal(round(p[[14]][2]*100, 1), p_cmam_lci) - testthat::expect_equal(round(p[[15]][2]*100, 1), p_cmam_uci) - testthat::expect_equal(round(p[[17]][2]), sum_wt) - } - ) -}) - -### When !is.null(.summary_by) and analysis approach has different categories ---- -local({ - - ### Get the prevalence estimates ---- - p <- anthro.03 |> - mw_wrangle_age( - age = age, - .decimals = 2 - ) |> - mw_wrangle_muac( - sex = sex, - muac = muac, - age = "age", - .recode_sex = TRUE, - .recode_muac = TRUE, - .to = "cm" - ) |> - dplyr::mutate(muac = recode_muac(muac, .to = "mm")) |> - mw_wrangle_wfhz( - sex = sex, - weight = weight, - height = height, - .recode_sex = TRUE) |> - compute_combined_prevalence( - .edema = edema, - .summary_by = district - ) - - ### Subset a district where a normal analysis should be computed ---- - CB <- subset(p, district == "Cahora-Bassa") - - ## Subset a district where NA should be thrown ---- - M <- subset(p, district == "Maravia")|> dplyr::select(!district) - - ### The test ---- - testthat::test_that( - "compute_combined_prevalence() works well on a dataframe with multiple survey areas with - different analysis approach required", - { - testthat::expect_vector(dplyr::select(p, !district), size = 4, ncol(17)) - testthat::expect_s3_class(p, "tbl") - testthat::expect_false(all(sapply(CB[names(CB)], \(.) all(is.na(.))))) - testthat::expect_true(all(sapply(M[names(M)], \(.) all(is.na(.))))) - } - ) -}) - - diff --git a/tests/testthat/test-prevalence_mfaz.R b/tests/testthat/test-prevalence_mfaz.R deleted file mode 100644 index 02932b0..0000000 --- a/tests/testthat/test-prevalence_mfaz.R +++ /dev/null @@ -1,82 +0,0 @@ -# Test check: compute_mfaz_prevalence ---- - -## When std != problematic & is.null(.wt) & !is.null(.edema) ---- -local({ - - ### Get the prevalence estimates ---- - p <- anthro.04 |> - compute_mfaz_prevalence(.edema = edema, .summary_by = NULL) - - ### Expected results ---- - #### GAM estimates and uncertainty ---- - n_gam <- 330 - p_gam <- 10.7 - p_gam_lci <- 8.7 - p_gam_uci <- 12.7 - - #### SAM estimates and uncertainty ---- - n_sam <- 53 - p_sam <- 1.4 - p_sam_lci <- 0.9 - p_sam_uci <- 2.0 - - #### MAM estimates and uncertainty ---- - n_mam <- 277 - p_mam <- 9.3 - p_mam_lci <- 7.5 - p_mam_uci <- 11.1 - - ### The test ---- - testthat::test_that( - "compute_mfaz_prevalence() yields correct estimates", - { - testthat::expect_equal(p[[1]][1], n_gam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_gam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_gam_uci) - testthat::expect_equal(p[[6]][1], n_sam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_sam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_sam_uci) - testthat::expect_equal(p[[11]][1], n_mam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_mam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_mam_uci) - } - ) -}) - - -## When standard deviation == problematic ---- -local({ - - ### Get the prevalence estimates ---- - p <- anthro.04 |> - compute_mfaz_prevalence(.edema = edema, .summary_by = province) - - ### Subset a province whose analysis approach is unweighted --- - province_1 <- subset(p, province == "Province 1") - - ### Subset a province whose analysis approach is weighted --- - province_3 <- subset(p, province == "Province 3") - - - columns_to_check <- c("gam_n", "gam_p_low", "gam_p_upp", "sam_n", - "sam_p_low", "sam_p_upp", "mam_n", "mam_p_low", - "mam_p_upp", "wt_pop") - - ### The test ---- - - testthat::test_that( - "compute_mfaz_prevalence() works well on a dataframe with multiple survey areas with - different categories on analysis_approach", - { - testthat::expect_vector(dplyr::select(p, !province), size = 3, ncol(17)) - testthat::expect_s3_class(p, "tbl") - testthat::expect_false(all(sapply(province_1[columns_to_check], \(.) all(is.na(.))))) - testthat::expect_true(all(sapply(province_3[columns_to_check], \(.) all(is.na(.))))) - } - ) -}) - - diff --git a/tests/testthat/test-prevalence_muac.R b/tests/testthat/test-prevalence_muac.R deleted file mode 100644 index 742cab7..0000000 --- a/tests/testthat/test-prevalence_muac.R +++ /dev/null @@ -1,403 +0,0 @@ -### Test check: tell_muac_analysis_strategy() ---- - -local({ - ### Input data ---- - age_ratio_class_1 <- "Problematic" - age_ratio_class_2 <- "Good" - std_class_1 <- "Excellent" - std_class_2 <- "Problematic" - - ### Expected results ---- - expected_1 <- "weighted" - expected_2 <- "missing" - expected_3 <- "unweighted" - - ### Observed results ---- - obs_1 <- tell_muac_analysis_strategy(age_ratio_class_1, std_class_1) - obs_2 <- tell_muac_analysis_strategy(age_ratio_class_1, std_class_2) - obs_3 <- tell_muac_analysis_strategy(age_ratio_class_2, std_class_1) - - ### The test ---- - testthat::test_that( - "tell_muac_analysis_strategy() works", - { - testthat::expect_equal(obs_1, expected_1) - testthat::expect_equal(obs_2, expected_2) - testthat::expect_equal(obs_3, expected_3) - } - ) -}) - -### Test check: classify_wasting() with edema set to NULL ---- - -local({ - #### Input data ---- - muac_values <- c( - 123, 129, 126, 113, 130, 122, 112, 124, 128, - 121, 120, 110, 114, 125, 119, 127, 117, 118, 111, 115 - ) - edema <- NULL - - #### Expected results ---- - expected <- c( - "mam", "not wasted", "not wasted", "sam", "not wasted", "mam", "sam", "mam", - "not wasted", "mam", "mam", "sam", "sam", "not wasted", "mam", "not wasted", - "mam", "mam", "sam", "mam" - ) - - #### Observed results ---- - obs <- classify_wasting_for_cdc_approach(muac = muac_values, .edema = edema) - - #### The test ---- - testthat::test_that( - "cdc_classify_wasting() does his job well", - { - testthat::expect_vector(obs, ptype = "character", size = 20) - testthat::expect_equal(obs, expected) - } - ) -}) - - -### Test check: cdc_apply_age_weighting() ---- -#### Edema set to !NULL ---- - -local({ - #### Input data ---- - x <- mfaz.01 |> - mw_wrangle_age( - age = age - ) |> - mw_wrangle_muac( - sex = sex, - muac = muac, - age = age, - .recode_sex = TRUE, - .recode_muac = TRUE, - .to = "cm" - ) |> - subset(flag_mfaz == 0) |> - mutate(muac = recode_muac(muac, .to = "mm")) - - - #### Expected results calculated in the CDC/SMART MUAC tool ---- - expect_sam <- 0.021 - expect_mam <- 0.081 - - #### Observed results ---- - obs_sam <- with(x, - apply_cdc_age_weighting( - muac = muac, - .edema = edema, - age = age, - status = "sam") - ) - obs_mam <- with(x, - apply_cdc_age_weighting( - muac = muac, - .edema = edema, - age = age, - status = "mam") - ) - - #### The test ---- - testthat::test_that( - "apply_cdc_age_weighting() works amazing", - { - testthat::expect_vector(obs_sam, size = 1) - testthat::expect_vector(obs_mam, size = 1) - testthat::expect_equal(round(obs_sam, 3), expect_sam) - testthat::expect_equal(round(obs_mam, 3), expect_mam) - } - ) -}) - -### Edema set to NULL ---- -local({ - #### Input data ---- - x <- mfaz.01 |> - mw_wrangle_age( - age = age, - .decimals = 2 - ) |> - mw_wrangle_muac( - sex = sex, - muac = muac, - age = age, - .recode_sex = TRUE, - .recode_muac = TRUE, - .to = "cm" - ) |> - subset(flag_mfaz == 0) |> - mutate( - muac = recode_muac(muac, .to = "mm")) - - - #### Expected results calculated in the CDC/SMART MUAC tool ---- - expect_sam <- 0.014 - expect_mam <- 0.080 - - #### Observed results ---- - obs_sam <- with(x, - apply_cdc_age_weighting( - muac = muac, - age = age, - status = "sam") - ) - obs_mam <- with(x, - apply_cdc_age_weighting( - muac = muac, - age = age, - status = "mam") - ) - - #### The test ---- - testthat::test_that( - "apply_cdc_age_weighting() works amazing", - { - testthat::expect_vector(obs_sam, size = 1) - testthat::expect_vector(obs_mam, size = 1) - testthat::expect_equal(round(obs_sam, 3), expect_sam) - testthat::expect_equal(round(obs_mam, 2), expect_mam) - } - ) -}) - -## Test check: compute_muac_prevalence() ---- -#### When age_ratio & std != problematic & !is.null(.wt) & !is.null(.edema) ---- -local({ - - #### Get the prevalence estimates ---- - p <- anthro.02 |> - compute_muac_prevalence(.edema = edema, .wt = "wtfactor", .summary_by = NULL) - - #### Expected results ---- - ##### GAM estimates and uncertainty ---- - n_gam <- 135 - p_gam <- 5.6 - p_gam_lci <- 4.3 - p_gam_uci <- 6.9 - deff <- 1.86 - - ##### SAM estimates and uncertainty ---- - n_sam <- 46 - p_sam <- 1.7 - p_sam_lci <- 0.9 - p_sam_uci <- 2.4 - - ##### MAM estimates and uncertainty ---- - n_mam <- 89 - p_mam <- 4.0 - p_mam_lci <- 3.0 - p_mam_uci <- 4.9 - - #### The test ---- - testthat::test_that( - "compute_muac_prevalence() yields correct estimates when edema and survey - weights are supplied", - { - testthat::expect_equal(p[[1]][1], n_gam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_gam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_gam_uci) - testthat::expect_equal(p[[6]][1], n_sam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_sam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_sam_uci) - testthat::expect_equal(p[[11]][1], n_mam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_mam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_mam_uci) - } - ) -}) - -#### When age_ratio & std != problematic & !is.null(.wt) & !is.null(.edema) ---- -local({ - - #### Get the prevalence estimates ---- - p <- anthro.02 |> - compute_muac_prevalence(.edema = NULL, .wt = "wtfactor", .summary_by = NULL) - - #### Expected results ---- - ##### GAM estimates and uncertainty ---- - n_gam <- 123 - p_gam <- 5.0 - p_gam_lci <- 3.8 - p_gam_uci <- 6.2 - deff <- 1.75 - - ##### SAM estimates and uncertainty ---- - n_sam <- 33 - p_sam <- 0.9 - p_sam_lci <- 0.4 - p_sam_uci <- 1.5 - - ##### MAM estimates and uncertainty ---- - n_mam <- 90 - p_mam <- 4.0 - p_mam_lci <- 3.1 - p_mam_uci <- 5.0 - - #### The test ---- - testthat::test_that( - "compute_muac_prevalence() yields correct estimates when edema is not - supplied", - { - testthat::expect_equal(p[[1]][1], n_gam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_gam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_gam_uci) - testthat::expect_equal(p[[6]][1], n_sam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_sam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_sam_uci) - testthat::expect_equal(p[[11]][1], n_mam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_mam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_mam_uci) - } - ) -}) - - - -#### When age_ratio & std != problematic & is.null(.wt) ---- -local({ - - ##### Get prevalence estimates ---- - p <- anthro.02 |> - compute_muac_prevalence(.edema = edema, .summary_by = NULL) - - #### Expected results ---- - ##### GAM estimates and uncertainty ---- - n_gam <- 135 - p_gam <- 5.4 - p_gam_lci <- 4.3 - p_gam_uci <- 6.5 - - ##### SAM estimates and uncertainty ---- - n_sam <- 46 - p_sam <- 1.3 - p_sam_lci <- 0.8 - p_sam_uci <- 1.8 - - ##### MAM estimates and uncertainty ---- - n_mam <- 89 - p_mam <- 4.1 - p_mam_lci <- 3.1 - p_mam_uci <- 5.0 - - #### The test ---- - testthat::test_that( - "compute_muac_prevalence() yields correct estimates when edema is supplied", - { - testthat::expect_equal(p[[1]][1], n_gam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_gam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_gam_uci) - testthat::expect_equal(p[[6]][1], n_sam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_sam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_sam_uci) - testthat::expect_equal(p[[11]][1], n_mam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_mam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_mam_uci) - } - ) -}) - -### When age_ratio & std != problematic & !is.null(.wt) with .summary_by = province -local({ - - #### Get prevalence estimates ---- - p <- anthro.02 |> - compute_muac_prevalence( - .edema = edema, - .wt = "wtfactor", - .summary_by = province - ) - - #### Expected results for Zambezia province ---- - ##### GAM estimates and uncertainty ---- - n_gam <- 65 - p_gam <- 5.5 - p_gam_lci <- 3.8 - p_gam_uci <- 7.2 - deff <- 1.67 - - ##### SAM estimates and uncertainty ---- - n_sam <- 18 - p_sam <- 1.3 - p_sam_lci <- 0.4 - p_sam_uci <- 2.2 - - ##### MAM estimates and uncertainty ---- - n_mam <- 47 - p_mam <- 4.2 - p_mam_lci <- 3.0 - p_mam_uci <- 5.4 - - ##### Sum of weigths ---- - sum_wt <- 880902 - - #### The test ---- - testthat::test_that( - "compute_muac_prevalence() yields correct estimates when .summary_by is - used", - { - testthat::expect_equal(p[[2]][2], n_gam) - testthat::expect_equal(round(p[[3]][2]*100, 1), p_gam) - testthat::expect_equal(round(p[[4]][2]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[5]][2]*100, 1), p_gam_uci) - testthat::expect_equal(round(p[[6]][2], 2), deff) - testthat::expect_equal(p[[7]][2], n_sam) - testthat::expect_equal(round(p[[8]][2]*100, 1), p_sam) - testthat::expect_equal(round(p[[9]][2]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[10]][2]*100, 1), p_sam_uci) - testthat::expect_equal(p[[12]][2], n_mam) - testthat::expect_equal(round(p[[13]][2]*100, 1), p_mam) - testthat::expect_equal(round(p[[14]][2]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[15]][2]*100, 1), p_mam_uci) - testthat::expect_equal(p[[17]][2], sum_wt) - } - ) -}) - - -### When !is.null(.summary_by) and analysis approach has different categories ---- -local({ - - ## Get the prevalence estimates ---- - p <- anthro.04 |> - compute_muac_prevalence(.edema = edema, .summary_by = province) - - ## Subset a province whose analysis approach is unweighted --- - province_1 <- subset(p, province == "Province 1") - - ## Subset a province whose analysis approach is weighted --- - province_2 <- subset(p, province == "Province 2") - - ## Subset a province whose analysis approach is add missing (NA's) --- - province_3 <- subset(p, province == "Province 3") |> dplyr::select(!province) - - columns_to_check <- c("gam_n", "gam_p_low", "gam_p_upp", "sam_n", - "sam_p_low", "sam_p_upp", "mam_n", "mam_p_low", - "mam_p_upp", "wt_pop") - - ## The test ---- - - testthat::test_that( - "compute_muac_prevalence() works well on a dataframe with multiple survey areas with - different categories on analysis_approach", - { - testthat::expect_vector(dplyr::select(p, !province), size = 3, ncol(17)) - testthat::expect_s3_class(p, "tbl") - testthat::expect_false(all(sapply(province_1[columns_to_check], \(.) all(is.na(.))))) - testthat::expect_true(all(sapply(province_2[columns_to_check], \(.) all(is.na(.))))) - testthat::expect_true(all(sapply(province_3[names(province_3)], \(.) all(is.na(.))))) - } - ) -}) - diff --git a/tests/testthat/test-prevalence_wfhz.R b/tests/testthat/test-prevalence_wfhz.R deleted file mode 100644 index a4fb50d..0000000 --- a/tests/testthat/test-prevalence_wfhz.R +++ /dev/null @@ -1,299 +0,0 @@ -# Test check: apply_probit_approach() ---- -local({ - x <- anthro.03 |> - mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> - subset(district == "Metuge") - - p_gam <- apply_probit_approach(x$wfhz, .status = "gam") - p_sam <- apply_probit_approach(x$wfhz, .status = "sam") - - ## The test ---- - testthat::test_that( - "apply_probit_approach() works", - { - testthat::expect_vector(c(p_gam, p_sam), ptype = double(), size = 2) - } - ) -}) - -# Test check: compute_probit_prevalence() ---- -local({ - p <- anthro.03 |> - mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> - subset(district == "Metuge") |> - compute_probit_prevalence() - - ## The test ---- - testthat::test_that( - "compute_probit_prevalence() return the correct ouput object", - { - testthat::expect_s3_class(p, class = "tbl", exact = FALSE) - testthat::expect_length(p, 3) - testthat::expect_vector(p, nrow(1)) - } - ) -}) - -# Test check: compute_probit_prevalence(.summary_by = district) ---- -local({ - p <- anthro.03 |> - mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> - subset(district == "Metuge" | district == "Maravia") |> - compute_probit_prevalence(.summary_by = district) - - ## The test ---- - testthat::test_that( - "compute_probit_prevalence() return the correct ouput object", - { - testthat::expect_length(p, 4) - testthat::expect_vector(p, nrow(2)) - } - ) -}) - -# Test check: compute_wfhz_prevalence() ---- -## When std =! problematic & !is.null(.wt) & !is.null(.edema) ---- -local({ - - ### Get the prevalence estimates ---- - p <- anthro.02 |> - compute_wfhz_prevalence(.edema = edema, .wt = "wtfactor", .summary_by = NULL) - - ### Expected results ---- - #### GAM estimates and uncertainty ---- - n_gam <- 121 - p_gam <- 4.3 - p_gam_lci <- 3.2 - p_gam_uci <- 5.4 - deff <- 1.58 - - #### SAM estimates and uncertainty ---- - n_sam <- 43 - p_sam <- 0.8 - p_sam_lci <- 0.2 - p_sam_uci <- 1.3 - - #### MAM estimates and uncertainty ---- - n_mam <- 78 - p_mam <- 3.5 - p_mam_lci <- 2.6 - p_mam_uci <- 4.5 - - #### Sum of weigths ---- - sum_wt <- 1752680 - - ### The test ---- - testthat::test_that( - "compute_wfhz_prevalence() yields correct estimates", - { - testthat::expect_equal(p[[1]][1], n_gam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_gam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_gam_uci) - testthat::expect_equal(round(p[[5]][1], 2), deff) - testthat::expect_equal(p[[6]][1], n_sam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_sam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_sam_uci) - testthat::expect_equal(p[[11]][1], n_mam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_mam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_mam_uci) - testthat::expect_equal(p[[16]][1], sum_wt) - } - ) -}) - -## When std =! problematic & !is.null(.wt) & is.null(.edema) ---- -local({ - - ### Get the prevalence estimates ---- - p <- anthro.02 |> - compute_wfhz_prevalence(.edema = NULL, .wt = "wtfactor", .summary_by = NULL) - - ### Expected results ---- - #### GAM estimates and uncertainty ---- - n_gam <- 107 - p_gam <- 3.5 - p_gam_lci <- 2.6 - p_gam_uci <- 4.5 - deff <- 1.43 - - #### SAM estimates and uncertainty ---- - n_sam <- 29 - p_sam <- 0.0 - p_sam_lci <- 0.0 - p_sam_uci <- 0.0 - - #### MAM estimates and uncertainty ---- - n_mam <- 78 - p_mam <- 3.5 - p_mam_lci <- 2.6 - p_mam_uci <- 4.5 - - #### Sum of weigths ---- - sum_wt <- 1752680 - - ### The test ---- - testthat::test_that( - "compute_wfhz_prevalence() yields correct estimates when edema is NULL", - { - testthat::expect_equal(p[[1]][1], n_gam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_gam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_gam_uci) - testthat::expect_equal(round(p[[5]][1], 2), deff) - testthat::expect_equal(p[[6]][1], n_sam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_sam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_sam_uci) - testthat::expect_equal(p[[11]][1], n_mam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_mam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_mam_uci) - testthat::expect_equal(p[[16]][1], sum_wt) - } - ) -}) - - -## When std != problematic & is.null(.wt) & !is.null(.edema) ---- -local({ - - ### Get the prevalence estimates ---- - p <- wfhz.01 |> - compute_wfhz_prevalence(.edema = edema, .summary_by = NULL) - - ### Expected results ---- - #### GAM estimates and uncertainty ---- - n_gam <- 25 - p_gam <- 7.4 - p_gam_lci <- 3.5 - p_gam_uci <- 11.3 - - #### SAM estimates and uncertainty ---- - n_sam <- 4 - p_sam <- 0.3 - p_sam_lci <- -0.3 - p_sam_uci <- 1.0 - - #### MAM estimates and uncertainty ---- - n_mam <- 21 - p_mam <- 7.0 - p_mam_lci <- 3.1 - p_mam_uci <- 11.0 - - ### The test ---- - testthat::test_that( - "compute_wfhz_prevalence() yields correct estimates when survey weights is - NULL", - { - testthat::expect_equal(p[[1]][1], n_gam) - testthat::expect_equal(round(p[[2]][1]*100, 1), p_gam) - testthat::expect_equal(round(p[[3]][1]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[4]][1]*100, 1), p_gam_uci) - testthat::expect_equal(p[[6]][1], n_sam) - testthat::expect_equal(round(p[[7]][1]*100, 1), p_sam) - testthat::expect_equal(round(p[[8]][1]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[9]][1]*100, 1), p_sam_uci) - testthat::expect_equal(p[[11]][1], n_mam) - testthat::expect_equal(round(p[[12]][1]*100, 1), p_mam) - testthat::expect_equal(round(p[[13]][1]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[14]][1]*100, 1), p_mam_uci) - } - ) -}) - -## When std =! problematic & !is.null(.wt) with .summary_by = province ---- -local({ - - ### Get the prevalence estimates ---- - p <- anthro.02 |> - compute_wfhz_prevalence( - .edema = edema, - .wt = "wtfactor", - .summary_by = province - ) - - ### Expected results for Nampula province ---- - #### GAM estimates and uncertainty ---- - n_gam <- 80 - p_gam <- 5.9 - p_gam_lci <- 4.1 - p_gam_uci <- 7.8 - deff <- 1.52 - - #### SAM estimates and uncertainty ---- - n_sam <- 33 - p_sam <- 1.3 - p_sam_lci <- 0.3 - p_sam_uci <- 2.3 - - #### MAM estimates and uncertainty ---- - n_mam <- 47 - p_mam <- 4.7 - p_mam_lci <- 3.1 - p_mam_uci <- 6.2 - - #### Sum of weigths ---- - sum_wt <- 878704 - - ### The test ---- - testthat::test_that( - "compute_wfhz_prevalence() yields correct estimates when .summary_by is - used", - { - testthat::expect_equal(p[[2]][2], n_gam) - testthat::expect_equal(round(p[[3]][2]*100, 1), p_gam) - testthat::expect_equal(round(p[[4]][2]*100, 1), p_gam_lci) - testthat::expect_equal(round(p[[5]][2]*100, 1), p_gam_uci) - testthat::expect_equal(round(p[[6]][2], 2), deff) - testthat::expect_equal(p[[7]][2], n_sam) - testthat::expect_equal(round(p[[8]][2]*100, 1), p_sam) - testthat::expect_equal(round(p[[9]][2]*100, 1), p_sam_lci) - testthat::expect_equal(round(p[[10]][2]*100, 1), p_sam_uci) - testthat::expect_equal(p[[12]][2], n_mam) - testthat::expect_equal(round(p[[13]][2]*100, 1), p_mam) - testthat::expect_equal(round(p[[14]][2]*100, 1), p_mam_lci) - testthat::expect_equal(round(p[[15]][2]*100, 1), p_mam_uci) - testthat::expect_equal(p[[17]][2], sum_wt) - } - ) -}) - -## When std == problematic & is.null(.wt) ---- -# To access problematic SD's in antho.03, .summary_by has to be not null ---- -local({ - - ### Get the prevalence estimates ---- - p <- anthro.03 |> - mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> - compute_wfhz_prevalence(.edema = edema, .summary_by = district) - - ### Subset the dataframe for the district "Metuge" with problematic SD ---- - metuge_df <- subset(p, district == "Metuge") - - ### Subset the dataframe for the district "Maravia" with problematic SD ---- - maravia_df <- subset(p, district == "Maravia") - - ### Subset the dataframe for the district "Chiuta" with != problematic SD --- - chiuta_df <- subset(p, district == "Chiuta") - - columns_to_check <- c("gam_n", "gam_p_low", "gam_p_upp", "sam_n", - "sam_p_low", "sam_p_upp", "mam_n", "mam_p_low", - "mam_p_upp", "wt_pop") - - ### The test ---- - - testthat::test_that( - "compute_wfhz_prevalence() works well on a dataframe with multiple survey areas with - different SD's classification", - { - testthat::expect_vector(dplyr::select(p, !district), size = 4, ncol(17)) - testthat::expect_s3_class(p, "tbl") - testthat::expect_true(all(sapply(metuge_df[columns_to_check], \(.) all(is.na(.))))) - testthat::expect_true(all(sapply(maravia_df[columns_to_check], \(.) all(is.na(.))))) - testthat::expect_false(all(sapply(chiuta_df[columns_to_check], \(.) all(is.na(.))))) - } - ) -}) diff --git a/vignettes/harvard-cite-them-right-11th-edition.csl b/vignettes/harvard-cite-them-right-11th-edition.csl new file mode 100644 index 0000000..de05325 --- /dev/null +++ b/vignettes/harvard-cite-them-right-11th-edition.csl @@ -0,0 +1,318 @@ + + diff --git a/vignettes/ipc_amn_check.qmd b/vignettes/ipc_amn_check.qmd index e5f6f4d..d356768 100644 --- a/vignettes/ipc_amn_check.qmd +++ b/vignettes/ipc_amn_check.qmd @@ -1,6 +1,12 @@ --- title: "Checking if IPC Acute Malnutrition sample size requirements were met" author: Tomás Zaba +bibliography: references.bib +csl: harvard-cite-them-right-11th-edition.csl +knitr: + opts_chunk: + collapse: true + comment: "#>" vignette: > %\VignetteIndexEntry{Checking if IPC Acute Malnutrition sample size requirements were met} %\VignetteEngine{quarto::html} @@ -12,11 +18,11 @@ vignette: > library(mwana) ``` -Evidence on the prevalence of acute malnutrition used in the IPC Acute Malnutrition (IPC AMN) can come from different sources, namely: representative surveys, screenings or community-based surveillance system (known as sentinel sites). The IPC set minimum sample size requirements for each of these sources. Details can be read from the [IPC Manual version 3.1 ](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/). +Evidence on the prevalence of acute malnutrition used in the IPC Acute Malnutrition (IPC AMN) can come from different sources, namely: representative surveys, screenings or community-based surveillance system (known as sentinel sites). The IPC set minimum sample size requirements for each of these sources. Details can be read from the IPC Manual version 3.1 [@ipcmanual]. In the IPC AMN analysis workflow, the very first step of a data analyst is to check if these requirements were met. This is done for each area meant to be included in the IPC AMN analysis. For this, `mwana` provides a handy function: `mw_check_ipcamn_ssreq()`. -To demonstrate its usage, we will use a built-in sample dataset `anthro.01`. +To demonstrate its usage, we will use a built-in sample data set `anthro.01`. ```{r} #| label: view-data @@ -33,9 +39,9 @@ head(anthro.01) head(anthro.01) ``` -`anthro.01` contains anthropometry data from SMART surveys from anonymized locations. We can check further details about the dataset by calling `help(anthro.01)` in `R` console. +`anthro.01` contains anthropometry data from SMART surveys from anonymized locations. We can check further details about the data set by calling `help("anthro.01")` in `R` console. -Now that we are acquainted with the dataset, we can proceed to execute the task. To achieve this, we simply do: +Now that we got acquainted with the data set, we can proceed to execute the task. To achieve this, we simply do: ```{r} #| label: check @@ -76,11 +82,11 @@ anthro.01 |> A table (of class `tibble`) is returned with three columns: - + Column `n_clusters` counts the number of unique cluster IDs in the dataset. - + Column `n_obs` counts the number of children in the dataset. + + Column `n_clusters` counts the number of unique cluster or villages or community IDs in the data set where the activity took place. + + Column `n_obs` counts the number of children in the data set. + Column `meet_ipc` indicates whether the IPC AMN sample size requirements (for surveys in this case) were met or not. -The above output is not quite useful yet as we often deal with multiple area dataset. We can get a summarised table by area as follows: +The above output is not quite useful yet as we often deal with multiple-area data set. We can get a summarized table by area as follows: ```{r} #| label: group_by #| echo: true @@ -116,3 +122,5 @@ anthro.01 |> ``` For screening or sentinel site-based data, we approach the task the same way; we only have to change the `.source` parameter to "screening" or to "ssite" as appropriate, as well as to supply `cluster` with the right column name of the sub-areas inside the main area (villages, localities, comunas, communities, etc). + +# References diff --git a/vignettes/plausibility.qmd b/vignettes/plausibility.qmd index ed4e9e0..d7fa8c5 100644 --- a/vignettes/plausibility.qmd +++ b/vignettes/plausibility.qmd @@ -1,6 +1,12 @@ --- title: "Running plausibility checks" author: Tomás Zaba +bibliography: references.bib +csl: harvard-cite-them-right-11th-edition.csl +knitr: + opts_chunk: + collapse: true + comment: "#>" vignette: > %\VignetteIndexEntry{Running plausibility checks} %\VignetteEngine{quarto::html} @@ -20,7 +26,7 @@ Plausibility check is a tool that evaluates the overall quality and acceptabilit `mwana` provides a set of handy functions to facilitate this evaluation. These functions allow users to assess the acceptability of weight-for-height z-score (WFHZ) and mid upper-arm circumference (MUAC) data. The evaluation of the latter can be done on the basis of MUAC-for-age z-score (MFAZ) or raw MUAC values. -In this vignette, we will learn how to use these functions and when to consider using MFAZ plausibility check over the one based on raw MUAC values. For demonstration, we will use a `mwana` built-in sample dataset named `anthro.01`. This dataset is contains district level SMART surveys from anonymized locations. Do `?anthro.01` in `R` console to read more about the dataset. +In this vignette, we will learn how to use these functions and when to consider using MFAZ plausibility check over the one based on raw MUAC values. For demonstration, we will use a `mwana` built-in sample data set named `anthro.01`. This data set contains district level SMART surveys from anonymized locations. Do `?anthro.01` in `R` console to read more about it. We will begin the demonstration with the plausibility check that you are most familiar with and then proceed to the ones you are less familiar with. @@ -28,15 +34,15 @@ We will begin the demonstration with the plausibility check that you are most fa We check the plausibility of WFHZ data by calling the `mw_plausibility_check_wfhz()` function. Before doing that, we need ensure the data is in the right "shape and format" that is accepted and understood by the function. Don't worry, you will soon learn how to get there. But first, let's take a moment to walk you through some key features about this function. -`mw_plausibility_check_wfhz()` is a replica of the plausibility check in ENA for SMART software of the [SMART Methodology](https://smartmethodology.org/). Under the hood, it runs the same statistical tests you already know from SMART, and it applies the same rating and scoring criteria. Beware though that there are some small differences to have in mind: +`mw_plausibility_check_wfhz()` is a replica of the plausibility check in ENA for SMART software of the SMART Methodology [@smart2017]. Under the hood, it runs the same test suite you already know from SMART; it also applies the same rating and scoring criteria. Beware though that there are some small differences to have in mind: (i) `mw_plausibility_check_wfhz()` does not include MUAC in its test suite. This is simply due the fact that now you can run a more comprehensive test suite for MUAC. - (ii) `mw_plausibility_check_wfhz()` allows user to run checks on a multiple area dataset at once, without having to repeat the same workflow over and over again for the number of areas the data holds. + (ii) `mw_plausibility_check_wfhz()` allows user to run checks on a multiple-area data set at once, without having to repeat the same workflow over and over again for the number of areas the data holds. That is it! Now we can begin delving into the "how to". -It is always a good practice to start off by inspecting the dataset. Let's check the first 6 rows of our dataset: +It is always a good practice to start off by inspecting our data set. Let's check the first 6 rows of our data set: ```{r} #| label: data @@ -54,13 +60,13 @@ head(anthro.01) head(anthro.01) ``` -We can see that the dataset has eleven variables, and the way how their respective values are presented. This is useful to inform the data wrangling workflow. +We can see that the data set has eleven variables, and the way how their respective values are presented. This is useful to inform the data wrangling workflow. ### Data wrangling -As mentioned somewhere above, before we supply a data object to `mw_plausibility_check_wfhz()`, we need to wrangle it first. This task is executed by `mw_wrangle_age()` and `mw_wrangle_wfhz()`. Read more about the technical documentation by doing `help(mw_wrangle_age)` or `help(mw_wrangle_wfhz)` in `R` console. +As mentioned somewhere above, before we supply a data object to `mw_plausibility_check_wfhz()`, we need to wrangle it first. This task is executed by `mw_wrangle_age()` and `mw_wrangle_wfhz()`. Read more about the technical documentation by doing `help("mw_wrangle_age")` or `help("mw_wrangle_wfhz")` in `R` console. -#### Age {#sec-age} +#### Wrangling age {#sec-age} We use `mw_wrangle_age()` to calculate child's age in months based on the date of data collection and child's date of birth. This is done as follows: @@ -113,13 +119,13 @@ wrangled_df <- anthro.01 |> ) ``` -In this example, the argument `.recode_sex` was set to `TRUE`. That is because under the hood, to compute the z-scores, a task made possible thanks to the [zscorer](https://cran.r-project.org/web/packages/zscorer/index.htmlpackage) package, it uses sex coded into 1 and 2 for male and female, respectively. This means that if our sex variable is already in 1 and 2's, we would set it to `FALSE`. +In this example, the argument `.recode_sex` was set to `TRUE`. That is because under the hood, to compute the z-scores, a task made possible thanks to the {`zscorer`} package [@zscorer], it uses sex coded into 1 and 2 for male and female, respectively. This means that if our sex variable is already in 1 and 2's, we would set it to `FALSE`. :::{.callout-note} If by any chance your sex variable is coded in any other different way than aforementioned, then you will have to recode it outside `mwana` utilities and then set `.recode_sex` accordingly. ::: -Under the hood, after recoding (or not) the sex variables, `mw_wrangle_wfhz()` computes the z-scores, then identifies outliers and adds them to the dataset. Two new variables (`wfhz` and `flag_wfhz`) are created and added to the dataset. We can see this below: +Under the hood, after recoding (or not) the sex variables, `mw_wrangle_wfhz()` computes the z-scores, then identifies outliers and adds them to the data set. Two new variables (`wfhz` and `flag_wfhz`) are created and added to the data set. We can see this below: ```{r} #| label: view_df @@ -281,7 +287,7 @@ anthro.01 |> An already formatted table, with scientific notations converted to standard notations, etc. -When working on a multiple area dataset, for instance districts, we can check the plausibility of all districts in the dataset at once by simply chaining the previous workflow with `group_by()` function from the `dplyr` package: +When working on a multiple-area data set, for instance districts, we can check the plausibility of all districts in the data set at once by using `group_by()` function from the {`dplyr`} package as follows: ```{r} #| label: pl_group_by @@ -314,6 +320,7 @@ anthro.01 |> height = height, flags = flag_wfhz ) |> + group_by(area) |> mw_neat_output_wfhz() ``` @@ -348,6 +355,7 @@ anthro.01 |> height = height, flags = flag_wfhz ) |> + group_by(area) |> mw_neat_output_wfhz() ``` @@ -355,19 +363,20 @@ At this point, you have reached the end of your workflow 🎉 . ## Plausibility check of MFAZ data -We will assess the plausibility of MUAC data through MFAZ if we have age variable available in our dataset. +We will assess the plausibility of MUAC data through MFAZ if we have age variable available in our data set. :::{.callout-note} -The plausibility check for MFAZ data was built based on the insights gotten from [Bilukha, O., & Kianian, B. (2023)](https://doi.org/10.1111/mcn.13478) research presented at the [2023 High-Level Technical Assessment Workshop](https://smartmethodology.org/wp-content/uploads/2024/03/2023-High-level-Technical-Assessment-Workshop-Report.pdf) held in Nairobi, Kenya. Results from this research suggested a feasibility of applying the similar plausibility check as that in WFHZ for MFAZ, with a difference in the amount of flags to be considered: maximum of 2%. +The plausibility check for MFAZ data was built based on the insights gotten from @bilukha research presented at the 2023 High-Level Technical Assessment Workshop held in Nairobi, Kenya [@smarthighlevel]. Results from this research suggested a feasibility of applying the similar plausibility check as that of WFHZ for MFAZ, with a maximum acceptability of percent of flagged +records of 2.0%. ::: -We can run MFAZ plausibility check by calling `mw_plausibility_check_mfaz()`. As in WFHZ, we first need to ensure that the data is in the right shape and format that is accepted and understood by the function. The workflow starts with processing age; for this, we approach the same way as in @sec-age. +We can run MFAZ plausibility check by calling `mw_plausibility_check_mfaz()`. As in WFHZ, we first need to ensure that the data is in the right shape and format that is accepted and understood by the function. The workflow starts with wrangling age; for this, we approach the same way as in @sec-age. :::{.callout-important} ## Age ratio test in MFAZ As you know, the age ratio test in WFHZ is done on children aged 6 to 29 months old over those aged 30 to 59 months old. -This is different in MFAZ. The test is done on children aged 6 to 23 months over those aged 24 to 59 months old. This is as in [SMART MUAC Tool](https://smartmethodology.org/survey-planning-tools/updated-muac-tool/). The test results is also used in the prevalence analysis to implement what the SMART MUAC tool does. This is further demonstrated in the vignette about prevalence. +This is different in MFAZ. The test is done on children aged 6 to 23 months over those aged 24 to 59 months old. This is as in the SMART MUAC Tool [@smartmuactool]. The test results is also used in the prevalence analysis to implement what the SMART MUAC tool does. This is further demonstrated in the vignette about prevalence. ::: ### Wrangling MFAZ data {#sec-wrangle_mfaz} @@ -396,7 +405,7 @@ anthro.01 |> ) ``` -Just as in WFHZ wrangler, under the hood, `mw_wrangle_muac()` computes the z-scores then identifies outliers and flags them. These are stored in the `mfaz` and `flag_mfaz` variables that are created and added to the dataset. +Just as in WFHZ wrangler, under the hood, `mw_wrangle_muac()` computes the z-scores then identifies outliers and flags them. These are stored in the `mfaz` and `flag_mfaz` variables that are created and added to the data set. The above code returns: @@ -423,7 +432,7 @@ anthro.01 |> ``` :::{.callout-note} -`mw_wrangle_muac()` accepts MUAC values in centimeters. This is why it takes the arguments `.recode_muac` and `.to` to control whether there is need to transform the variable `muac`function or not. Read the function documentation to learn how to control these two arguments. +`mw_wrangle_muac()` accepts MUAC values in centimeters. This is why it takes the arguments `.recode_muac` and `.to` to control whether there is need to transform the variable `muac` or not. Read the function documentation to learn about how to control these two arguments. ::: ### On to *de facto* plausibility check of MFAZ data @@ -566,7 +575,7 @@ anthro.01 |> mw_neat_output_mfaz() ``` -We can also run checks on a multiple area dataset as follows: +We can also run checks on a multiple-area data set as follows: ```{r} #| label: grouped_mfaz @@ -600,6 +609,7 @@ anthro.01 |> age = age, flags = flag_mfaz ) |> + group_by(area) |> mw_neat_output_mfaz() ``` @@ -636,6 +646,7 @@ anthro.01 |> age = age, flags = flag_mfaz ) |> + group_by(area) |> mw_neat_output_mfaz() ``` @@ -643,13 +654,13 @@ At this point, you have reached the end of your workflow ✨. ## Plausibility check of raw MUAC data -We will assess the plausibility of raw MUAC data through it's raw values when the variable age is not available in the dataset. This is a job assigned to `mw_plausibility_check_muac()`. The workflow for this check is the shortest one. +We will assess the plausibility of raw MUAC data through it's raw values when the variable age is not available in the data set. This is a job assigned to `mw_plausibility_check_muac()`. The workflow for this check is the shortest one. ### Data wrangling -As you can tell, z-scores cannot be computed in the absence of age. In this way, the data wrangling workflow would be quite minimal. You still set the arguments inside `mw_wrangle_muac()` as learned in @sec-wrangle_mfaz. The only difference is that here we will set `age` to `NULL`. Fundamentally, under the hood the function detects MUAC values that are outliers and flags them and stores them in `flag_muac` variable that is added to the dataset. +As you can tell, z-scores cannot be computed in the absence of age. In this way, the data wrangling workflow would be quite minimal. You still set the arguments inside `mw_wrangle_muac()` as learned in @sec-wrangle_mfaz. The only difference is that here we will set `age` to `NULL`. Fundamentally, under the hood the function detects MUAC values that are outliers and flags them and stores them in `flag_muac` variable that is added to the data set. -We will continue using the same dataset: +We will continue using the same data set: ```{r} #| label: wrangle_muac @@ -757,7 +768,7 @@ anthro.01 |> mw_neat_output_muac() ``` -And we get +And we get: ```{r} #| label: view_pretty_tabl_muac @@ -781,7 +792,7 @@ anthro.01 |> ``` -When working on multiple area data, we approach the task the same way as demonstrated above: +When working on multiple-area data, we approach the task the same way as demonstrated above: ```{r} #| label: by_area @@ -807,6 +818,7 @@ anthro.01 |> flags = flag_muac, muac = muac ) |> + group_by(area) |> mw_neat_output_muac() ``` @@ -834,5 +846,8 @@ anthro.01 |> flags = flag_muac, muac = muac ) |> + group_by(area) |> mw_neat_output_muac() ``` + +# References diff --git a/vignettes/prevalence.qmd b/vignettes/prevalence.qmd index 7c79d5d..22ef74e 100644 --- a/vignettes/prevalence.qmd +++ b/vignettes/prevalence.qmd @@ -1,6 +1,10 @@ --- title: "Estimating the prevalence of wasting" author: Tomás Zaba +knitr: + opts_chunk: + collapse: true + comment: "#>" vignette: > %\VignetteIndexEntry{Estimating the prevalence of wasting} %\VignetteEngine{quarto::html} @@ -16,30 +20,30 @@ library(mwana) ## Introduction This vignette demonstrates how to use the `mwana` package's functions to estimate the prevalence of wasting. The package allow users to estimate prevalence based on: - + The weight-for-height z-score (WFHZ) and/or edema; - + The raw MUAC values and/or edema; - + The combined prevalence and - + The MUAC-for-age z-score (MFAZ) and/or edema. + + Weight-for-height z-score (WFHZ) and/or edema; + + Raw MUAC values and/or edema; + + MUAC-for-age z-score (MFAZ) and/or edema, and + + Combined prevalence. -The prevalence functions in `mwana` were carefully conceived and designed to simplify the workflow of a nutrition data analyst, especially when dealing with datasets containing imperfections that require additional layers of analysis. Let's try to clarify this with two scenarios that I believe will remind you of the complexity involved: +The prevalence functions in `mwana` were carefully conceived and designed to simplify the workflow of a nutrition data analyst, especially when dealing with data sets containing imperfections that require additional layers of analysis. Let's try to clarify this with two scenarios that I believe will remind you of the complexity involved: - + When analysing a multi-area dataset, users will likely need to estimate the prevalence for each area individually. Afterward, they must extract the results and collate in a summary table. + + When analysing a multi-area data set, users will likely need to estimate the prevalence for each area individually. Afterward, they must extract the results and collate in a summary table to share. - + When working with MUAC data, if age ratio test is rated as problematic, an additional tool is required to weight the prevalence and correct for age bias. In unfortunate cases where multiple areas face this issue, the workflow must be repeated several times, making the process cumbersome and highly error-prone 😬. + + When working with MUAC data, if age ratio test is rated as problematic, an additional tool is required to weight the prevalence and correct for age bias, thus the associated likely overestimation of the prevalence. In unfortunate cases where multiple areas face this issue, the workflow must be repeated several times, making the process cumbersome and highly error-prone 😬. -With `mwana` you no longer have to worry about this 🥳 as the functions are designed to deal with that. To demonstrate their use, we will use different datasets containing some imperfections alluded above: +With `mwana` you no longer have to worry about this 🥳 as the functions are designed to deal with that. To demonstrate their use, we will use different data sets containing some imperfections alluded above: + `anthro.02` : a survey data with survey weights. Read more about this data with `?anthro.02`. - + `anthro.03` : district-level SMART surveys with two districts whose WFHZ standard deviation are rated as problematic while the rest are within range. Do `?anthro.03` for more details. + + `anthro.03` : district-level SMART surveys with two districts whose WFHZ standard deviations are rated as problematic while the rest are within range. Do `?anthro.03` for more details. + `anthro.04` : a community-based sentinel site data. The data has different characteristics that require different analysis approaches. Now we can begin delving into each function. ### Estimation of the prevalence of wasting based on WFHZ {#sec-prevalence-wfhz} -To estimate the prevalence of wasting based on WFHZ we use the `compute_wfhz_prevalence()` function. The dataset to supply must have been wrangled by `mw_wrangle_wfhz()`. +To estimate the prevalence of wasting based on WFHZ we use the `mw_estimate_prevalence_wfhz()` function. The data set to supply must have been wrangled by `mw_wrangle_wfhz()`. -As usual, we start off by inspecting our dataset: +As usual, we start off by inspecting our data set: ```{r} #| label: inspect_anthro.02 @@ -56,7 +60,7 @@ tail(anthro.02) tail(anthro.02) ``` -We can see that the dataset contains the required variables for a WFHZ prevalence analysis, including for a weighted analysis. This dataset has already been wrangled, so we do not need to call the WFHZ wrangler in this case. We will begin the demonstration with an unweigthed analysis - typical of SMART surveys - and then we proceed to a weighted analysis. +We can see that the data set contains the required variables for a WFHZ prevalence analysis, including for a weighted analysis. This data set has already been wrangled, so we do not need to call the WFHZ wrangler in this case. We will begin the demonstration with an unweigthed analysis - typical of SMART surveys - and then we proceed to a weighted analysis. #### Estimation of unweighted prevalence @@ -67,10 +71,10 @@ To achieve this we do: #| eval: false anthro.02 |> - compute_wfhz_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = NULL + mw_estimate_prevalence_wfhz( + wt = NULL, + edema = edema, + .by = NULL ) ``` @@ -79,15 +83,15 @@ This will return: #| label: view_unwt_wast_wfhz #| echo: false -compute_wfhz_prevalence( +mw_estimate_prevalence_wfhz( df = anthro.02, - .wt = NULL, - .edema = edema, - .summary_by = NULL + wt = NULL, + edema = edema, + .by = NULL ) ``` -If for some reason the variable edema is not available in the dataset, or it's there but not plausible, we can exclude it from the analysis by setting the argument `.edema` to `NULL`: +If for some reason the variable edema is not available in the data set, or it's there but not plausible, we can exclude it from the analysis by setting the argument `edema` to `NULL`: ```{r} #| label: unwt_wast_wfhz_noedema @@ -95,10 +99,10 @@ If for some reason the variable edema is not available in the dataset, or it's t #| eval: false anthro.02 |> - compute_wfhz_prevalence( - .wt = NULL, - .edema = NULL, # Setting .edema to NULL - .summary_by = NULL + mw_estimate_prevalence_wfhz( + wt = NULL, + edema = NULL, # Setting edema to NULL + .by = NULL ) ``` @@ -108,17 +112,17 @@ And we get: #| label: view_unwt_wast_wfhz_noedema #| echo: false -compute_wfhz_prevalence( +mw_estimate_prevalence_wfhz( df = anthro.02, - .wt = NULL, - .edema = NULL, - .summary_by = NULL + wt = NULL, + edema = NULL, + .by = NULL ) ``` -If we inspect the `gam_n` and `gam_p` columns of this output table and the previous, we notice differences in the numbers. This occurs because edema cases were excluded in the second implementation. Note that you will observed a change if there are positive cases of edema in the dataset; otherwise, setting `.edema = NULL` will have no effect whatsoever. +If we inspect the `gam_n` and `gam_p` columns of this output table and the previous, we notice differences in the numbers. This occurs because edema cases were excluded in the second implementation. Note that you will observed a change if there are positive cases of edema in the data set; otherwise, setting `edema = NULL` will have no effect whatsoever. -The above output summary does not show results by province. We can control that using the `.summary_by` argument. In the above examples, it was set to `NULL`; now let's pass the name of the column containing the locations where the data was collected. In our case, the column is `province`: +The above output summary does not show results by province. We can control that using the `.by` argument. In the above examples, it was set to `NULL`; now let's pass the name of the column containing the locations where the data was collected. In our case, the column is `province`: ```{r} #| label: unwt_wast_wfhz_province @@ -126,10 +130,10 @@ The above output summary does not show results by province. We can control that #| eval: false anthro.02 |> - compute_wfhz_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = province # province is the variable's name holding data on where the survey was conducted. + mw_estimate_prevalence_wfhz( + wt = NULL, + edema = edema, + .by = province # province is the variable's name holding data on where the survey was conducted. ) ``` @@ -140,10 +144,10 @@ And _voila_ : #| echo: false anthro.02 |> - compute_wfhz_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = province + mw_estimate_prevalence_wfhz( + wt = NULL, + edema = edema, + .by = province ) ``` @@ -151,7 +155,7 @@ A table with two rows is returned with each province's statistics. #### Estimation of weighted prevalence -To get the weighted prevalence, we make the use of the `.wt` argument. We pass to it the column name containing the final survey weights. In our case, the column name is `wtfactor`. We pass it in quotation `" "`: +To get the weighted prevalence, we make the use of the `wt` argument. We pass to it the column name containing the final survey weights. In our case, the column name is `wtfactor`: ```{r} #| label: wt_wasting_wfhz @@ -159,10 +163,10 @@ To get the weighted prevalence, we make the use of the `.wt` argument. We pass t #| eval: false anthro.02 |> - compute_wfhz_prevalence( - .wt = "wtfactor", # Passing the wtfactor to .wt - .edema = edema, - .summary_by = province + mw_estimate_prevalence_wfhz( + wt = wtfactor, # Passing the wtfactor to wt + edema = edema, + .by = province ) ``` @@ -170,25 +174,25 @@ And you get: ```{r} #| label: view_wt_wasting_wfhz -#| echo: true +#| echo: false anthro.02 |> - compute_wfhz_prevalence( - .wt = "wtfactor", - .edema = edema, - .summary_by = province + mw_estimate_prevalence_wfhz( + wt = wtfactor, + edema = edema, + .by = province ) ``` :::{.callout-note} -## The work under the hood of `compute_wfhz_prevalence()` +## The work under the hood of `mw_estimate_prevalence_wfhz` -Under the hood, before starting the prevalence estimation, the function first checks the quality of the WFHZ standard deviation. If it is not rated as problematic, it proceeds with a complex sample-based analysis; otherwise, prevalence is estimated applying the PROBIT method. This is as you see in the body of the plausibility report generated by ENA. The `anthro.02` dataset has no such issues, so you don't see `compute_wfhz_prevalence()` in action on this regard. To see that, let's use the `anthro.03` dataset: . +Under the hood, before starting the prevalence estimation, the function first checks the quality of the WFHZ standard deviation. If it is not rated as problematic, it proceeds with a complex sample-based analysis; otherwise, prevalence is estimated applying the PROBIT method. This is as you see in the body of the plausibility report generated by ENA. The `anthro.02` data set has no such issues, so you don't see `mw_estimate_prevalence_wfhz` in action on this regard. To see that, let's use the `anthro.03` data set. ::: `anthro.03` contains problematic standard deviation in Metuge and Maravia districts, while the remaining districts are within range. -Let's inspect our dataset: +Let's inspect our data set: ```{r} #| label: anthro.3 @@ -197,7 +201,7 @@ Let's inspect our dataset: head(anthro.03) ``` -Now let's apply the prevalence function: +Now let's apply the prevalence function. This is data is not wrangled, so we will have to wrangle it before passing to the prevalence function: ```{r} #| label: anthro.3_prev @@ -211,10 +215,10 @@ anthro.03 |> height = height, weight = weight ) |> - compute_wfhz_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = district + mw_estimate_prevalence_wfhz( + wt = NULL, + edema = edema, + .by = district ) ``` @@ -231,10 +235,10 @@ anthro.03 |> height = height, weight = weight ) |> - compute_wfhz_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = district + mw_estimate_prevalence_wfhz( + wt = NULL, + edema = edema, + .by = district ) ``` @@ -242,11 +246,11 @@ Can you spot the differences? 😎 Yes, you're absolutely correct! While in Caho ### Estimation of the prevalence of wasting based on MFAZ -The prevalence of wasting based on MFAZ can be estimated using the `compute_mfaz_prevalence()` function. This function works and is implemented the same way as demonstrated in @sec-prevalence-wfhz, with the exception of the data wrangling that is based on MUAC. This was demonstrated in the [plausibility checks](https://nutriverse.io/mwana/articles/plausibility.html). In this way, to avoid redundancy, we will not demonstrate the workflow. +The prevalence of wasting based on MFAZ can be estimated using the `mw_estimate_prevalence_mfaz()` function. This function works and is implemented the same way as demonstrated in @sec-prevalence-wfhz, with the exception of the data wrangling that is based on MUAC. This was demonstrated in the [plausibility checks](https://nutriverse.io/mwana/articles/plausibility.html). In this way, to avoid redundancy, we will not demonstrate the workflow. -### Estimation of the prevalence of wasting based on the raw MUAC values {#sec-prevalence-muac} +### Estimation of the prevalence of wasting based on raw MUAC values {#sec-prevalence-muac} -This job is assigned to `compute_muac_prevalence()`. Once you call the function, before starting the prevalence estimation, it first evaluates the acceptability of the MFAZ standard deviation and the age ratio test. Yes, you read well, MFAZ's standard deviation, not on the raw values MUAC. +This job is assigned to `mw_estimate_prevalence_muac()`. Once you call the function, before starting the prevalence estimation, it first evaluates the acceptability of the MFAZ standard deviation and the age ratio test. Yes, you read well, MFAZ's standard deviation, not on the raw values MUAC. :::{.callout-important} Although the acceptability is evaluated on the basis of MFAZ, the actual prevalence is estimated on the basis of the raw MUAC values. MFAZ is also used to detect outliers and flag them to be excluded from the prevalence analysis. @@ -255,18 +259,18 @@ Although the acceptability is evaluated on the basis of MFAZ, the actual prevale The MFAZ standard deviation and the age ratio test results are used to control the prevalence analysis flow in this way: + If the MFAZ standard deviation and the age ratio test are both not problematic, a standard complex sample-based prevalence is estimated. - + If the MFAZ standard deviation is not problematic but the age ratio test is problematic, the CDC/SMART MUAC tool age-weighting approach is applied. + + If the MFAZ standard deviation is not problematic but the age ratio test is problematic, the SMART MUAC tool age-weighting approach is applied. + If the MFAZ standard deviation is problematic, even if age ratio is not problematic, no prevalence analysis is estimated, instead `NA` are thrown. -When working with a multiple-area dataset, these conditionals will still be applied according to each area's situation. +When working with a multiple-area data set, these conditionals will still be applied according to each area's situation. :::{.callout-note} -## How does it work on a multi-area dataset +## How does it work on a multi-area data set -Fundamentally, the function performs the standard deviation and age ratio tests, evaluates their acceptability, and returns a summarized table by area. It then iterates over that summary table row by row checking the above conditionals. Based on the conditionals of each row (area), the function accesses the original dataset, computes the prevalence accordingly, and returns the results. +Fundamentally, the function performs the standard deviation and age ratio tests, evaluates their acceptability, and returns a summarized table by area. It then iterates over that summary table row by row checking the above conditionals. Based on the conditionals of each row (area), the function accesses the original data set, computes the prevalence accordingly, and returns the results. ::: -To demonstrate this we will use the `anthro.04` dataset. +To demonstrate this we will use the `anthro.04` data set. As usual, let's first inspect it: ```{r} @@ -288,10 +292,10 @@ As in ENA Software, make sure you run the plausibility check before you call the #| eval: false anthro.04 |> - compute_muac_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = province + mw_estimate_prevalence_muac( + wt = NULL, + edema = edema, + .by = province ) ``` @@ -302,18 +306,44 @@ This will return: #| echo: false anthro.04 |> - compute_muac_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = province + mw_estimate_prevalence_muac( + wt = NULL, + edema = edema, + .by = province ) ``` We see that in Province 1, all columns are filled with numbers; in Province 2, some columns are filled with numbers, while other columns are filled with `NA`s: this is where the age-weighting approach was applied. Lastly, in Province 3 a bunch of `NA` are filled everywhere - you know why 😉 . +Alternatively, we can choose to apply the function that calculates the age-weighted prevalence estimates inside `mw_estimate_prevalence_muac()` directly on to our data set. This can be done by calling the `mw_estimate_smart_age_wt()` function. It worth noting that although possible, we recommend to use the main function. This is simply due the fact that if we decide to use the function independently, then we must, before calling it, check the acceptability of the standard deviation of MFAZ and of the age ratio test, and then evaluate if the conditions that fits the use `mw_estimate_smart_age_wt()` are there. We would have to do that ourselves. This introduces some kind of cumbersomeness in the workflow, and along with that, a risk of picking a wrong analysis workflow. + +Nonetheless, if for any reason we decide to go for it anyway, then we would apply the function as demonstrated below. We will continue using the `anthro.04` data set. For this demonstration, we will just pull out the data set from `Province 2` where we already know that the conditions to apply `mw_estimate_smart_age_wt()` are met, and then we will pipe it in to the function: +```{r} +#| label: smart_wt +#| echo: true +#| eval: false + +anthro.04 |> + subset(province == "Province 2") |> + mw_estimate_smart_age_wt( + edema = edema, + .by = NULL + ) +``` + +This returns the following: +```{r} +#| label: smart_wt_view +#| echo: false + +anthro.04 |> + subset(province == "Province 2") |> + mw_estimate_smart_age_wt() +``` + #### Estimation of weighted prevalence -For this we go back `anthro.02` dataset. +For this we go back `anthro.02` data set. We approach this task as follows: @@ -342,10 +372,10 @@ anthro.02 |> mutate( muac = recode_muac(muac, .to = "mm") ) |> - compute_muac_prevalence( - .wt = "wtfactor", - .edema = edema, - .summary_by = province + mw_estimate_prevalence_muac( + wt = wtfactor, + edema = edema, + .by = province ) ``` @@ -374,24 +404,56 @@ anthro.02 |> mutate( muac = recode_muac(muac, .to = "mm") ) |> - compute_muac_prevalence( - .wt = "wtfactor", - .edema = edema, - .summary_by = province + mw_estimate_prevalence_muac( + wt = wtfactor, + edema = edema, + .by = province ) ``` :::{.callout-warning} -You may have noticed that in the above code block, we called the `recode_muac()` function inside `mutate()`. This is because after you use `mw_wrangle_muac()`, it puts the MUAC variable in centimeters. The `compute_muac_prevalence()` function was defined to accept MUAC in millimeters. Therefore, it must be converted to millimeters. +You may have noticed that in the above code block, we called the `recode_muac()` function inside `mutate()`. This is because after you use `mw_wrangle_muac()`, it puts the MUAC variable in centimeters. The `mw_estimate_prevalence_muac()` function was defined to accept MUAC in millimeters. Therefore, it must be converted to millimeters. ::: +#### Estimation for non survey data + +Thus far, the demonstration has been around survey data. However, it is also common in day-to-day practice to come across non survey data to analyse. Non survey data can be screenings or any other +kind of community-based surveillance data. With this kind of data, the analysis workflow usually consists in a simple estimation of the point prevalence and the counts of the positive cases, without necessarily estimating the uncertainty. `mwana` provides a handy function for this task: `mw_estimate_prevalence_screening()`. Under the hood, this function works exactly the same way as `mw_estimate_prevalence_muac()`. The only difference is that it was designed to deal with non survey data. + +To demonstrate its usage, we will use the `anthro.04` data set. +```{r} +#| label: non-survey +#| echo: true +#| eval: false + +anthro.04 |> + mw_estimate_prevalence_screening( + muac = muac, + edema = edema, + .by = province + ) +``` + +The returned output is: +```{r} +#| label: non-survey-view +#| echo: false + +anthro.04 |> + mw_estimate_prevalence_screening( + muac = muac, + edema = edema, + .by = province + ) +``` + ### Estimation of the combined prevalence of wasting -The estimation of the combined prevalence of wasting is a task attributed to the `compute_combined_prevalence()` function. The case-definition is based on the WFHZ, the raw MUAC values and edema. From the workflow standpoint, it combines the workflow demonstrated in @sec-prevalence-wfhz and in @sec-prevalence-muac. +The estimation of the combined prevalence of wasting is a task attributed to the `mw_estimate_prevalence_combined()` function. The case-definition is based on the WFHZ, the raw MUAC values and edema. From the workflow standpoint, it combines the workflow demonstrated in @sec-prevalence-wfhz and in @sec-prevalence-muac. -To demonstrate it's implementation we will use the `anthro.01` dataset. +To demonstrate it's implementation we will use the `anthro.01` data set. -Let's inspect our data: +Let's inspect the data: ```{r} #| label: view_anthro.01 #| echo: false @@ -401,7 +463,7 @@ head(anthro.01) #### Data wrangling -Fundamentally, it combines the wrangling workflow of WFHZ and MUAC data: +Fundamentally, it combines the data wrangling workflow of WFHZ and MUAC: ```{r} #| label: combined_wrangling #| echo: true @@ -437,7 +499,7 @@ anthro.01 |> ) ``` -This is to get the `wfhz` and `flag_wfhz` the `mfaz` and `flag_mfaz` added to the dataset. In the output below, we just selected these columns: +This is to get the `wfhz` and `flag_wfhz` the `mfaz` and `flag_mfaz` added to the data set. In the output below, we have just selected these columns: ```{r} #| label: view_combined_wrangling @@ -473,17 +535,17 @@ anthro.01 |> select(area, wfhz, flag_wfhz, mfaz, flag_mfaz) ``` -Under the hood, `compute_combined_prevalence()` applies the same analysis approach as in `compute_wfhz_prevalence()` and in `compute_muac_prevalence()`. It checks the acceptability of the standard deviation of WFHZ and MFAZ and of the age ratio test. The following conditionals are checked and applied: +Under the hood, `mw_estimate_prevalence_combined()` applies the same analysis approach as in `mw_estimate_prevalence_wfhz` and in `mw_estimate_prevalence_muac()`. It checks the acceptability of the standard deviation of WFHZ and MFAZ and of the age ratio test. The following conditionals are checked and applied: - + If the standard deviation of WFHZ, of MFAZ and the age ratio test are not problematic, the standard complex sample-based estimation is applied. - + If either the standard deviation of WFHZ or of MFAZ or the age ratio test is problematic, prevalence is not computed, and `NA` are thrown. + + If the standard deviation of WFHZ and of MFAZ, and the age ratio test are all concurrently not problematic, the standard complex sample-based estimation is applied. + + If any of the above is rated problematic, the prevalence is not computed and `NA`s are thrown. In this function, a concept of "combined flags" is used. :::{.callout-note} ## What is combined flag? -Combined flags consists of defining as flag any observation that is flagged in either `flag_wfhz` or `flag_mfaz` vectors. A new column `cflags` for combined flags is created and added to the dataset. This ensures that all flagged observations from both WFHZ and MFAZ data are excluded from the prevalence analysis. +Combined flags consists of defining as flag any observation that is flagged in either `flag_wfhz` or `flag_mfaz` vectors. A new column `cflags` for combined flags is created and added to the data set. This ensures that all flagged observations from both WFHZ and MFAZ data are excluded from the prevalence analysis. ::: | **flag_wfhz** | **flag_mfaz** | **cflags** | @@ -491,7 +553,7 @@ Combined flags consists of defining as flag any observation that is flagged in e | 1 | 0 | 1 | | 0 | 1 | 1 | | 0 | 0 | 0 | -: Overview of case-definition of combined flag {#tbl-Table 1} +: A glimpse of case-definition of combined flag {#tbl-Table 1} Now that we understand what happens under the hood, we can now proceed to implement it: @@ -529,10 +591,10 @@ anthro.01 |> height = height, .recode_sex = FALSE ) |> - compute_combined_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = area + mw_estimate_prevalence_combined( + wt = NULL, + edema = edema, + .by = area ) ``` @@ -568,10 +630,10 @@ anthro.01 |> height = height, .recode_sex = FALSE ) |> - compute_combined_prevalence( - .wt = NULL, - .edema = edema, - .summary_by = area + mw_estimate_prevalence_combined( + wt = NULL, + edema = edema, + .by = area ) ``` diff --git a/vignettes/references.bib b/vignettes/references.bib new file mode 100644 index 0000000..1fb25d2 --- /dev/null +++ b/vignettes/references.bib @@ -0,0 +1,51 @@ + +@manual{ipcmanual, + author = {{IPC Global Partners}}, + title = {Integrated Food Security Phase Classification Technical Manual Version 3.1: Evidence and Standards for Better Food Security and Nutrition Decisions}, + year = {2021}, + url = {https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/} +} + +@manual{smart2017, + author = {{SMART Initiative}}, + title = {Standardized Monitoring and Assessment for Relief and Transition}, + year = {2017}, + version = {2.0}, + organization = {Action Against Hunger Canada}, + url = {https://smartmethodology.org} +} + +@article{bilukha, + author = {Bilukha, Oleg and Kianian, Bahar}, + title = {Considerations for assessment of measurement quality of mid-upper arm circumference data in anthropometric surveys and mass nutritional screenings conducted in humanitarian and refugee settings}, + journal = {Maternal \& Child Nutrition}, + year = {2023}, + volume = {19}, + pages = {e13478}, + doi = {10.1111/mcn.13478}, + url = {https://doi.org/10.1111/mcn.13478} +} + +@manual{zscorer, + title = {zscorer: Child Anthropometry z-Score Calculator}, + author = {Mark Myatt and Ernest Guevarra}, + year = {2019}, + note = {R package version 0.3.1}, + url = {https://CRAN.R-project.org/package=zscorer} +} + +@techreport{smarthighlevel, + author = {{SMART Initiative}}, + title = {2023 High-Level Technical Assessment Workshop Report}, + year = {2023}, + month = {December}, + url = {https://smartmethodology.org/wp-content/uploads/2024/03/2023-High-level-Technical-Assessment-Workshop-Report.pdf} +} + +@misc{smartmuactool, + author = {{SMART Initiative}}, + title = {Updated SMART MUAC Tool}, + year = {n.d.}, + url = {https://smartmethodology.org/survey-planning-tools/updated-muac-tool/}, + note = {Accessed: 2024-11-16} +}