Initial commit

jokroese · jokroese · commit a1f1cddd57f2 · 2020-08-27T18:32:17.000+02:00
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,4 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+reference/
+^LICENSE\.md$
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,7 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+reference/
+inst/doc
+notes/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,26 @@
+Package: tidymrp
+Type: Package
+Title: Tidy multilevel regression and poststratification (MRP)
+Version: 0.0.0.9000
+Authors@R: person("Jobi", "Kroese", email = "jo@citizense.org",
+  role = c("aut", "cre"))
+Description: Run multilevel regression and poststratification analyses.
+    It provides functions useful for MRP workflows including creating poststratification frames,
+    poststratifying and visualising MRP results.
+License: AGPL-3
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 7.1.1
+Suggests: 
+    testthat,
+    knitr,
+    rmarkdown
+Imports: 
+    magrittr,
+    dplyr,
+    tibble,
+    brms,
+    tidybayes
+Depends: 
+    R (>= 2.10)
+VignetteBuilder: knitr
diff --git a/LICENSE.md b/LICENSE.md
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,3 @@
+# Generated by roxygen2: do not edit by hand
+
+export(create_poststratification_frame)
diff --git a/R/analyse.R b/R/analyse.R
@@ -0,0 +1,133 @@
+#' Create a poststratification frame from population-level data such as a census
+#'
+#' Collapses `census` to one row per combination of `strata_variables` and
+#' sums the weights of the rows that fall into each combination.
+#'
+#' @param census A data frame of population-level data.
+#' @param strata_variables Columns of `census` that define the strata, given
+#'   as a tidyselect expression such as `c(region, age_group)`.
+#' @param weight_column Column of `census` holding the weight of each row. The
+#'   default, `1`, gives every row a weight of one, so that `population_total`
+#'   is the number of rows in each stratum.
+#'
+#' @return An ungrouped data frame with one row per stratum, containing the
+#'   strata columns and a `population_total` column.
+#' @importFrom magrittr %>%
+#' @export
+#'
+#' @examples
+#' create_poststratification_frame(
+#'   example_census,
+#'   strata_variables = c(region, age_group),
+#'   weight_column = population_total
+#' )
+create_poststratification_frame <- function(census, strata_variables, weight_column = 1) {
+  census %>%
+    # Materialise the weight as a column first so that the scalar default `1`
+    # is recycled to one value per row before it is summed.
+    dplyr::mutate(weight = {{ weight_column }}) %>%
+    dplyr::group_by(dplyr::across({{ strata_variables }})) %>%
+    dplyr::summarise(population_total = sum(weight),
+                     .groups = "drop")
+}
+
+
+#' Create poststratified draws from a model
+#'
+#' Adds posterior predictive draws from `model` for every row of `new_data`
+#' and multiplies each draw by the weight of the row it was drawn for.
+#'
+#' @param model A fitted model that [tidybayes::add_predicted_draws()] accepts.
+#' @param new_data A data frame of strata to predict for; passed to
+#'   [tidybayes::add_predicted_draws()] as `newdata`.
+#' @param weight_column Column of `new_data` holding the weight of each row.
+#'
+#' @return An ungrouped data frame of predicted draws with an additional
+#'   `prediction_sum` column equal to `.prediction` times the row's weight.
+#' @importFrom magrittr %>%
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' model <- brms::brm(binary_response ~ (1 | region),
+#'                    data = example_survey,
+#'                    family = brms::bernoulli())
+#' get_poststratified_draws(model, example_census)
+#' }
+get_poststratified_draws <- function(model, new_data, weight_column = population_total) {
+  model %>%
+    tidybayes::add_predicted_draws(newdata = new_data) %>%
+    dplyr::mutate(prediction_sum = .prediction * {{ weight_column }}) %>%
+    dplyr::ungroup()
+}
+
+
+#' Summarise posterior predictions for groups of strata
+#'
+#' For every posterior draw, averages the predictions of all rows of
+#' `new_data` that share the same values of `group_variables`, then summarises
+#' those per-draw averages across draws.
+#'
+#' @param model A fitted model that [tidybayes::add_predicted_draws()] accepts.
+#' @param new_data A data frame of strata to predict for; passed to
+#'   [tidybayes::add_predicted_draws()] as `newdata`.
+#' @param group_variables Columns of `new_data` to report estimates for, given
+#'   as a tidyselect expression.
+#' @param weight_column Currently unused: the per-draw average is unweighted.
+#'   NOTE(review): confirm whether the average should be weighted by this
+#'   column.
+#' @param lower_confidence Probability of the quantile reported as `lower`.
+#' @param upper_confidence Probability of the quantile reported as `upper`.
+#'   Defaults to `1 - lower_confidence`.
+#'
+#' @return An ungrouped data frame with the `group_variables` columns and the
+#'   columns `mean`, `lower` and `upper`.
+#' @importFrom magrittr %>%
+#' @noRd
+get_strata_estimates <- function(model, new_data, group_variables, weight_column = population_total, lower_confidence = 0.025, upper_confidence = 1-lower_confidence) {
+  model %>%
+    tidybayes::add_predicted_draws(newdata = new_data) %>%
+    dplyr::ungroup() %>%
+    # Step 1: one value per group and draw.
+    dplyr::group_by(dplyr::across({{ group_variables }}), .draw) %>%
+    dplyr::summarise(.prediction = mean(.prediction), .groups = "drop") %>%
+    # Step 2: summarise the draws of each group.
+    dplyr::group_by(dplyr::across({{ group_variables }})) %>%
+    dplyr::summarise(
+      mean = mean(.prediction),
+      lower = stats::quantile(.prediction, lower_confidence),
+      upper = stats::quantile(.prediction, upper_confidence),
+      .groups = "drop")
+}
+
+#' Add each stratum's share of its reporting group to a poststratification frame
+#'
+#' Collapses `poststratification_frame` to one row per combination of
+#' `model_variables` and adds a `strata_proportion` column: the summed weight
+#' of the stratum divided by the summed weight of the `estimates_by` group the
+#' stratum belongs to.
+#'
+#' @param poststratification_frame A data frame of strata and their weights,
+#'   such as the output of [create_poststratification_frame()].
+#' @param model_variables Columns that define a stratum, given as a tidyselect
+#'   expression. Only these columns are kept in the result, so they need to
+#'   include the `estimates_by` columns if those are required afterwards.
+#' @param estimates_by Columns that define the groups whose total weight is
+#'   the denominator of `strata_proportion`, given as a tidyselect expression.
+#' @param weight_column Column of `poststratification_frame` holding the
+#'   weight of each row.
+#'
+#' @return An ungrouped data frame with the `model_variables` columns, a
+#'   `population_total` column (the summed weight of the stratum) and a
+#'   `strata_proportion` column.
+#' @importFrom magrittr %>%
+#' @export
+#'
+#' @examples
+#' add_proportion(
+#'   example_census,
+#'   model_variables = c(age_group, region),
+#'   estimates_by = region
+#' )
+add_proportion <- function(poststratification_frame, model_variables, estimates_by, weight_column = population_total) {
+  # Total weight of every reporting group.
+  results_by_totals <- poststratification_frame %>%
+    dplyr::group_by(dplyr::across({{ estimates_by }})) %>%
+    dplyr::summarise(population_total_sum = sum({{ weight_column }}),
+                     .groups = "drop")
+
+  # Join explicitly on the reporting-group columns instead of relying on a
+  # natural join, which emits a message and depends on accidental name overlap.
+  join_columns <- setdiff(names(results_by_totals), "population_total_sum")
+
+  poststratification_frame %>%
+    dplyr::left_join(results_by_totals, by = join_columns) %>%
+    # The group total is carried as a grouping column so that it survives the
+    # collapse to one row per stratum.
+    dplyr::group_by(dplyr::across({{ model_variables }}), population_total_sum) %>%
+    dplyr::summarise(population_total = sum({{ weight_column }}),
+                     .groups = "drop") %>%
+    dplyr::mutate(strata_proportion = population_total / population_total_sum) %>%
+    dplyr::select(-population_total_sum)
+}
+
+#' Estimate poststratified outcomes for groups of the population
+#'
+#' Predicts from `model` for every stratum, weights each prediction by the
+#' stratum's share of its `estimates_by` group, sums the weighted predictions
+#' within each group and draw, and summarises the result across draws.
+#'
+#' @param model A fitted model that [tidybayes::add_predicted_draws()] accepts.
+#' @param new_data A data frame of strata and their weights, such as the
+#'   output of [create_poststratification_frame()].
+#' @param model_variables Columns that define a stratum, given as a tidyselect
+#'   expression; see [add_proportion()].
+#' @param estimates_by Columns to report estimates for, given as a tidyselect
+#'   expression; see [add_proportion()].
+#' @param weight_column Column of `new_data` holding the weight of each row.
+#' @param lower_confidence Probability of the quantile reported as
+#'   `lower_estimate`.
+#' @param upper_confidence Probability of the quantile reported as
+#'   `upper_estimate`. Defaults to `1 - lower_confidence`.
+#'
+#' @return An ungrouped data frame with the `estimates_by` columns and the
+#'   columns `mean_estimate`, `lower_estimate` and `upper_estimate`.
+#' @importFrom magrittr %>%
+#' @noRd
+get_poststratified_estimates <- function(model, new_data, model_variables, estimates_by, weight_column = population_total, lower_confidence = 0.025, upper_confidence = 1-lower_confidence) {
+
+  # TODO: add a way to automatically get the model variables from the model.
+
+  poststratification_frame <- add_proportion(
+    new_data,
+    model_variables = {{ model_variables }},
+    estimates_by = {{ estimates_by }},
+    weight_column = {{ weight_column }}
+  )
+
+  model %>%
+    tidybayes::add_predicted_draws(newdata = poststratification_frame) %>%
+    dplyr::ungroup() %>%
+    dplyr::rename(strata_prediction = .prediction) %>%
+    dplyr::mutate(contributing_prediction = strata_prediction * strata_proportion) %>%
+    # One population-level prediction per reporting group and draw.
+    dplyr::group_by(dplyr::across({{ estimates_by }}), .draw) %>%
+    dplyr::summarise(pop_prediction = sum(contributing_prediction),
+                     .groups = "drop") %>%
+    # Summarise the draws of each reporting group.
+    dplyr::group_by(dplyr::across({{ estimates_by }})) %>%
+    dplyr::summarise(
+      mean_estimate = mean(pop_prediction),
+      lower_estimate = stats::quantile(pop_prediction, lower_confidence),
+      upper_estimate = stats::quantile(pop_prediction, upper_confidence),
+      .groups = "drop")
+}
+
+# poststratification_frame_2 <- add_proportion(poststratification_frame,
+#                                              model_variables = c(age_group, region),
+#                                              estimates_by = region)
+#
+# hi <- binary_model_1 %>%
+#   tidybayes::add_predicted_draws(newdata = poststratification_frame_2) %>%
+#   ungroup() %>%
+#   rename(strata_prediction = .prediction) %>%
+#   dplyr::mutate(contributing_prediction = strata_prediction*strata_proportion) %>%
+#   dplyr::group_by(region, .draw) %>%
+#   dplyr::summarise(pop_prediction = sum(contributing_prediction), .groups = "drop")
diff --git a/R/analyse.html b/R/analyse.html
diff --git a/R/visualise.R b/R/visualise.R
@@ -0,0 +1,13 @@
+#' Plot MRP estimates next to the raw survey proportions
+#'
+#' @param data Survey data frame with the columns `state` and `supports_ALP`.
+#'   For every state, the proportion of rows with `supports_ALP == 1` is
+#'   plotted as the "Raw data" point.
+#' @param post_stratified_estimates Data frame with the columns `state`,
+#'   `mean`, `lower` and `upper`, plotted as the "MRP estimate" point and
+#'   error bar. States are shown in their order of appearance.
+#'
+#' @return A ggplot object.
+#' @importFrom magrittr %>%
+#' @noRd
+visualise_model_effect <- function(data, post_stratified_estimates) {
+  # Share of respondents per state with supports_ALP == 1, computed from the
+  # `data` argument rather than from a global object.
+  raw_proportions <- data %>%
+    dplyr::group_by(state, supports_ALP) %>%
+    dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
+    dplyr::group_by(state) %>%
+    dplyr::mutate(prop = n / sum(n)) %>%
+    dplyr::ungroup() %>%
+    dplyr::filter(supports_ALP == 1)
+
+  ggplot2::ggplot(
+    post_stratified_estimates,
+    ggplot2::aes(y = mean, x = forcats::fct_inorder(state), color = "MRP estimate")
+  ) +
+    ggplot2::geom_point() +
+    ggplot2::geom_errorbar(ggplot2::aes(ymin = lower, ymax = upper), width = 0) +
+    ggplot2::geom_point(
+      data = raw_proportions,
+      mapping = ggplot2::aes(state, prop, color = "Raw data")
+    )
+}
diff --git a/data-raw/simulated_data.R b/data-raw/simulated_data.R
@@ -0,0 +1,48 @@
+# Simulates the `example_census` and `example_survey` datasets and stores them
+# in data/ via usethis::use_data().
+#
+# example_census <- read.csv("https://github.com/RohanAlexander/mrp_workshop/blob/master/outputs/data/census_data.csv")
+
+library(magrittr) # provides %>%
+
+# Fix the RNG state so that re-running this script regenerates identical data.
+set.seed(20200827)
+
+region_options <- c("A", "B", "C", "D", "E")
+age_group_options <- c("16_to_24", "25_to_34", "35_to_44", "45_to_54", "55_to_64", "65_plus")
+age_options <- 16:80
+ethnicity_options <- 1:5
+gender_options <- c("male", "female", "other")
+gender_probabilities <- c(0.48, 0.48, 0.04)
+
+
+# Census ----
+# Sample individuals, then count them per stratum.
+
+census_size <- 1000000
+
+example_census <- tibble::tibble(
+  region = sample(region_options, size = census_size, replace = TRUE),
+  age_group = sample(age_group_options, size = census_size, replace = TRUE),
+  ethnicity = sample(ethnicity_options, size = census_size, replace = TRUE),
+  gender = sample(gender_options, size = census_size, replace = TRUE, prob = gender_probabilities)
+) %>%
+  dplyr::group_by(region, age_group, ethnicity, gender) %>%
+  dplyr::summarise(population_total = dplyr::n(),
+                  .groups = "drop")
+
+usethis::use_data(example_census, overwrite = TRUE)
+
+
+# Survey ----
+# NOTE(review): the survey records a numeric `age` while the census records
+# `age_group`; confirm whether the survey should also carry `age_group`.
+
+survey_size <- 17000
+
+example_survey <- tibble::tibble(
+  region = sample(region_options, size = survey_size, replace = TRUE),
+  age = sample(age_options, size = survey_size, replace = TRUE),
+  ethnicity = sample(ethnicity_options, size = survey_size, replace = TRUE),
+  gender = sample(gender_options, size = survey_size, replace = TRUE, prob = gender_probabilities) %>%
+    forcats::as_factor(),
+  # Both responses depend on age relative to the oldest simulated age.
+  non_negative_response = rgamma(
+    n = survey_size,
+    shape = age / max(age_options),
+    scale = 1),
+  binary_response = rbinom(
+    n = survey_size,
+    size = 1,
+    prob = age / max(age_options)) %>%
+    as.numeric()
+)
+
+
+usethis::use_data(example_survey, overwrite = TRUE)
diff --git a/data/example_census.rda b/data/example_census.rda
diff --git a/data/example_survey.rda b/data/example_survey.rda
diff --git a/man/create_poststratification_frame.Rd b/man/create_poststratification_frame.Rd
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -0,0 +1,5 @@
+# Test runner: attaches testthat and the package, then runs the tidymrp
+# test suite.
+library(testthat)
+library(tidymrp)

+test_check("tidymrp")
+
diff --git a/tidymrp.Rproj b/tidymrp.Rproj
@@ -0,0 +1,20 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/advanced_poststratification.Rmd b/vignettes/advanced_poststratification.Rmd
@@ -0,0 +1,21 @@
+---
+title: "Advanced Poststratification"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{advanced_poststratification}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+# Chunk defaults for this vignette: collapse each chunk's code and output
+# into a single block and prefix printed output with "#>".
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(tidymrp)
+```
+
+
diff --git a/vignettes/creating_a_poststratification_frame.Rmd b/vignettes/creating_a_poststratification_frame.Rmd
@@ -0,0 +1,20 @@
+---
+title: "Creating a Poststratification Frame"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{creating_a_poststratification_frame}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+# Chunk defaults for this vignette: collapse each chunk's code and output
+# into a single block and prefix printed output with "#>".
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(tidymrp)
+```
+
diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Generated by roxygen2: do not edit by hand`
	`2`	`+`
	`3`	`+export(create_poststratification_frame)`