Skip to content

Commit a1f1cdd

Browse files
committed
Initial commit
0 parents  commit a1f1cdd

18 files changed

+1547
-0
lines changed

.Rbuildignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
^.*\.Rproj$
2+
^\.Rproj\.user$
3+
reference/
4+
^LICENSE\.md$

.gitignore

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
.Ruserdata
5+
reference/
6+
inst/doc
7+
notes/

DESCRIPTION

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Package: tidymrp
2+
Type: Package
3+
Title: Tidy multilevel regression and poststratification (MRP)
4+
Version: 0.0.0.9000
5+
Authors@R: person("Jobi", "Kroese", email = "[email protected]",
6+
role = c("aut", "cre"))
7+
Description: Run multilevel regression and poststratification analyses.
8+
It provides functions useful for MRP workflows including creating poststratification frames,
9+
poststratifying and visualising MRP results.
10+
License: AGPL-3
11+
Encoding: UTF-8
12+
LazyData: true
13+
RoxygenNote: 7.1.1
14+
Suggests:
15+
testthat,
16+
knitr,
17+
rmarkdown
18+
Imports:
19+
magrittr,
20+
dplyr,
21+
tibble,
22+
brms,
23+
tidybayes
24+
Depends:
25+
R (>= 2.10)
26+
VignetteBuilder: knitr

LICENSE.md

+659
Large diffs are not rendered by default.

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(create_poststratification_frame)

R/analyse.R

+133
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#' Create poststratification frame from a population level survey such as a census
2+
#'
3+
#' @param census
4+
#' @param strata_variables
5+
#' @param weight_column
6+
#'
7+
#' @return
8+
#' @export
9+
#'
10+
#' @examples
11+
create_poststratification_frame <- function(census, strata_variables, weight_column = 1) {
12+
census %>%
13+
dplyr::mutate(weight = {{ weight_column }}) %>%
14+
dplyr::group_by(dplyr::across({{ strata_variables }})) %>%
15+
dplyr::summarise(population_total = sum(weight),
16+
.groups = "drop")
17+
}
18+
19+
20+
#' Create post-stratified draws from a model
21+
#'
22+
#' @param model
23+
#' @param poststratification_frame
24+
#'
25+
#' @return
26+
#' @export
27+
#'
28+
#' @examples
29+
get_poststratified_draws <- function(model, new_data, weight_column = population_total) {
30+
poststratified_draws <- model %>%
31+
tidybayes::add_predicted_draws(newdata = new_data) %>%
32+
# rename(prediction = .prediction) %>%
33+
mutate(prediction_sum = .prediction * {{ weight_column }}) %>%
34+
ungroup()
35+
36+
poststratified_draws
37+
}
38+
39+
40+
get_strata_estimates <- function(model, new_data, group_variables, weight_column = population_total, lower_confidence = 0.025, upper_confidence = 1-lower_confidence) {
41+
42+
model %>%
43+
tidybayes::add_predicted_draws(newdata = new_data) %>%
44+
ungroup() %>%
45+
dplyr::group_by(dplyr::across({{ group_variables }}), .draw) %>%
46+
dplyr::summarise(.prediction = mean(.prediction), .groups = "drop") %>%
47+
ungroup() %>%
48+
dplyr::group_by(dplyr::across({{ group_variables }})) %>%
49+
dplyr::summarise(
50+
mean = mean(.prediction),
51+
lower = quantile(.prediction, lower_confidence),
52+
upper = quantile(.prediction, upper_confidence),
53+
.groups = "drop") %>%
54+
ungroup()
55+
}
56+
57+
#' Adds a population_proportion column to a poststratification frame.
58+
#'
59+
#' @param poststratification_frame
60+
#' @param model_variables
61+
#' @param estimates_by
62+
#' @param weight_column
63+
#'
64+
#' @return
65+
#' @export
66+
#'
67+
#' @examples
68+
add_proportion <- function(poststratification_frame, model_variables, estimates_by, weight_column = population_total) {
69+
70+
# variable_group <- poststratification_frame %>%
71+
# dplyr::select( {{ model_variables }} ) %>%
72+
# dplyr::select( -{{ estimates_by }} ) %>%
73+
# names()
74+
75+
# dataset %>%
76+
# ungroup() %>%
77+
# dplyr::group_by(dplyr::across({{ variable_group_3 }})) %>%
78+
# dplyr::summarise(result = max(col_4))
79+
80+
results_by_totals <- poststratification_frame %>%
81+
dplyr::group_by(dplyr::across({{ estimates_by }})) %>%
82+
dplyr::summarise(population_total_sum = sum({{ weight_column }}),
83+
.groups = "drop")
84+
85+
poststratification_frame %>%
86+
dplyr::left_join(results_by_totals) %>%
87+
dplyr::group_by(dplyr::across({{ model_variables }}), population_total_sum) %>%
88+
dplyr::summarise(population_total = sum(population_total)) %>%
89+
dplyr::mutate(strata_proportion = population_total/population_total_sum) %>%
90+
select(-population_total_sum)
91+
# dplyr::mutate(population_proportion = population_total/population_total_sum) %>%
92+
# population
93+
# dplyr::group_by()
94+
# population_proportion = {{ weight_column }}/population_total_sum,
95+
# .groups = "drop")
96+
}
97+
98+
get_poststratified_estimates <- function(model, new_data, model_variables, estimates_by, weight_column = population_total, lower_confidence = 0.025, upper_confidence = 1-lower_confidence) {
99+
100+
# add way to automatically get model variables from model
101+
102+
poststratification_frame <- add_proportion(new_data, {{ model_variables }}, {{ estimates_by }})
103+
104+
model %>%
105+
tidybayes::add_predicted_draws(newdata = poststratification_frame) %>%
106+
ungroup() %>%
107+
rename(strata_prediction = .prediction) %>%
108+
dplyr::mutate(contributing_prediction = strata_prediction*strata_proportion) %>%
109+
dplyr::group_by(dplyr::across({{ estimates_by }}), .draw) %>%
110+
dplyr::summarise(pop_prediction = sum(contributing_prediction),
111+
.groups = "drop") %>%
112+
ungroup() %>%
113+
dplyr::group_by(dplyr::across({{ estimates_by }})) %>%
114+
dplyr::summarise(
115+
mean_estimate = mean(pop_prediction),
116+
lower_estimate = quantile(pop_prediction, lower_confidence),
117+
upper_estimate = quantile(pop_prediction, upper_confidence),
118+
.groups = "drop") %>%
119+
ungroup()
120+
121+
}
122+
123+
# poststratification_frame_2 <- add_proportion(poststratification_frame,
124+
# model_variables = c(age_group, region),
125+
# estimates_by = region)
126+
#
127+
# hi <- binary_model_1 %>%
128+
# tidybayes::add_predicted_draws(newdata = poststratification_frame_2) %>%
129+
# ungroup() %>%
130+
# rename(strata_prediction = .prediction) %>%
131+
# dplyr::mutate(contributing_prediction = strata_prediction*strata_proportion) %>%
132+
# dplyr::group_by(region, .draw) %>%
133+
# dplyr::summarise(pop_prediction = sum(contributing_prediction), .groups = "drop")

R/analyse.html

+433
Large diffs are not rendered by default.

R/visualise.R

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
visualise_model_effect <- function(data, post_stratified_estimates) {
2+
post_stratified_estimates %>%
3+
ggplot(aes(y = mean, x = forcats::fct_inorder(state), color = "MRP estimate")) +
4+
geom_point() +
5+
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
6+
geom_point(data = example_poll %>%
7+
group_by(state, supports_ALP) %>%
8+
summarise(n = n()) %>%
9+
group_by(state) %>%
10+
mutate(prop = n/sum(n)) %>%
11+
filter(supports_ALP==1),
12+
aes(state, prop, color = "Raw data"))
13+
}

data-raw/simulated_data.R

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# example_census <- read.csv("https://github.com/RohanAlexander/mrp_workshop/blob/master/outputs/data/census_data.csv")
2+
3+
region_options <- c("A", "B", "C", "D", "E")
4+
age_group_options <- c("16_to_24", "25_to_34", "35_to_44", "45_to_54", "55_to_64", "65_plus")
5+
age_options <- 16:80
6+
ethnicity_options <- 1:5
7+
gender_options <- c("male", "female", "other")
8+
9+
10+
census_size <- 1000000
11+
12+
example_census <- tibble::tibble(
13+
region = sample(region_options, size = census_size, replace = TRUE),
14+
age_group = sample(age_group_options, size = census_size, replace = TRUE),
15+
ethnicity = sample(ethnicity_options, size = census_size, replace = TRUE),
16+
gender = sample(gender_options, size = census_size, replace = TRUE, prob = c(0.48, 0.48, 0.04))
17+
) %>%
18+
dplyr::group_by(region, age_group, ethnicity, gender) %>%
19+
dplyr::summarise(population_total = dplyr::n(),
20+
.groups = "drop")
21+
22+
usethis::use_data(example_census, overwrite = TRUE)
23+
24+
25+
26+
27+
survey_size <- 17000
28+
29+
30+
example_survey <- tibble::tibble(
31+
region = sample(region_options, size = survey_size, replace = TRUE),
32+
age = sample(age_options, size = survey_size, replace = TRUE),
33+
ethnicity = sample(ethnicity_options, size = survey_size, replace = TRUE),
34+
gender = sample(gender_options, size = survey_size, replace = TRUE, prob = c(0.48, 0.48, 0.04)) %>%
35+
forcats::as_factor(),
36+
non_negative_response = rgamma(
37+
n = survey_size,
38+
shape = 1*age/max(age_options),
39+
scale = 1),
40+
binary_response = rbinom(
41+
size = 1,
42+
n = survey_size,
43+
p = age/max(age_options)) %>%
44+
as.numeric()
45+
)
46+
47+
48+
usethis::use_data(example_survey, overwrite = TRUE)

data/example_census.rda

1.21 KB
Binary file not shown.

data/example_survey.rda

160 KB
Binary file not shown.

man/create_poststratification_frame.Rd

+17
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat.R

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
library(testthat)
2+
library(tidymrp)
3+
4+
test_check("tidymrp")
5+

tidymrp.Rproj

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Version: 1.0
2+
3+
RestoreWorkspace: Default
4+
SaveWorkspace: Default
5+
AlwaysSaveHistory: Default
6+
7+
EnableCodeIndexing: Yes
8+
UseSpacesForTab: Yes
9+
NumSpacesForTab: 2
10+
Encoding: UTF-8
11+
12+
RnwWeave: Sweave
13+
LaTeX: pdfLaTeX
14+
15+
AutoAppendNewline: Yes
16+
StripTrailingWhitespace: Yes
17+
18+
BuildType: Package
19+
PackageUseDevtools: Yes
20+
PackageInstallArgs: --no-multiarch --with-keep.source

vignettes/.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.html
2+
*.R
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
---
2+
title: "Advanced Poststratification"
3+
output: rmarkdown::html_vignette
4+
vignette: >
5+
%\VignetteIndexEntry{advanced_poststratification}
6+
%\VignetteEngine{knitr::rmarkdown}
7+
%\VignetteEncoding{UTF-8}
8+
---
9+
10+
```{r, include = FALSE}
11+
knitr::opts_chunk$set(
12+
collapse = TRUE,
13+
comment = "#>"
14+
)
15+
```
16+
17+
```{r setup}
18+
library(tidymrp)
19+
```
20+
21+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
title: "Creating a Poststratification Frame"
3+
output: rmarkdown::html_vignette
4+
vignette: >
5+
%\VignetteIndexEntry{creating_a_poststratification_frame}
6+
%\VignetteEngine{knitr::rmarkdown}
7+
%\VignetteEncoding{UTF-8}
8+
---
9+
10+
```{r, include = FALSE}
11+
knitr::opts_chunk$set(
12+
collapse = TRUE,
13+
comment = "#>"
14+
)
15+
```
16+
17+
```{r setup}
18+
library(tidymrp)
19+
```
20+

0 commit comments

Comments
 (0)