Skip to content

Commit 4ace5f1

Browse files
authored
Merge pull request #27 from fasrc/release_v0.0.1
Release v0.0.1
2 parents e9189a3 + 91ffea5 commit 4ace5f1

File tree

177 files changed

+13905
-4058
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

177 files changed

+13905
-4058
lines changed

.Rbuildignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
^CRE\.Rproj$
22
^\.Rproj\.user$
33
^LICENSE\.md$
4+
^\.github$
5+
_archive
6+
^_pkgdown\.yml$
7+
^docs$
8+
^pkgdown$
9+
^doc$
10+
^Meta$
11+
index.md

.github/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.html

.github/workflows/R-CMD-check.yaml

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
2+
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- develop
8+
pull_request:
9+
branches:
10+
- main
11+
- develop
12+
13+
name: R-CMD-check
14+
15+
jobs:
16+
R-CMD-check:
17+
runs-on: ${{ matrix.config.os }}
18+
19+
name: ${{ matrix.config.os }} (${{ matrix.config.r }})
20+
21+
strategy:
22+
fail-fast: false
23+
matrix:
24+
config:
25+
- {os: windows-latest, r: 'release'}
26+
#- {os: macOS-latest, r: 'release'}
27+
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
28+
- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
29+
30+
env:
31+
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
32+
RSPM: ${{ matrix.config.rspm }}
33+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
34+
35+
steps:
36+
- uses: actions/checkout@v2
37+
38+
- uses: r-lib/actions/setup-r@v1
39+
with:
40+
r-version: ${{ matrix.config.r }}
41+
42+
- uses: r-lib/actions/setup-pandoc@v1
43+
44+
- name: Query dependencies
45+
run: |
46+
install.packages('remotes')
47+
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
48+
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
49+
shell: Rscript {0}
50+
51+
- name: Restore R package cache
52+
uses: actions/cache@v2
53+
with:
54+
path: ${{ env.R_LIBS_USER }}
55+
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
56+
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
57+
58+
- name: Install system dependencies
59+
if: runner.os == 'Linux'
60+
run: |
61+
while read -r cmd
62+
do
63+
eval sudo $cmd
64+
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
65+
66+
- name: Install omp
67+
if: runner.os == 'macOS'
68+
run: |
69+
brew install gfortran
70+
brew install llvm boost libomp
71+
LDFLAGS_cmd=$(brew info llvm | grep "export LDFLAGS=")
72+
LDFLAGS_path=$(echo $LDFLAGS_cmd | awk '{print $2}' | sed 's/"//g')
73+
eval $LDFLAGS_cmd
74+
clang_path=$(which clang)
75+
clangpp_path=$(which clang++)
76+
mkdir ~/.R
77+
touch ~/.R/Makevars
78+
echo "CC=$clang_path" >> ~/.R/Makevars
79+
echo "CXX=$clangpp_path" >> ~/.R/Makevars
80+
echo "CXX11=$clangpp_path" >> ~/.R/Makevars
81+
echo "CXX14=$clangpp_path" >> ~/.R/Makevars
82+
echo "CXX17=$clangpp_path" >> ~/.R/Makevars
83+
echo "CXX1X=$clangpp_path" >> ~/.R/Makevars
84+
echo "$LDFLAGS_path -o task -fopenmp" >> ~/.R/Makevars
85+
86+
87+
- name: Install dependencies
88+
run: |
89+
remotes::install_deps(dependencies = TRUE)
90+
remotes::install_cran("rcmdcheck")
91+
remotes::install_cran("devtools")
92+
remotes::install_cran("RcppParallel")
93+
devtools::install_github("JingyuHe/XBART")
94+
devtools::install_github("socket778/XBCF")
95+
shell: Rscript {0}
96+
97+
- name: Check
98+
env:
99+
_R_CHECK_CRAN_INCOMING_REMOTE_: false
100+
run: |
101+
options(crayon.enabled = TRUE)
102+
rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
103+
shell: Rscript {0}
104+
105+
- name: Upload check results
106+
if: failure()
107+
uses: actions/upload-artifact@main
108+
with:
109+
name: ${{ runner.os }}-r${{ matrix.config.r }}-results
110+
path: check

DESCRIPTION

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,77 @@
11
Package: CRE
2-
Title: What the Package Does (One Line, Title Case)
2+
Title: Interpretable Subgroups Identification through Ensemble Learning of
3+
Causal Rules
34
Version: 0.0.1
4-
Authors@R:
5+
Authors@R: c(
56
person(given="Kwonsang",
67
family = "Lee",
78
role = c("aut"),
89
email = "[email protected]",
9-
comment = c(ORCID = "0000-0002-5823-4331"))
10-
Description: What the package does (one paragraph).
10+
comment = c(ORCID = "0000-0002-5823-4331")),
11+
person(given="Falco Joannes",
12+
family = "Bargagli Stoffi",
13+
role = c("aut"),
14+
email = "[email protected]",
15+
comment = c(ORCID = "0000-0002-6131-8165")),
16+
person(given="Daniela Maria",
17+
family = "Garcia",
18+
role = c("aut"),
19+
email = "[email protected]",
20+
comment = c(ORCID = "0000-0003-3226-3561")),
21+
person(given="Naeem",
22+
family="Khoshnevis",
23+
email = "[email protected]",
24+
role=c("aut","cre"),
25+
comment = c(ORCID = "0000-0003-4315-1426", AFFILIATION="FASRC")))
26+
Maintainer: Naeem Khoshnevis <[email protected]>
27+
Description: Provides an interpretable identification of subgroups with
28+
heterogeneous causal effect. The heterogeneous subgroups are discovered
29+
through ensemble learning of causal rules. Causal rules are highly
30+
interpretable if-then statements that recursively partition the features
31+
space into heterogeneous subgroups. A small number of significant causal
32+
rules are selected through Stability Selection to control for family-wise
33+
error rate in the finite sample setting. It proposes various estimation
34+
methods for the conditional causal effects for each discovered causal rule.
35+
It is highly flexible and multiple causal estimands and imputation methods
36+
are implemented.
37+
Lee, K., Bargagli-Stoffi, F. J., & Dominici, F. (2020). Causal rule ensemble:
38+
Interpretable inference of heterogeneous treatment effects.
39+
arXiv preprint <arXiv:2009.09036>.
1140
License: GPL-3
41+
URL: https://github.com/fasrc/CRE
42+
BugReports: https://github.com/fasrc/CRE/issues
43+
Copyright: Harvard University
1244
Encoding: UTF-8
1345
LazyData: true
1446
Roxygen: list(markdown = TRUE)
1547
RoxygenNote: 7.1.1
48+
Suggests:
49+
knitr,
50+
rmarkdown,
51+
testthat (>= 3.0.0)
52+
Config/testthat/edition: 3
53+
Imports: tidyverse,
54+
dplyr,
55+
BART,
56+
bcf,
57+
randomForest,
58+
xgboost,
59+
gbm,
60+
inTrees,
61+
stabs,
62+
rpart,
63+
grf,
64+
glmnet,
65+
MASS,
66+
doParallel,
67+
foreach,
68+
devtools,
69+
XBART,
70+
XBCF,
71+
stringr,
72+
bartCause,
73+
gnm,
74+
magrittr,
75+
baggr,
76+
logger
77+
VignetteBuilder: knitr

NAMESPACE

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,27 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export(analyze_sensitivity)
4+
export(check_input_data)
45
export(cre)
56
export(estimate_cate)
6-
export(estimate_ipw_sipw)
77
export(estimate_ite)
88
export(estimate_ite_bart)
99
export(estimate_ite_bcf)
1010
export(estimate_ite_cf)
1111
export(estimate_ite_ipw)
1212
export(estimate_ite_or)
13+
export(estimate_ite_poisson)
14+
export(estimate_ite_sipw)
15+
export(estimate_ite_xbart)
16+
export(estimate_ite_xbcf)
17+
export(estimate_ps)
18+
export(extract_rules)
19+
export(generate_cre_dataset)
1320
export(generate_rules)
14-
export(generate_ruls_matrix)
21+
export(generate_rules_matrix)
22+
export(get_logger)
23+
export(interpret_select_rules)
1524
export(select_causal_rules)
25+
export(set_logger)
1626
export(split_data)
27+
export(take1)

NEWS.md

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,33 @@
1-
## CRE (devloping version)
2-
3-
### Added
1+
## CRE 0.0.1 (2021-10-20)
42

53
### Changed
4+
- `estimate_cate` includes two methods for estimating the CATE values
5+
- `cre` added initial checks for binary outcome and whether to include the propensity score in the ITE estimation
6+
- `estimate_ite_xyz` functions conduct propensity score estimation using a helper function
7+
- Removed `seed` as an input from `generate_cre_dataset` function.
68

7-
### Fixed
8-
9-
### Removed
10-
9+
### Added
10+
- `set_logger` and `get_logger`
11+
- `check_input_data` function
12+
- example to `generate_cre_dataset`
13+
- `generate_cre_dataset` function to generate synthetic data for testing the package
14+
- `test-generate_cre_dataset` function test
15+
- `estimate_ps` function to estimate the propensity score
16+
- `estimate_ite_xbart` function to generate ITE estimates using accelerated BART
17+
- `estimate_ite_xbcf` function to generate ITE estimates using accelerated BCF
18+
- `analyze_sensitivity` function to conduct sensitivity analysis for unmeasured confounding
19+
- `cre` function to perform the entire Causal Rule Ensemble method
20+
- `estimate_cate` function to generate CATE estimates from the ITE estimates and select rules
21+
- `estimate_ite` function to generate ITE estimates using the user-specified method (calls the other `estimate_ite_xyz` functions)
22+
- `estimate_ite_bart` function to generate ITE estimates using BART
23+
- `estimate_ite_bcf` function to generate ITE estimates using Bayesian Causal Forests
24+
- `estimate_ite_cf` function to generate ITE estimates using Causal Forests
25+
- `estimate_ite_ipw` function to generate ITE estimates using IPW
26+
- `estimate_ite_or` function to generate ITE estimates using Outcome Regression
27+
- `estimate_ite_sipw` function to generate ITE estimates using SIPW
28+
- `extract_rules` function to extract a list of causal rules from randomForest and GBM models
29+
- `generate_rules` function to generate causal rule models using randomForest and GBM methods
30+
- `generate_rules_matrix` function to convert a list of causal rules into a matrix
31+
- `select_causal_rules` function to apply penalized regression to causal rules to select only the most important ones
32+
- `split_data` function to split input data into discovery and inference subsamples
33+
- `take1` function to create a subsample of indices

R/analyze_sensitivity.R

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,16 @@
11
#' @title
2-
#' Title
2+
#' Sensitivity Analysis
33
#'
44
#' @description
5-
#' Description
5+
#' Method for analyzing the sensitivity of the estimates of the causal rule-specific treatment effects
66
#'
7-
#' @param tau input value description
8-
#' @param X input value description
9-
#' @param select_rules input value description
10-
#' @param method input value description
7+
#' @param ite_std the standardized ITE
8+
#' @param rules_matrix_std the standardized causal rules matrix
9+
#'
10+
#' @return a list containing the results of the sensitivity analysis
1111
#'
12-
#' @return
1312
#' @export
1413
#'
15-
#' @examples
16-
#' TBD
17-
analyze_sensitivity <- function(tau, X, select_rules, method){
14+
analyze_sensitivity <- function(ite_std, rules_matrix_std) {
1815
# TBD: placeholder -- the sensitivity analysis is not implemented yet, so
# neither `ite_std` nor `rules_matrix_std` is used by this function body.
1916
}

R/check_input_data.R

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#' @title
#' Check Input Data
#'
#' @description
#' Conducts sanity checks for the input data: `y` and `z` must both be
#' numeric vectors of the same length, and `X` must be a two-dimensional
#' object with one row per observation.
#'
#' @param y the observed response vector
#' @param z the treatment vector
#' @param X the features matrix
#'
#' @return
#' Number of data samples (returned invisibly). An error is raised as soon as
#' one of the checks fails.
#'
#' @export
#'
check_input_data <- function(y, z, X) {

  #---------------- Input data checks ------------------------------------------
  ## type
  # Both properties are required. The input is rejected when it is not a
  # vector OR not numeric (e.g. a character vector or a numeric matrix).
  if (!(is.vector(y) && is.numeric(y))) {
    stop("Observed response vector (y) input values should be a numerical vector")
  }

  if (!(is.vector(z) && is.numeric(z))) {
    stop("Treatment (z) input values should be a numerical vector.")
  }

  # if (!is.data.frame(X)){
  #   stop("Covariates (x) input values should be a data.frame.")
  # }

  ## size
  y_size <- length(y)
  z_size <- length(z)

  if (y_size != z_size) {
    stop(paste("Response and treatment vectors should be the same size. ",
               "Current values: ", y_size, ", ", z_size))
  }

  # nrow() is NULL for objects without dimensions (e.g. a plain vector);
  # report that explicitly instead of failing on a zero-length condition.
  covars_rows <- nrow(X)

  if (is.null(covars_rows)) {
    stop(paste("Covariates (X) should be a two-dimensional object",
               "(matrix or data.frame) with one row per observation."))
  }

  if (covars_rows != y_size) {
    # Only the row count is reported; pasting the full dim vector would
    # produce one message per dimension.
    stop(paste("Covariates (X) data.frame has different number of",
               "observation than response and treatment vectors.",
               "Current values: ", covars_rows, ", ", y_size))
  }
  #-----------------------------------------------------------------------------

  invisible(y_size)
}

0 commit comments

Comments
 (0)