Skip to content

Commit 0c8f9e5

Browse files
authored
Merge pull request #72 from NSAPH-Software/release_v0.1.0
Release v0.1.0
2 parents 4ace5f1 + 3d9e771 commit 0c8f9e5

File tree

345 files changed

+25594
-9349
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

345 files changed

+25594
-9349
lines changed

.Rbuildignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,11 @@ _archive
99
^doc$
1010
^Meta$
1111
index.md
12+
^CRAN-RELEASE$
13+
^cran-comments\.md$
14+
^codecov\.yml$
15+
^.covrignore
16+
^_R/*
17+
^docker_singularity/*$
18+
^functional_tests/*$
19+
^CRAN-SUBMISSION$

.covrignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
R/zzz.R
2+
R/plot.R
3+
R/print.R
4+
R/check_hyper_params.R
5+
R/logger_utils.R

.github/workflows/R-CMD-check.yaml

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
matrix:
2424
config:
2525
- {os: windows-latest, r: 'release'}
26-
#- {os: macOS-latest, r: 'release'}
26+
- {os: macOS-latest, r: 'release'}
2727
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
2828
- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
2929

@@ -71,17 +71,18 @@ jobs:
7171
LDFLAGS_cmd=$(brew info llvm | grep "export LDFLAGS=")
7272
LDFLAGS_path=$(echo $LDFLAGS_cmd | awk '{print $2}' | sed 's/"//g')
7373
eval $LDFLAGS_cmd
74-
clang_path=$(which clang)
75-
clangpp_path=$(which clang++)
74+
llvm_path_tmp=$(brew info llvm | grep "echo 'export PATH")
75+
llvm_path=$(echo $llvm_path_tmp | awk '{print $3}'| sed 's/PATH="//' | sed 's/:.*//')
7676
mkdir ~/.R
7777
touch ~/.R/Makevars
78-
echo "CC=$clang_path" >> ~/.R/Makevars
79-
echo "CXX=$clangpp_path" >> ~/.R/Makevars
80-
echo "CXX11=$clangpp_path" >> ~/.R/Makevars
81-
echo "CXX14=$clangpp_path" >> ~/.R/Makevars
82-
echo "CXX17=$clangpp_path" >> ~/.R/Makevars
83-
echo "CXX1X=$clangpp_path" >> ~/.R/Makevars
78+
echo "CC=$llvm_path/clang" >> ~/.R/Makevars
79+
echo "CXX=$llvm_path/clang++" >> ~/.R/Makevars
80+
echo "CXX11=$llvm_path/clang++" >> ~/.R/Makevars
81+
echo "CXX14=$llvm_path/clang++" >> ~/.R/Makevars
82+
echo "CXX17=$llvm_path/clang++" >> ~/.R/Makevars
83+
echo "CXX1X=$llvm_path/clang++" >> ~/.R/Makevars
8484
echo "$LDFLAGS_path -o task -fopenmp" >> ~/.R/Makevars
85+
cat ~/.R/Makevars
8586
8687
8788
- name: Install dependencies
@@ -90,8 +91,9 @@ jobs:
9091
remotes::install_cran("rcmdcheck")
9192
remotes::install_cran("devtools")
9293
remotes::install_cran("RcppParallel")
93-
devtools::install_github("JingyuHe/XBART")
94-
devtools::install_github("socket778/XBCF")
94+
remotes::install_cran("baggr")
95+
remotes::install_cran("grf")
96+
remotes::install_cran("BART")
9597
shell: Rscript {0}
9698

9799
- name: Check
@@ -108,3 +110,7 @@ jobs:
108110
with:
109111
name: ${{ runner.os }}-r${{ matrix.config.r }}-results
110112
path: check
113+
114+
- name: Test coverage
115+
run: covr::codecov()
116+
shell: Rscript {0}

DESCRIPTION

Lines changed: 64 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,74 @@
1+
Type: Package
12
Package: CRE
2-
Title: Interpretable Subgroups Identification through Ensemble Learning of
3+
Title: Interpretable Subgroups Identification Through Ensemble Learning of
34
Causal Rules
4-
Version: 0.0.1
5+
Version: 0.1.0
56
Authors@R: c(
6-
person(given="Kwonsang",
7-
family = "Lee",
8-
role = c("aut"),
9-
email = "[email protected]",
10-
comment = c(ORCID = "0000-0002-5823-4331")),
11-
person(given="Falco Joannes",
12-
family = "Bargagli Stoffi",
13-
role = c("aut"),
14-
email = "[email protected]",
15-
comment = c(ORCID = "0000-0002-6131-8165")),
16-
person(given="Daniela Maria",
17-
family = "Garcia",
18-
role = c("aut"),
19-
email = "[email protected]",
7+
person("Naeem", "Khoshnevis", , "[email protected]", role = c("aut", "cre"),
8+
comment = c(ORCID = "0000-0003-4315-1426", AFFILIATION = "FASRC")),
9+
person("Daniela Maria", "Garcia", , "[email protected]", role = "aut",
2010
comment = c(ORCID = "0000-0003-3226-3561")),
21-
person(given="Naeem",
22-
family="Khoshnevis",
23-
email = "[email protected]",
24-
role=c("aut","cre"),
25-
comment = c(ORCID = "0000-0003-4315-1426", AFFILIATION="FASRC")))
11+
person("Riccardo", "Cadei", , "[email protected]", role = "aut",
12+
comment = c(ORCID = "0000-0003-2416-8943")),
13+
person("Kwonsang", "Lee", , "[email protected]", role = "aut",
14+
comment = c(ORCID = "0000-0002-5823-4331")),
15+
person("Falco Joannes", "Bargagli Stoffi", , "[email protected]", role = "aut",
16+
comment = c(ORCID = "0000-0002-6131-8165"))
17+
)
2618
Maintainer: Naeem Khoshnevis <[email protected]>
27-
Description: Provides an interpretable identification of subgroups with
28-
heterogeneous causal effect. The heterogeneous subgroups are discovered
29-
through ensemble learning of causal rules. Causal rules are highly
30-
interpretable if-then statement that recursively partition the features
31-
space into heterogeneous subgroups. A small number of significant causal
32-
rules are selected through Stability Selection to control for family-wise
33-
error rate in the finite sample setting. It proposes various estimation
34-
methods for the conditional causal effects for each discovered causal rule.
35-
It is highly flexible and multiple causal estimands and imputation methods
36-
are implemented.
37-
Lee, K., Bargagli-Stoffi, F. J., & Dominici, F. (2020). Causal rule ensemble:
38-
Interpretable inference of heterogeneous treatment effects.
39-
arXiv preprint <arXiv:2009.09036>.
19+
Description: Provides an interpretable identification of subgroups with
20+
heterogeneous causal effect. The heterogeneous subgroups are
21+
discovered through ensemble learning of causal rules. Causal rules are
22+
highly interpretable if-then statements that recursively partition the
23+
feature space into heterogeneous subgroups. A small number of
24+
significant causal rules are selected through Stability Selection to
25+
control for family-wise error rate in the finite sample setting. It
26+
proposes various estimation methods for the conditional causal effects
27+
for each discovered causal rule. It is highly flexible and multiple
28+
causal estimands and imputation methods are implemented. Lee, K.,
29+
Bargagli-Stoffi, F. J., & Dominici, F. (2020). Causal rule ensemble:
30+
Interpretable inference of heterogeneous treatment effects. arXiv
31+
preprint <arXiv:2009.09036>.
4032
License: GPL-3
41-
URL: https://github.com/fasrc/CRE
42-
BugReports: https://github.com/fasrc/CRE/issues
33+
URL: https://github.com/NSAPH-Software/CRE
34+
BugReports: https://github.com/NSAPH-Software/CRE/issues
35+
Depends:
36+
R (>= 3.5.0)
37+
Imports:
38+
MASS,
39+
stats,
40+
logger,
41+
gbm,
42+
randomForest,
43+
methods,
44+
xgboost,
45+
RRF,
46+
data.table,
47+
xtable,
48+
glmnet,
49+
bartCause,
50+
stabs,
51+
stringr,
52+
SuperLearner,
53+
dplyr,
54+
magrittr,
55+
ggplot2,
56+
bcf,
57+
inTrees
58+
Suggests:
59+
baggr,
60+
grf,
61+
BART,
62+
gnm,
63+
covr,
64+
knitr,
65+
rmarkdown,
66+
testthat (>= 3.0.0)
67+
VignetteBuilder:
68+
knitr
4369
Copyright: Harvard University
4470
Encoding: UTF-8
71+
Language: en-US
4572
LazyData: true
4673
Roxygen: list(markdown = TRUE)
47-
RoxygenNote: 7.1.1
48-
Suggests:
49-
knitr,
50-
rmarkdown,
51-
testthat (>= 3.0.0)
52-
Config/testthat/edition: 3
53-
Imports: tidyverse,
54-
dplyr,
55-
BART,
56-
bcf,
57-
randomForest,
58-
xgboost,
59-
gbm,
60-
inTrees,
61-
stabs,
62-
rpart,
63-
grf,
64-
glmnet,
65-
MASS,
66-
doParallel,
67-
foreach,
68-
devtools,
69-
XBART,
70-
XBCF,
71-
stringr,
72-
bartCause,
73-
gnm,
74-
magrittr,
75-
baggr,
76-
logger
77-
VignetteBuilder: knitr
74+
RoxygenNote: 7.2.1

NAMESPACE

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,19 @@
11
# Generated by roxygen2: do not edit by hand
22

3-
export(analyze_sensitivity)
4-
export(check_input_data)
3+
S3method(plot,cre)
4+
S3method(print,cre)
5+
S3method(summary,cre)
56
export(cre)
6-
export(estimate_cate)
7-
export(estimate_ite)
8-
export(estimate_ite_bart)
9-
export(estimate_ite_bcf)
10-
export(estimate_ite_cf)
11-
export(estimate_ite_ipw)
12-
export(estimate_ite_or)
13-
export(estimate_ite_poisson)
14-
export(estimate_ite_sipw)
15-
export(estimate_ite_xbart)
16-
export(estimate_ite_xbcf)
17-
export(estimate_ps)
18-
export(extract_rules)
197
export(generate_cre_dataset)
20-
export(generate_rules)
21-
export(generate_rules_matrix)
228
export(get_logger)
23-
export(interpret_select_rules)
24-
export(select_causal_rules)
259
export(set_logger)
26-
export(split_data)
27-
export(take1)
10+
import(SuperLearner)
11+
import(data.table)
12+
import(stats)
13+
import(xtable)
14+
importFrom(RRF,RRF)
15+
importFrom(RRF,getTree)
16+
importFrom(gbm,pretty.gbm.tree)
17+
importFrom(ggplot2,autoplot)
18+
importFrom(methods,as)
19+
importFrom(xgboost,xgb.model.dt.tree)

NEWS.md

Lines changed: 67 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,72 @@
1+
## CRE 0.1.0 (2022-10-17)
2+
3+
### Changed
4+
5+
* `select_causal_rules()` is now `lasso_rules_filter()`
6+
* rules generation now accepts replace parameter to set replacement in bootstrapping
7+
* rename parameter `t` with `t_anom`
8+
* add parameter `t_corr`: correlation threshold used to discard rules
9+
* define `discard_anomalous_rules()` and `discard_corre_rules()` functions
10+
and relative tests
11+
* reorganize `generate_rules_matrix()` (separate standardization, and remove filtering)
12+
* explicit `prune_rules()` function and add relative tests
13+
* remove `take1()` function for random Rule Selection
14+
* add effect modifiers filter for Rule Generation
15+
* add `generate_causal_rules()` function and relative tests
16+
* solve Undesired 'All' Decision Rule Issue
17+
* solve No Causal Rule Selected Issue
18+
* improve `summary.cre()` function
19+
* `min_nodes` --> `node_size` (following the randomForest convention)
20+
* `estimate_cate` include five methods for estimating the CATE values (`poisson`, `DRLearner`, `bart-baggr`, `cf-means`, `linreg`)
21+
* `cre` added new arguments to (1) complement `SuperLearner` package (`ps_method_dis`, `ps_method_inf`, `or_method_dis`, `or_method_inf`, `cate_SL_library`) and to (2) select CATE method and (3) whether to filter CATE p-values (`cate_method` and `filter_cate`).
22+
Now returns an S3 object.
23+
* `estimate_ite_xyz` conduct propensity score estimation using helper function with `SuperLearner` package
24+
* `generate_cre_dataset` make number of covariates an argument of the function
25+
* improve examples and update tests for all functions
26+
27+
28+
### Added
29+
* `print` and `summary` generic functions.
30+
* `check_input` function to isolate input checks.
31+
* `estimate_ite_aipw` function for augmented inverse propensity weighting
32+
* `plot.cre` generic function to plot CRE S3 object Results
33+
* `test-cre_functional.R` tests the functionality of the package
34+
* `stability_selection` function for causal rules selection
35+
36+
### Removed
37+
38+
* `estimate_ite_blp` function
39+
140
## CRE 0.0.1 (2021-10-20)
241

342
### Changed
4-
- `estimate_cate` include two methods for estimating the CATE values
5-
- `cre` added initial checks for binary outcome and whether to include the propensity score in the ITE estimation
6-
- `estimate_ite_xyz` conduct propensity score estimation using helper function
7-
- Removed `seed` as an input from `generate_cre_dataset` function.
43+
* `estimate_cate` include two methods for estimating the CATE values
44+
* `cre` added initial checks for binary outcome and whether to include the propensity score in the ITE estimation
45+
* `estimate_ite_xyz` conduct propensity score estimation using helper function
46+
* Removed `seed` as an input from `generate_cre_dataset` function.
847

948
### Added
10-
- `set_logger` and `get_logger`
11-
- `check_input_data` function
12-
- example to `generate_cre_dataset`
13-
- `generate_cre_dataset` function to generate synthetic data for testing the package
14-
- `test-generate_cre_dataset` function test
15-
- `estimate_ps` function to estimate the propensity score
16-
- `estimate_ite_xbart` function to generate ITE estimates using accelerated BART
17-
- `estimate_ite_xbcf` function to generate ITE estimates using accelerated BCF
18-
- `analyze_sensitivity` function to conduct sensitivity analysis for unmeasured confounding
19-
- `cre` function to perform the entire Causal Rule Ensemble method
20-
- `estimate_cate` function to generate CATE estimates from the ITE estimates and select rules
21-
- `estimate_ite` function to generate ITE estimates using the user-specified method (calls the other `estimate_ite_xyz` functions)
22-
- `estimate_ite_bart` function to generate ITE estimates using BART
23-
- `estimate_ite_bcf` function to generate ITE estimates using Bayesian Causal Forests
24-
- `estimate_ite_cf` function to generate ITE estimates using Causal Forests
25-
- `estimate_ite_ipw` function to generate ITE estimates using IPW
26-
- `estimate_ite_or` function to generate ITE estimates using Outcome Regression
27-
- `estimate_ite_sipw` function to generate ITE estimates using SIPW
28-
- `extract_rules` function to extract a list of causal rules from randomForest and GBM models
29-
- `generate_rules` function to generate causal rule models using randomForest and GBM methods
30-
- `generate_rules_matrix` function to convert a list of causal rules into a matrix
31-
- `select_causal_rules` function to apply penalized regression to causal rules to select only the most important ones
32-
- `split_data` function to split input data into discovery and inference subsamples
33-
- `take1` function to create a subsample of indices
49+
* `set_logger` and `get_logger`
50+
* `check_input_data` function
51+
* example to `generate_cre_dataset`
52+
* `generate_cre_dataset` function to generate synthetic data for testing the package
53+
* `test-generate_cre_dataset` function test
54+
* `estimate_ps` function to estimate the propensity score
55+
* `estimate_ite_xbart` function to generate ITE estimates using accelerated BART
56+
* `estimate_ite_xbcf` function to generate ITE estimates using accelerated BCF
57+
* `analyze_sensitivity` function to conduct sensitivity analysis for unmeasured confounding
58+
* `cre` function to perform the entire Causal Rule Ensemble method
59+
* `estimate_cate` function to generate CATE estimates from the ITE estimates and select rules
60+
* `estimate_ite` function to generate ITE estimates using the user-specified method (calls the other `estimate_ite_xyz` functions)
61+
* `estimate_ite_bart` function to generate ITE estimates using BART
62+
* `estimate_ite_bcf` function to generate ITE estimates using Bayesian Causal Forests
63+
* `estimate_ite_cf` function to generate ITE estimates using Causal Forests
64+
* `estimate_ite_ipw` function to generate ITE estimates using IPW
65+
* `estimate_ite_or` function to generate ITE estimates using Outcome Regression
66+
* `estimate_ite_sipw` function to generate ITE estimates using SIPW
67+
* `extract_rules` function to extract a list of causal rules from randomForest and GBM models
68+
* `generate_rules` function to generate causal rule models using randomForest and GBM methods
69+
* `generate_rules_matrix` function to convert a list of causal rules into a matrix
70+
* `select_causal_rules` function to apply penalized regression to causal rules to select only the most important ones
71+
* `split_data` function to split input data into discovery and inference subsamples
72+
* `take1` function to create a subsample of indices

R/CRE_package.R

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#' @title
2+
#' The 'CRE' package.
3+
#'
4+
#' @description
5+
#' Provides an interpretable identification of subgroups with
6+
#' heterogeneous causal effect. The heterogeneous subgroups are
7+
#' discovered through ensemble learning of causal rules. Causal rules are
8+
#' highly interpretable if-then statements that recursively partition the
9+
#' feature space into heterogeneous subgroups. A small number of
10+
#' significant causal rules are selected through Stability Selection to
11+
#' control for family-wise error rate in the finite sample setting. It
12+
#' proposes various estimation methods for the conditional causal effects
13+
#' for each discovered causal rule. It is highly flexible and multiple
14+
#' causal estimands and imputation methods are implemented.
15+
#'
16+
#' @docType package
17+
#' @name CRE-package
18+
#' @aliases CRE
19+
#' @author Naeem Khoshnevis
20+
#' @author Daniela Maria Garcia
21+
#' @author Riccardo Cadei
22+
#' @author Kwonsang Lee
23+
#' @author Falco Joannes Bargagli Stoffi
24+
#' @import xtable
25+
#' @import data.table
26+
#' @importFrom RRF RRF
27+
#' @importFrom RRF getTree
28+
#' @importFrom gbm pretty.gbm.tree
29+
#' @importFrom xgboost xgb.model.dt.tree
30+
#' @import stats
31+
#' @importFrom methods as
32+
#'
33+
#' @references
34+
#' Lee, K.,
35+
#' Bargagli-Stoffi, F. J., & Dominici, F. (2020). Causal rule ensemble:
36+
#' Interpretable inference of heterogeneous treatment effects. arXiv
37+
#' preprint arXiv:2009.09036.
38+
#'
39+
NULL

0 commit comments

Comments
 (0)