Skip to content

Commit 90edb4c

Browse files
authored
Merge pull request #151 from NSAPH-Software/release_ver0.2.5
Release ver0.2.5
2 parents 850e667 + f5213d1 commit 90edb4c

File tree

149 files changed

+3612
-1723
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

149 files changed

+3612
-1723
lines changed

.Rbuildignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
^CRE\.Rproj$
22
^\.Rproj\.user$
33
^LICENSE\.md$
4+
^LICENSE$
45
^\.github$
56
_archive
67
^_pkgdown\.yml$
@@ -19,3 +20,4 @@ index.md
1920
^CRAN-SUBMISSION$
2021
^paper/*$
2122
^CODE_OF_CONDUCT\.md$
23+

.github/workflows/draft-pdf.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
on: [push]
2+
3+
jobs:
4+
paper:
5+
runs-on: ubuntu-latest
6+
name: Paper Draft
7+
steps:
8+
- name: Checkout
9+
uses: actions/checkout@v2
10+
- name: Build draft PDF
11+
uses: openjournals/openjournals-draft-action@master
12+
with:
13+
journal: joss
14+
# This should be the path to the paper within your repo.
15+
paper-path: paper/paper.md
16+
- name: Upload
17+
uses: actions/upload-artifact@v1
18+
with:
19+
name: paper
20+
# This is the output path where Pandoc will write the compiled
21+
# PDF. Note, this should be the same directory as the input
22+
# paper.md
23+
path: paper/paper.pdf

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ src-i386
1515
*.log
1616
.Rdata
1717
.httr-oauth
18+
functional_tests/results/

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: CRE
33
Title: Interpretable Discovery and Inference of Heterogeneous Treatment Effects
4-
Version: 0.2.4
4+
Version: 0.2.5
55
Authors@R: c(
66
person("Naeem", "Khoshnevis", , "[email protected]", role = c("aut", "cre"),
77
comment = c(ORCID = "0000-0003-4315-1426", AFFILIATION = "FASRC")),

LICENSE

Lines changed: 595 additions & 0 deletions
Large diffs are not rendered by default.

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated by roxygen2: do not edit by hand
22

33
S3method(plot,cre)
4+
S3method(predict,cre)
45
S3method(print,cre)
56
S3method(summary,cre)
67
export(cre)

NEWS.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,48 @@
1+
# CRE 0.2.5 (2023-12-6)
2+
3+
## Added
4+
* Add (vanilla) Stability Selection (without Error Control).
5+
* `max_rules` hyper-parameter for max rules filtering.
6+
* Uncertainty Quantification in estimation by bootstrapping.
7+
* `B` hyper-parameter,
8+
* `subsample` hyper-parameter.
9+
* `rules` (implicit form) in cre() function return.
10+
* predict() function for ITE estimation via CRE.
11+
12+
## Changed
13+
* Type `stability_selection` binary -> string ('no','vanilla','error_control').
14+
* Unify `ntrees_rf` hyper-parameter and `ntrees_gbm` hyper-parameter in
15+
`ntrees` hyper-parameter.
16+
* In rules generation retrieve decision rules also from internal nodes, and not
17+
just from terminal nodes.
18+
* `ite_method_dis`, `ite_method_inf` method-parameter -> `ite_method`.
19+
* `ps_method_dis`, `ps_method_inf` method-parameter -> `learner_ps`.
20+
* `oreg_method_dis`, `oreg_method_inf` method-parameter -> `learner_y`.
21+
22+
## Removed
23+
* `max_nodes` hyper-parameter.
24+
* Remove rules generation by Generalized Boosted Regression.
25+
* `replace` hyper-parameter.
26+
* `penalty_rl` hyper-parameter.
27+
* `t_pvalue` hyper-parameter.
28+
* `ite_pred` from cre() function return.
29+
30+
## Bug fixes
31+
* Error saving covariates name in CRE result when using `intervention_vars`.
32+
33+
134
# CRE 0.2.4 (2023-6-14)
235

336
## Changed
437
* Method paper description is updated.
538

39+
640
# CRE 0.2.3 (2023-4-27)
741

842
## Removed
943
* Bayesian Causal Forest (`bcf`) ITE estimator is not supported.
1044

45+
1146
# CRE 0.2.2 (2023-4-17)
1247

1348
## Changed

R/CRE_package.R

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
#' @title
2-
#' The 'CRE' package
2+
#' The CRE package
33
#'
44
#' @description
5-
#' In health and social sciences, it is critically important to
6-
#' identify subgroups of the study population where a treatment
7-
#' has notable heterogeneity in the causal effects with respect
8-
#' to the average treatment effect. Data-driven discovery of
9-
#' heterogeneous treatment effects (HTE) via decision tree methods
10-
#' has been proposed for this task. Despite its high interpretability,
11-
#' the single-tree discovery of HTE tends to be highly unstable and to
12-
#' find an oversimplified representation of treatment heterogeneity.
13-
#' To accommodate these shortcomings, we propose Causal Rule Ensemble
14-
#' (CRE), a new method to discover heterogeneous subgroups through an
15-
#' ensemble-of-trees approach. CRE has the following features:
16-
#' 1) provides an interpretable representation of the HTE; 2) allows
17-
#' extensive exploration of complex heterogeneity patterns; and 3)
18-
#' guarantees high stability in the discovery. The discovered subgroups
19-
#' are defined in terms of interpretable decision rules, and we develop
20-
#' a general two-stage approach for subgroup-specific conditional
5+
#' In health and social sciences, it is critically important to
6+
#' identify subgroups of the study population where a treatment
7+
#' has notable heterogeneity in the causal effects with respect
8+
#' to the average treatment effect. Data-driven discovery of
9+
#' heterogeneous treatment effects (HTE) via decision tree methods
10+
#' has been proposed for this task. Despite its high interpretability,
11+
#' the single-tree discovery of HTE tends to be highly unstable and to
12+
#' find an oversimplified representation of treatment heterogeneity.
13+
#' To accommodate these shortcomings, we propose Causal Rule Ensemble
14+
#' (CRE), a new method to discover heterogeneous subgroups through an
15+
#' ensemble-of-trees approach. CRE has the following features:
16+
#' 1) provides an interpretable representation of the HTE; 2) allows
17+
#' extensive exploration of complex heterogeneity patterns; and 3)
18+
#' guarantees high stability in the discovery. The discovered subgroups
19+
#' are defined in terms of interpretable decision rules, and we develop
20+
#' a general two-stage approach for subgroup-specific conditional
2121
#' causal effects estimation, providing theoretical guarantees.
2222
#'
2323
#' @docType package
@@ -39,8 +39,8 @@
3939
#' @importFrom methods as
4040
#'
4141
#' @references
42-
#' Bargagli-Stoffi, F. J., Cadei, R., Lee, K. and Dominici, F. (2023).
43-
#' Causal rule ensemble: Interpretable Discovery and Inference of
44-
#' Heterogeneous Treatment Effects,arXiv preprint arXiv:2009.09036
42+
#' Bargagli-Stoffi, F. J., Cadei, R., Lee, K. and Dominici, F. (2023).
43+
#' Causal rule ensemble: Interpretable Discovery and Inference of
44+
#' Heterogeneous Treatment Effects, arXiv preprint arXiv:2009.09036
4545
#'
4646
NULL

R/check_hyper_params.R

Lines changed: 53 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#' Check input parameters
33
#'
44
#' @description
5-
#' Checks consistency in input (hyper) parameters for the cre function.
5+
#' Checks consistency in input (hyper) parameters for the `cre` function.
66
#'
77
#' @param X_names The observed covariates names.
88
#' @param params The list of parameters required to run the function.
@@ -18,30 +18,18 @@ check_hyper_params <- function(X_names, params) {
1818
logger::log_debug("Checking hyper parameters...")
1919

2020
# Input params checks --------------------------------------------------------
21-
ntrees_rf <- getElement(params, "ntrees_rf")
22-
if (length(ntrees_rf) == 0) {
23-
ntrees_rf <- 20
21+
ntrees <- getElement(params, "ntrees")
22+
if (length(ntrees) == 0) {
23+
ntrees <- 20
2424
} else {
25-
if (!inherits(ntrees_rf, "numeric")) {
26-
stop("Invalid 'ntrees_rf' input. Please input a number.")
25+
if (!inherits(ntrees, "numeric")) {
26+
stop("Invalid 'ntrees' input. Please input a positive integer")
2727
}
28-
}
29-
params[["ntrees_rf"]] <- ntrees_rf
30-
31-
ntrees_gbm <- getElement(params, "ntrees_gbm")
32-
if (length(ntrees_gbm) == 0) {
33-
ntrees_gbm <- 20
34-
} else {
35-
if (!inherits(ntrees_gbm, "numeric")) {
36-
stop("Invalid 'ntrees_gbm' input. Please input a number.")
28+
if (ntrees<1) {
29+
stop("Invalid 'ntrees' input. Please input a positive integer")
3730
}
3831
}
39-
params[["ntrees_gbm"]] <- ntrees_gbm
40-
41-
if (params[["ntrees_gbm"]] + params[["ntrees_rf"]] == 0) {
42-
stop("The total number of trees (ntrees_rf + ntrees_gbm) has to be
43-
greater than 0")
44-
}
32+
params[["ntrees"]] <- ntrees
4533

4634
node_size <- getElement(params, "node_size")
4735
if (length(node_size) == 0) {
@@ -53,15 +41,15 @@ check_hyper_params <- function(X_names, params) {
5341
}
5442
params[["node_size"]] <- node_size
5543

56-
max_nodes <- getElement(params, "max_nodes")
57-
if (length(max_nodes) == 0) {
58-
max_nodes <- 5
44+
max_rules <- getElement(params, "max_rules")
45+
if (length(max_rules) == 0) {
46+
max_rules <- 50
5947
} else {
60-
if (!inherits(max_nodes, "numeric")) {
61-
stop("Invalid 'max_nodes' input. Please input a number.")
48+
if (!inherits(max_rules, "numeric")) {
49+
stop("Invalid 'max_rules' input. Please input a number.")
6250
}
6351
}
64-
params[["max_nodes"]] <- max_nodes
52+
params[["max_rules"]] <- max_rules
6553

6654
max_depth <- getElement(params, "max_depth")
6755
if (length(max_depth) == 0) {
@@ -119,58 +107,36 @@ check_hyper_params <- function(X_names, params) {
119107
}
120108
params[["t_corr"]] <- t_corr
121109

122-
t_pvalue <- getElement(params, "t_pvalue")
123-
if (length(t_pvalue) == 0) {
124-
t_pvalue <- 0.05
110+
stability_selection <- getElement(params, "stability_selection")
111+
if (length(stability_selection) == 0) {
112+
stability_selection <- "vanilla"
125113
} else {
126-
if (!inherits(t_pvalue, "numeric")) {
127-
stop("Invalid 't_pvalue' input. Please input a number.")
114+
if (!(stability_selection %in% c("error_control", "no","vanilla"))) {
115+
stop(paste0("Invalid `stability_selection` argument. Please input ",
116+
"a value among: {`no`, `vanilla`, `error_control`}."))
128117
}
129118
}
130-
params[["t_pvalue"]] <- t_pvalue
119+
params[["stability_selection"]] <- stability_selection
131120

132-
stability_selection <- getElement(params, "stability_selection")
133-
pfer <- getElement(params, "pfer")
134121
cutoff <- getElement(params, "cutoff")
135-
if (length(stability_selection) == 0) {
136-
stability_selection <- TRUE
137-
pfer <- 1
122+
if (length(cutoff) == 0) {
138123
cutoff <- 0.9
139124
} else {
140-
if (!(stability_selection %in% c(TRUE, FALSE))) {
141-
stop(paste0("Please specify 'TRUE' or 'FALSE' for",
142-
" the stability_selection argument."))
143-
} else if (stability_selection) {
144-
if (length(pfer) == 0) {
145-
pfer <- 1
146-
} else {
147-
if (!inherits(pfer, "numeric")) {
148-
stop("Invalid 'pfer' input. Please input a number.")
149-
}
150-
}
151-
if (length(cutoff) == 0) {
152-
cutoff <- 0.9
153-
} else {
154-
if (!inherits(cutoff, "numeric")) {
155-
stop("Invalid 'cutoff' input. Please input a number.")
156-
}
157-
}
125+
if (!inherits(cutoff, "numeric")) {
126+
stop("Invalid 'cutoff' input. Please input a number.")
158127
}
159128
}
160-
params[["stability_selection"]] <- stability_selection
161-
params[["pfer"]] <- pfer
162129
params[["cutoff"]] <- cutoff
163130

164-
165-
penalty_rl <- getElement(params, "penalty_rl")
166-
if (length(penalty_rl) == 0) {
167-
penalty_rl <- 1
131+
pfer <- getElement(params, "pfer")
132+
if (length(pfer) == 0) {
133+
pfer <- 1
168134
} else {
169-
if (!inherits(penalty_rl, "numeric")) {
170-
stop("Invalid 'penalty_rl' input. Please input a number.")
135+
if (!inherits(pfer, "numeric")) {
136+
stop("Invalid 'pfer' input. Please input a number.")
171137
}
172138
}
173-
params[["penalty_rl"]] <- penalty_rl
139+
params[["pfer"]] <- pfer
174140

175141
intervention_vars <- getElement(params, "intervention_vars")
176142
if (length(intervention_vars) == 0) {
@@ -196,6 +162,28 @@ check_hyper_params <- function(X_names, params) {
196162
}
197163
params[["offset"]] <- offset
198164

165+
# Check for correct B input
166+
B <- getElement(params, "B")
167+
if (length(B) == 0) {
168+
B <- 20
169+
} else {
170+
if (!inherits(B, "numeric")) {
171+
stop("Invalid 'B' input. Please input an integer.")
172+
}
173+
}
174+
params[["B"]] <- B
175+
176+
# Check for correct subsample input
177+
subsample <- getElement(params, "subsample")
178+
if (length(subsample) == 0) {
179+
subsample <- 0.5
180+
} else {
181+
if (!inherits(subsample, "numeric") || (subsample < 0) || (subsample > 1)) {
182+
stop("Invalid 'subsample' input. Please input a number between 0 and 1.")
183+
}
184+
}
185+
params[["subsample"]] <- subsample
186+
199187
logger::log_debug("Done with checking hyper parameters.")
200188

201189
return(params)

R/check_input_data.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#' Check input data
33
#'
44
#' @description
5-
#' Conducts sanity checks for the input data
5+
#' Conducts sanity checks for the input data.
66
#'
77
#' @param y The observed response vector.
88
#' @param z The treatment vector.

0 commit comments

Comments
 (0)