NSAPH-Software
diff --git a/‎.Rbuildignore‎
Lines changed: 8 additions & 0 deletions b/‎.Rbuildignore‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎.github/.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.github/.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/R-CMD-check.yaml‎
Lines changed: 110 additions & 0 deletions b/‎.github/workflows/R-CMD-check.yaml‎
Lines changed: 110 additions & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 66 additions & 4 deletions b/‎DESCRIPTION‎
Lines changed: 66 additions & 4 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 13 additions & 2 deletions b/‎NAMESPACE‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎NEWS.md‎
Lines changed: 30 additions & 7 deletions b/‎NEWS.md‎
Lines changed: 30 additions & 7 deletions
diff --git a/‎R/analyze_sensitivity.R‎
Lines changed: 7 additions & 10 deletions b/‎R/analyze_sensitivity.R‎
Lines changed: 7 additions & 10 deletions
diff --git a/‎R/check_input_data.R‎
Lines changed: 51 additions & 0 deletions b/‎R/check_input_data.R‎
Lines changed: 51 additions & 0 deletions
@@ -1,3 +1,11 @@
 ^CRE\.Rproj$
 ^\.Rproj\.user$
 ^LICENSE\.md$
+^\.github$
+_archive
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
+^doc$
+^Meta$
+index.md
@@ -0,0 +1 @@
+*.html
@@ -0,0 +1,110 @@
+# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
+# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
+on:
+  push:
+    branches:
+      - main
+      - develop
+  pull_request:
+    branches:
+      - main
+      - develop
+
+name: R-CMD-check
+
+jobs:
+  R-CMD-check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {os: windows-latest, r: 'release'}
+          #- {os: macOS-latest, r: 'release'}
+          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+          - {os: ubuntu-20.04,   r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
+
+    env:
+      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+      RSPM: ${{ matrix.config.rspm }}
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: r-lib/actions/setup-r@v1
+        with:
+          r-version: ${{ matrix.config.r }}
+
+      - uses: r-lib/actions/setup-pandoc@v1
+
+      - name: Query dependencies
+        run: |
+          install.packages('remotes')
+          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
+          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
+        shell: Rscript {0}
+
+      - name: Restore R package cache
+        uses: actions/cache@v2
+        with:
+          path: ${{ env.R_LIBS_USER }}
+          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
+          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
+
+      - name: Install system dependencies
+        if: runner.os == 'Linux'
+        run: |
+          while read -r cmd
+          do
+            eval sudo $cmd
+          done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
+
+      - name: Install omp
+        if: runner.os == 'macOS'
+        run: |
+              brew install gfortran
+              brew install llvm boost libomp
+              LDFLAGS_cmd=$(brew info llvm | grep "export LDFLAGS=")
+              LDFLAGS_path=$(echo $LDFLAGS_cmd | awk '{print $2}' | sed 's/"//g')
+              eval $LDFLAGS_cmd
+              clang_path=$(which clang)
+              clangpp_path=$(which clang++)
+              mkdir ~/.R
+              touch ~/.R/Makevars
+              echo "CC=$clang_path" >> ~/.R/Makevars
+              echo "CXX=$clangpp_path" >> ~/.R/Makevars
+              echo "CXX11=$clangpp_path" >> ~/.R/Makevars
+              echo "CXX14=$clangpp_path" >> ~/.R/Makevars
+              echo "CXX17=$clangpp_path" >> ~/.R/Makevars
+              echo "CXX1X=$clangpp_path" >> ~/.R/Makevars
+              echo "$LDFLAGS_path -o task -fopenmp" >> ~/.R/Makevars
+
+
+      - name: Install dependencies
+        run: |
+          remotes::install_deps(dependencies = TRUE)
+          remotes::install_cran("rcmdcheck")
+          remotes::install_cran("devtools")
+          remotes::install_cran("RcppParallel")
+          devtools::install_github("JingyuHe/XBART")
+          devtools::install_github("socket778/XBCF")
+        shell: Rscript {0}
+
+      - name: Check
+        env:
+          _R_CHECK_CRAN_INCOMING_REMOTE_: false
+        run: |
+          options(crayon.enabled = TRUE)
+          rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
+        shell: Rscript {0}
+
+      - name: Upload check results
+        if: failure()
+        uses: actions/upload-artifact@main
+        with:
+          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
+          path: check
@@ -1,15 +1,77 @@
 Package: CRE
-Title: What the Package Does (One Line, Title Case)
+Title: Interpretable Subgroups Identification through Ensemble Learning of 
+    Causal Rules
 Version: 0.0.1
-Authors@R: 
+Authors@R: c(
     person(given="Kwonsang", 
            family = "Lee",
            role = c("aut"),
            email = "[email protected]",
-           comment = c(ORCID = "0000-0002-5823-4331"))
-Description: What the package does (one paragraph).
+           comment = c(ORCID = "0000-0002-5823-4331")),
+    person(given="Falco Joannes", 
+           family = "Bargagli Stoffi",
+           role = c("aut"),
+           email = "[email protected]",
+           comment = c(ORCID = "0000-0002-6131-8165")),
+    person(given="Daniela Maria", 
+           family = "Garcia",
+           role = c("aut"),
+           email = "[email protected]",
+           comment = c(ORCID = "0000-0003-3226-3561")),
+    person(given="Naeem",
+           family="Khoshnevis",
+           email = "[email protected]",
+           role=c("aut","cre"), 
+           comment = c(ORCID = "0000-0003-4315-1426", AFFILIATION="FASRC")))
+Maintainer: Naeem Khoshnevis <[email protected]>
+Description: Provides an interpretable identification of subgroups with
+    heterogeneous causal effects. The heterogeneous subgroups are discovered
+    through ensemble learning of causal rules. Causal rules are highly
+    interpretable if-then statements that recursively partition the feature
+    space into heterogeneous subgroups. A small number of significant causal
+    rules are selected through Stability Selection to control the family-wise
+    error rate in the finite sample setting. It proposes various estimation
+    methods for the conditional causal effects for each discovered causal rule.
+    It is highly flexible: multiple causal estimands and imputation methods
+    are implemented.
+    Lee, K., Bargagli-Stoffi, F. J., & Dominici, F. (2020). Causal rule ensemble:
+    Interpretable inference of heterogeneous treatment effects. 
+    arXiv preprint <arXiv:2009.09036>.
 License: GPL-3
+URL: https://github.com/fasrc/CRE
+BugReports: https://github.com/fasrc/CRE/issues
+Copyright: Harvard University
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.1.1
+Suggests: 
+    knitr,
+    rmarkdown,
+    testthat (>= 3.0.0)
+Config/testthat/edition: 3
+Imports: tidyverse,
+         dplyr,
+         BART,
+         bcf,
+         randomForest,
+         xgboost,
+         gbm,
+         inTrees,
+         stabs,
+         rpart,
+         grf,
+         glmnet,
+         MASS,
+         doParallel,
+         foreach,
+         devtools,
+         XBART,
+         XBCF,
+         stringr,
+         bartCause,
+         gnm,
+         magrittr,
+         baggr,
+         logger
+VignetteBuilder: knitr
@@ -1,16 +1,27 @@
 # Generated by roxygen2: do not edit by hand
 
 export(analyze_sensitivity)
+export(check_input_data)
 export(cre)
 export(estimate_cate)
-export(estimate_ipw_sipw)
 export(estimate_ite)
 export(estimate_ite_bart)
 export(estimate_ite_bcf)
 export(estimate_ite_cf)
 export(estimate_ite_ipw)
 export(estimate_ite_or)
+export(estimate_ite_poisson)
+export(estimate_ite_sipw)
+export(estimate_ite_xbart)
+export(estimate_ite_xbcf)
+export(estimate_ps)
+export(extract_rules)
+export(generate_cre_dataset)
 export(generate_rules)
-export(generate_ruls_matrix)
+export(generate_rules_matrix)
+export(get_logger)
+export(interpret_select_rules)
 export(select_causal_rules)
+export(set_logger)
 export(split_data)
+export(take1)
@@ -1,10 +1,33 @@
-## CRE (devloping version)
-
-### Added
+## CRE 0.0.1 (2021-10-20)
 
 ### Changed
+- `estimate_cate` now includes two methods for estimating the CATE values
+- `cre` now performs initial checks for a binary outcome and for whether to include the propensity score in the ITE estimation
+- `estimate_ite_xyz` functions now conduct propensity score estimation using a helper function
+- Removed `seed` as an input from `generate_cre_dataset` function.
 
-### Fixed
-
-### Removed
-
+### Added
+- `set_logger` and `get_logger`
+- `check_input_data` function
+- example to `generate_cre_dataset`
+- `generate_cre_dataset` function to generate synthetic data for testing the package
+- `test-generate_cre_dataset` function test
+- `estimate_ps` function to estimate the propensity score
+- `estimate_ite_xbart` function to generate ITE estimates using accelerated BART
+- `estimate_ite_xbcf` function to generate ITE estimates using accelerated BCF
+- `analyze_sensitivity` function to conduct sensitivity analysis for unmeasured confounding
+- `cre` function to perform the entire Causal Rule Ensemble method
+- `estimate_cate` function to generate CATE estimates from the ITE estimates and select rules
+- `estimate_ite` function to generate ITE estimates using the user-specified method (calls the other `estimate_ite_xyz` functions)
+- `estimate_ite_bart` function to generate ITE estimates using BART
+- `estimate_ite_bcf` function to generate ITE estimates using Bayesian Causal Forests
+- `estimate_ite_cf` function to generate ITE estimates using Causal Forests
+- `estimate_ite_ipw` function to generate ITE estimates using IPW
+- `estimate_ite_or` function to generate ITE estimates using Outcome Regression
+- `estimate_ite_sipw` function to generate ITE estimates using SIPW
+- `extract_rules` function to extract a list of causal rules from randomForest and GBM models
+- `generate_rules` function to generate causal rule models using randomForest and GBM methods
+- `generate_rules_matrix` function to convert a list of causal rules into a matrix
+- `select_causal_rules` function to apply penalized regression to causal rules to select only the most important ones
+- `split_data` function to split input data into discovery and inference subsamples
+- `take1` function to create a subsample of indices
@@ -1,19 +1,16 @@
 #' @title
-#' Title
+#' Sensitivity Analysis
 #'
 #' @description
-#' Description
+#' Placeholder for analyzing the sensitivity of the estimates of the causal rule-specific treatment effects (not implemented yet)
 #'
-#' @param tau input value description
-#' @param X input value description
-#' @param select_rules input value description
-#' @param method input value description
+#' @param ite_std the standardized ITE (currently unused: the function body is empty)
+#' @param rules_matrix_std the standardized causal rules matrix (currently unused: the function body is empty)
+#'
+#' @return intended: a list containing the results of the sensitivity analysis; currently `NULL` because the body is still a stub
 #'
-#' @return
 #' @export
 #'
-#' @examples
-#' TBD
-analyze_sensitivity <- function(tau, X, select_rules, method){
+analyze_sensitivity <- function(ite_std, rules_matrix_std) {
   # TBD
 }
@@ -0,0 +1,51 @@
+#' @title
+#' Check Input Data
+#'
+#' @description
+#' Conducts sanity checks for the input data: `y` and `z` must be numeric
+#' vectors of equal length and `X` must have one row per observation.
+#'
+#' @param y the observed response vector
+#' @param z the treatment vector
+#' @param X the features matrix (any object with a `dim`, e.g. data.frame)
+#'
+#' @return
+#' Number of data samples (invisibly); an error is raised if a check fails.
+#'
+#' @export
+#'
+check_input_data <- function(y, z, X){
+  #---------------- Input data checks ------------------------------------------
+  ## type: reject the input when either requirement fails
+  if (!is.vector(y) || !is.numeric(y)){
+    stop("Observed response vector (y) input values should be a numerical vector")
+  }
+
+  if (!is.vector(z) || !is.numeric(z)){
+    stop("Treatment (z) input values should be a numerical vector.")
+  }
+
+  ## size
+  y_size <- length(y)
+  z_size <- length(z)
+
+  if (y_size != z_size){
+    stop(paste("Response and treatment vectors should be the same size. ",
+               "Current values: ", y_size, ", ", z_size))
+  }
+
+  ## dim() is NULL for objects without dimensions (e.g. a plain vector)
+  covars_size <- dim(X)
+  if (is.null(covars_size)){
+    stop("Covariates (X) input values should be a matrix or a data.frame.")
+  }
+
+  if (covars_size[1] != y_size){
+    stop(paste("Covariates (X) data.frame has different number of",
+               "observation than response and treatment vectors.",
+               "Current values: ", covars_size[1], ", ", y_size))
+  }
+  #-----------------------------------------------------------------------------
+
+  invisible(y_size)
+}