
Commit 7fc517e

[ENH] Efficient out-of-bag computation per honest tree (neurodata#200)
* Add oob_samples_ property to honest forest
* Update submodule
* Update return types to be in line with scikit-learn

Signed-off-by: Adam Li <[email protected]>
1 parent 4703a82 commit 7fc517e

35 files changed: +492, -157 lines

.github/workflows/pr_checks.yml

Lines changed: 3 additions & 3 deletions
```diff
@@ -38,14 +38,14 @@ jobs:
           exit 0
         fi
         all_changelogs=$(cat ./doc/whats_new/v*.rst)
-        if [[ "$all_changelogs" =~ :pr:\`$PR_NUMBER\` ]]
+        if [[ "$all_changelogs" =~ :pr:\`#$PR_NUMBER\` ]]
         then
           echo "Changelog has been updated."
           # If the pull request is milestoned check the correspondent changelog
           if exist -f ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst
           then
             expected_changelog=$(cat ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst)
-            if [[ "$expected_changelog" =~ :pr:\`$PR_NUMBER\` ]]
+            if [[ "$expected_changelog" =~ :pr:\`#$PR_NUMBER\` ]]
             then
               echo "Changelog and milestone correspond."
             else
@@ -56,7 +56,7 @@ jobs:
           fi
         fi
       else
-        echo "A Changelog entry is missing."
+        echo "A Changelog entry is missing for :pr:\`#$PR_NUMBER\`"
         echo ""
         echo "Please add an entry to the changelog at 'doc/whats_new/v*.rst'"
         echo "to document your change assuming that the PR will be merged"
```

.github/workflows/style.yml

Lines changed: 2 additions & 2 deletions
```diff
@@ -29,10 +29,10 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
-      - name: Setup Python 3.10
+      - name: Setup Python 3.11
         uses: actions/setup-python@v5
         with:
-          python-version: "3.10"
+          python-version: "3.11"
           architecture: "x64"

       - name: Install packages for Ubuntu
```

.pre-commit-config.yaml

Lines changed: 7 additions & 1 deletion
```diff
@@ -5,6 +5,12 @@ repos:
       - id: black
         args: [--quiet]

+  - repo: https://github.com/MarcoGorelli/cython-lint
+    rev: v0.16.0
+    hooks:
+      - id: cython-lint
+      - id: double-quote-cython-strings
+
   # Ruff sktree
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.1.6
@@ -65,7 +71,7 @@ repos:
       - id: rstcheck
         additional_dependencies:
           - tomli
-        files: ^doc/.*\.(rst|inc)$
+        files: ^(?!doc/use\.rst$).*\.(rst|inc)$

 ci:
   autofix_prs: false
```

doc/conf.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -246,6 +246,7 @@
     "TreeBuilder",
     "joint_rank",
     "n_dim",
+    "n_samples_bootstrap",
 }

 # validation
```

doc/use.rst

Lines changed: 1 addition & 1 deletion
```diff
@@ -8,4 +8,4 @@ to learn everything you need!

 .. rstcheck: ignore-next-code-block
 .. include:: auto_examples/index.rst
-   :start-after: :orphan:
+   :start-after: :orphan:
```

The removed and added lines differ only in whitespace.

doc/whats_new/v0.6.rst

Lines changed: 3 additions & 0 deletions
```diff
@@ -29,6 +29,9 @@ Changelog
   has a generative model based on Trunk and banded covariance, :func:`sktree.datasets.approximate_clf_mutual_information` and
   :func:`sktree.datasets.approximate_clf_mutual_information_with_monte_carlo` to
   approximate mutual information either numerically or via Monte-Carlo, by `Adam Li`_ and `Haoyin Xu`_ (:pr:`#199`).
+- |Enhancement| :class:`sktree.HonestForestClassifier` now has a fitted
+  property ``oob_samples_``, which reproduces the sample indices per tree that are out
+  of bag, by `Adam Li`_ (:pr:`#200`).


 Code and Documentation Contributors
```
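A minimal sketch of how the new property might be used, assuming the :class:`sktree.HonestForestClassifier` API described in the changelog; the data, the ``bootstrap=True`` argument, and the per-tree index-array return shape are illustrative assumptions, not taken from the commit:

```python
# Minimal sketch; data and parameters are illustrative assumptions.
import numpy as np
from sktree import HonestForestClassifier

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 4))
y = rng.integers(0, 2, size=100)

# bootstrap=True is assumed here so that each tree has out-of-bag samples.
est = HonestForestClassifier(n_estimators=10, bootstrap=True, random_state=0)
est.fit(X, y)

# Assuming oob_samples_ is an iterable of per-tree index arrays, as the
# changelog entry describes ("sample indices per tree that are out of bag").
for tree_idx, oob_idx in enumerate(est.oob_samples_):
    print(f"tree {tree_idx}: {len(oob_idx)} out-of-bag samples")
```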

examples/calibration/plot_overlapping_gaussians.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -65,7 +65,7 @@
     (
         "IRF",
         CalibratedClassifierCV(
-            base_estimator=RandomForestClassifier(
+            estimator=RandomForestClassifier(
                 n_estimators=n_estimators // clf_cv,
                 max_features=max_features,
                 n_jobs=n_jobs,
@@ -77,7 +77,7 @@
     (
         "SigRF",
         CalibratedClassifierCV(
-            base_estimator=RandomForestClassifier(
+            estimator=RandomForestClassifier(
                 n_estimators=n_estimators // clf_cv,
                 max_features=max_features,
                 n_jobs=n_jobs,
```
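Context for this rename: scikit-learn deprecated the ``base_estimator`` keyword of ``CalibratedClassifierCV`` in favor of ``estimator`` (deprecation began in the 1.2 series), so the swap keeps the example compatible with recent releases. A minimal sketch with an illustrative dataset and parameters, not taken from the example script:

```python
# Minimal sketch of the renamed keyword; dataset and parameters are illustrative.
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=200, random_state=0)

# Recent scikit-learn releases accept `estimator`; `base_estimator` is the
# deprecated spelling this commit moves away from.
clf = CalibratedClassifierCV(
    estimator=RandomForestClassifier(n_estimators=50, random_state=0),
    method="sigmoid",
    cv=3,
)
clf.fit(X, y)
print(clf.predict_proba(X[:5]))
```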

pyproject.toml

Lines changed: 0 additions & 1 deletion
```diff
@@ -293,7 +293,6 @@ ignore_roles = [
 ]
 report_level = "WARNING"
 ignore = ["SEVERE/4"]
-paths = ["doc/use.rst"]

 [tool.ruff]
 extend-exclude = [
```

sktree/_lib/sklearn_fork

Submodule sklearn_fork updated 81 files

sktree/datasets/hyppo.py

Lines changed: 37 additions & 12 deletions
```diff
@@ -50,6 +50,7 @@ def make_quadratic_classification(n_samples: int, n_features: int, noise=False,
 def make_trunk_classification(
     n_samples,
     n_dim=10,
+    n_informative=10,
     m_factor: int = -1,
     rho: int = 0,
     band_type: str = "ma",
@@ -76,6 +77,9 @@ def make_trunk_classification(
     n_dim : int, optional
         The dimensionality of the dataset and the number of
         unique labels, by default 10.
+    n_informative : int, optional
+        The number of informative dimensions. The remaining
+        ``n_dim - n_informative`` dimensions are uniform noise.
     m_factor : int, optional
         The multiplicative factor to apply to the mean-vector of the first
         distribution to obtain the mean-vector of the second distribution.
@@ -108,25 +112,30 @@ def make_trunk_classification(
     ----------
     .. footbibliography::
     """
+    if n_dim < n_informative:
+        raise ValueError(
+            f"Number of informative dimensions {n_informative} must be less than number "
+            f"of dimensions, {n_dim}"
+        )
     rng = np.random.default_rng(seed=seed)

-    mu_1 = np.array([1 / np.sqrt(i) for i in range(1, n_dim + 1)])
+    mu_1 = np.array([1 / np.sqrt(i) for i in range(1, n_informative + 1)])
     mu_0 = m_factor * mu_1

     if rho != 0:
         if band_type == "ma":
-            cov = _moving_avg_cov(n_dim, rho)
+            cov = _moving_avg_cov(n_informative, rho)
         elif band_type == "ar":
-            cov = _autoregressive_cov(n_dim, rho)
+            cov = _autoregressive_cov(n_informative, rho)
         else:
             raise ValueError(f'Band type {band_type} must be one of "ma", or "ar".')
     else:
-        cov = np.identity(n_dim)
+        cov = np.identity(n_informative)

     if mix < 0 or mix > 1:
         raise ValueError("Mix must be between 0 and 1.")

-    if n_dim > 1000:
+    if n_informative > 1000:
         method = "cholesky"
     else:
         method = "svd"
@@ -139,13 +148,29 @@ def make_trunk_classification(
             )
         )
     else:
+        mixture_idx = rng.choice(
+            [0, 1], n_samples // 2, replace=True, shuffle=True, p=[mix, 1 - mix]
+        )
+        X_mixture = np.zeros((n_samples // 2, len(mu_1)))
+        for idx in range(n_samples // 2):
+            if mixture_idx[idx] == 1:
+                X_sample = rng.multivariate_normal(mu_1, cov, 1, method=method)
+            else:
+                X_sample = rng.multivariate_normal(mu_0, cov, 1, method=method)
+            X_mixture[idx, :] = X_sample
+
         X = np.vstack(
             (
-                rng.multivariate_normal(np.zeros(n_dim), cov, n_samples // 2, method=method),
-                (1 - mix) * rng.multivariate_normal(mu_1, cov, n_samples // 2, method=method)
-                + mix * rng.multivariate_normal(mu_0, cov, n_samples // 2, method=method),
+                rng.multivariate_normal(
+                    np.zeros(n_informative), cov, n_samples // 2, method=method
+                ),
+                X_mixture,
             )
         )
+
+    if n_dim > n_informative:
+        X = np.hstack((X, rng.uniform(low=0, high=1, size=(n_samples, n_dim - n_informative))))
+
     y = np.concatenate((np.zeros(n_samples // 2), np.ones(n_samples // 2)))

     if return_params:
@@ -208,19 +233,19 @@ def approximate_clf_mutual_information(
     # this implicitly assumes that the signal of interest is between -10 and 10
     scale = 10
     n_dims = [cov.shape[1] for cov in covs]
-    lims = [[-scale, scale]] * n_dims
+    lims = [[-scale, scale]] * max(n_dims)

     # Compute entropy and X and Y.
     def func(*args):
         x = np.array(args)
         p = 0
         for k in range(len(means)):
-            p += class_probs[k] * multivariate_normal(seed=seed).pdf(x, means[k], covs[k])
+            p += class_probs[k] * multivariate_normal.pdf(x, means[k], covs[k])
         return -p * np.log(p) / np.log(base)

     # numerically integrate H(X)
-    opts = dict(limit=1000)
-    H_X, int_err = nquad(func, lims)
+    # opts = dict(limit=1000)
+    H_X, int_err = nquad(func, lims)

     # Compute MI.
     H_XY = 0
```
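A minimal usage sketch of the extended generator, assuming the signature shown in the diff above and that the default return is ``(X, y)`` when ``return_params`` is left unset; the parameter values are illustrative:

```python
# Minimal sketch, assuming make_trunk_classification returns (X, y) by default;
# parameter values are illustrative.
from sktree.datasets import make_trunk_classification

# 6 total dimensions, 2 of which carry the Trunk signal; the remaining
# 4 columns are filled with uniform noise by the new n_informative logic.
X, y = make_trunk_classification(n_samples=100, n_dim=6, n_informative=2, seed=1234)
print(X.shape)  # expected: (100, 6)
print(y.shape)  # expected: (100,)
```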
