From fbb4b4485cd4f99e90bfd109a4d80d76ca3300c6 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Thu, 14 Nov 2024 15:11:56 -0600
Subject: [PATCH 01/19] [#161] Add xgboost as an optional dependency

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index efa43f0..5f150de 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@ dev = [
     "sphinx==8.1.3",
     "recommonmark==0.7.1",
 ]
+xgboost = ["xgboost>=2.0"]
 
 [project.scripts]
 hlink = "hlink.scripts.main:cli"

From a51f20f1e08792c0fe318296bf05c8a9eebcfdba Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Thu, 14 Nov 2024 15:27:06 -0600
Subject: [PATCH 02/19] [#161] Add a test for xgboost classifier support

This test currently fails if you have xgboost installed, because
choose_classifier() does not recognize the "xgboost" model type yet. If you
don't have xgboost installed, the test skips itself to prevent failures due to
missing packages and dependencies.
---
 hlink/tests/core/classifier_test.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 hlink/tests/core/classifier_test.py

diff --git a/hlink/tests/core/classifier_test.py b/hlink/tests/core/classifier_test.py
new file mode 100644
index 0000000..1010262
--- /dev/null
+++ b/hlink/tests/core/classifier_test.py
@@ -0,0 +1,23 @@
+import pytest
+
+from hlink.linking.core.classifier import choose_classifier
+
+try:
+    import xgboost
+except ModuleNotFoundError:
+    xgboost_available = False
+else:
+    xgboost_available = True
+
+@pytest.mark.skipif(not xgboost_available, reason="requires the xgboost library")
+def test_choose_classifier_supports_xgboost():
+    """
+    If the xgboost module is installed, then choose_classifier() supports a model
+    type of "xgboost".
+    """
+    params = {
+        "max_depth": 2,
+        "eta": 0.5,
+    }
+    classifier = choose_classifier("xgboost", params, "match")
+    assert classifier.getLabelCol() == "match"

From 010f3f54f01035801fd94a9755e49631840340e0 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Thu, 14 Nov 2024 15:32:04 -0600
Subject: [PATCH 03/19] [#161] Run black

---
 hlink/tests/core/classifier_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hlink/tests/core/classifier_test.py b/hlink/tests/core/classifier_test.py
index 1010262..4ffa37a 100644
--- a/hlink/tests/core/classifier_test.py
+++ b/hlink/tests/core/classifier_test.py
@@ -9,6 +9,7 @@
 else:
     xgboost_available = True
 
+
 @pytest.mark.skipif(not xgboost_available, reason="requires the xgboost library")
 def test_choose_classifier_supports_xgboost():
     """

From a865825237060862269fb1635c68c79a897726db Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Thu, 14 Nov 2024 15:38:00 -0600
Subject: [PATCH 04/19] [#161] Ignore flake8 unused import error

---
 hlink/tests/core/classifier_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hlink/tests/core/classifier_test.py b/hlink/tests/core/classifier_test.py
index 4ffa37a..7616acb 100644
--- a/hlink/tests/core/classifier_test.py
+++ b/hlink/tests/core/classifier_test.py
@@ -3,7 +3,7 @@
 from hlink.linking.core.classifier import choose_classifier
 
 try:
-    import xgboost
+    import xgboost  # noqa: F401
 except ModuleNotFoundError:
     xgboost_available = False
 else:

From 287912ec8e74099db172e652b26cf33c4c800e11 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Thu, 14 Nov 2024 16:06:38 -0600
Subject: [PATCH 05/19] [#161] Create a SparkXGBClassifier in
 choose_classifier() for model_type xgboost

This is only possible when we have the xgboost module, so raise an error if
that is not present.
---
 hlink/linking/core/classifier.py    | 23 ++++++++++++++++++++++-
 hlink/tests/core/classifier_test.py |  2 +-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/hlink/linking/core/classifier.py b/hlink/linking/core/classifier.py
index 0efaf38..bf9f898 100644
--- a/hlink/linking/core/classifier.py
+++ b/hlink/linking/core/classifier.py
@@ -13,6 +13,13 @@
 )
 import hlink.linking.transformers.rename_prob_column
 
+try:
+    import xgboost.spark
+except ModuleNotFoundError:
+    _xgboost_available = False
+else:
+    _xgboost_available = True
+
 
 def choose_classifier(model_type, params, dep_var):
     """Returns a classifier and a post_classification transformer given model type and params.
@@ -96,7 +103,21 @@ def choose_classifier(model_type, params, dep_var):
         post_transformer = (
             hlink.linking.transformers.rename_prob_column.RenameProbColumn()
         )
-
+    elif model_type == "xgboost":
+        if not _xgboost_available:
+            raise ModuleNotFoundError(
+                "model_type 'xgboost' requires the xgboost library"
+            )
+        params_without_threshold = {
+            key: val
+            for key, val in params.items()
+            if key not in {"threshold", "threshold_ratio"}
+        }
+        classifier = xgboost.spark.SparkXGBClassifier(
+            **params_without_threshold,
+            features_col=features_vector,
+            label_col=dep_var,
+        )
     else:
         raise ValueError(
             "Model type not recognized! Please check your config, reload, and try again."
diff --git a/hlink/tests/core/classifier_test.py b/hlink/tests/core/classifier_test.py
index 7616acb..cf7d2bd 100644
--- a/hlink/tests/core/classifier_test.py
+++ b/hlink/tests/core/classifier_test.py
@@ -20,5 +20,5 @@ def test_choose_classifier_supports_xgboost():
         "max_depth": 2,
         "eta": 0.5,
     }
-    classifier = choose_classifier("xgboost", params, "match")
+    classifier, _post_transformer = choose_classifier("xgboost", params, "match")
     assert classifier.getLabelCol() == "match"

From a7b0c37f164ea1af76ebf1a8881c3738982cc2ce Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Fri, 15 Nov 2024 09:11:29 -0600
Subject: [PATCH 06/19] [#161] Add a test that runs the whole training task
 with an xgboost model

This test is failing right now because we also need pyarrow>=4 when using
xgboost. We should add this as a dependency in the xgboost extra. If xgboost
isn't installed, this test skips itself.
---
 hlink/tests/training_test.py | 51 ++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/hlink/tests/training_test.py b/hlink/tests/training_test.py
index 0fbdb0a..94be4f4 100644
--- a/hlink/tests/training_test.py
+++ b/hlink/tests/training_test.py
@@ -7,6 +7,13 @@
 from pyspark.ml import Pipeline
 import hlink.linking.core.pipeline as pipeline_core
 
+try:
+    import xgboost  # noqa: F401
+except ModuleNotFoundError:
+    xgboost_available = False
+else:
+    xgboost_available = True
+
 
 @pytest.mark.quickcheck
 def test_all_steps(
@@ -432,6 +439,50 @@ def test_step_3_with_probit_model(
     )
 
 
+@pytest.mark.skipif(not xgboost_available, reason="requires the xgboost library")
+def test_step_3_with_xgboost_model(
+    spark, training, training_conf, datasource_training_input
+):
+    training_data_path, prepped_df_a_path, prepped_df_b_path = datasource_training_input
+    training_conf["comparison_features"] = [
+        {
+            "alias": "regionf",
+            "column_name": "region",
+            "comparison_type": "fetch_a",
+            "categorical": True,
+        },
+        {
+            "alias": "namelast_jw",
+            "column_name": "namelast",
+            "comparison_type": "jaro_winkler",
+        },
+    ]
+    training_conf["training"]["dataset"] = training_data_path
+    training_conf["training"]["dependent_var"] = "match"
+    training_conf["training"]["independent_vars"] = ["namelast_jw", "regionf"]
+    training_conf["training"]["chosen_model"] = {
+        "type": "xgboost",
+        "max_depth": 2,
+        "eta": 0.5,
+        "threshold": 0.7,
+        "threshold_ratio": 1.3,
+    }
+    training_conf["training"]["score_with_model"] = True
+    training_conf["training"]["feature_importances"] = True
+
+    spark.read.csv(prepped_df_a_path, header=True, inferSchema=True).write.mode(
+        "overwrite"
+    ).saveAsTable("prepped_df_a")
+    spark.read.csv(prepped_df_b_path, header=True, inferSchema=True).write.mode(
+        "overwrite"
+    ).saveAsTable("prepped_df_b")
+
+    training.run_step(0)
+    training.run_step(1)
+    training.run_step(2)
+    training.run_step(3)
+
+
 def test_step_3_requires_table(training_conf, training):
     training_conf["training"]["feature_importances"] = True
     with pytest.raises(RuntimeError, match="Missing input tables"):

From 5c6fdc9ad3686d38dc6be6a5f79d198e6c8d3b6f Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Fri, 15 Nov 2024 15:56:52 +0000
Subject: [PATCH 07/19] [#161] Update the Dockerfile to support build with
 different hlink extras

This should let us have two different test setups for each Python version: one
with xgboost and one without.
---
 Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 80d5c6e..0f2e036 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,6 @@
 ARG PYTHON_VERSION=3.10
 FROM python:${PYTHON_VERSION}
+ARG HLINK_EXTRAS=dev
 
 RUN apt-get update && apt-get install default-jre-headless -y
 
@@ -8,4 +9,4 @@ WORKDIR /hlink
 
 COPY . .
 RUN python -m pip install --upgrade pip
-RUN pip install -e .[dev]
+RUN pip install -e .[${HLINK_EXTRAS}]

From a2598112fdcb15012a7729a2be4a4289dad5021b Mon Sep 17 00:00:00 2001
From: Riley Harper <52982949+riley-harper@users.noreply.github.com>
Date: Fri, 15 Nov 2024 10:09:02 -0600
Subject: [PATCH 08/19] [#161] Update docker-build.yml to run tests with and
 without xgboost

I've also passed -v to pytest so that the test output is more verbose.
---
 .github/workflows/docker-build.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index 4e42786..fe2b229 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -12,12 +12,13 @@ jobs:
       fail-fast: false
       matrix:
         python_version: ["3.10", "3.11", "3.12"]
+        hlink_extras: ["dev", "dev,xgboost"]
     runs-on: ubuntu-latest
 
     steps:
     - uses: actions/checkout@v4
     - name: Build the Docker image
-      run: docker build . --file Dockerfile --tag $HLINK_TAG-${{ matrix.python_version}} --build-arg PYTHON_VERSION=${{ matrix.python_version }}
+      run: docker build . --file Dockerfile --tag $HLINK_TAG-${{ matrix.python_version}} --build-arg PYTHON_VERSION=${{ matrix.python_version }} --build-arg HLINK_EXTRAS=${{ matrix.hlink_extras }}
 
     - name: Check dependency versions
       run: |
@@ -32,7 +33,7 @@ jobs:
       run: docker run $HLINK_TAG-${{ matrix.python_version}} flake8 --count .
       
     - name: Test
-      run: docker run $HLINK_TAG-${{ matrix.python_version}} pytest
+      run: docker run $HLINK_TAG-${{ matrix.python_version}} pytest -v
     
     - name: Build sdist and wheel
       run: docker run $HLINK_TAG-${{ matrix.python_version}} python -m build

From a95992cf472c8c41749e52679da5a156c7299b06 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Fri, 15 Nov 2024 13:17:01 -0600
Subject: [PATCH 09/19] [#161] Add pyarrow as a dependency for the xgboost
 extra

---
 pyproject.toml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5f150de..9397ee2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,10 @@ dev = [
     "sphinx==8.1.3",
     "recommonmark==0.7.1",
 ]
-xgboost = ["xgboost>=2.0"]
+xgboost = [
+    "xgboost>=2.0",
+    "pyarrow>=4.0",
+]
 
 [project.scripts]
 hlink = "hlink.scripts.main:cli"

From c64cf43c8cfaacca1a44d104d8ae2a2860229ea3 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Fri, 15 Nov 2024 13:44:22 -0600
Subject: [PATCH 10/19] [#161] Factor conditional xgboost test logic into a
 single marker

---
 hlink/tests/core/classifier_test.py | 12 ++----------
 hlink/tests/markers.py              | 14 ++++++++++++++
 hlink/tests/training_test.py        | 10 ++--------
 3 files changed, 18 insertions(+), 18 deletions(-)
 create mode 100644 hlink/tests/markers.py

diff --git a/hlink/tests/core/classifier_test.py b/hlink/tests/core/classifier_test.py
index cf7d2bd..473dae1 100644
--- a/hlink/tests/core/classifier_test.py
+++ b/hlink/tests/core/classifier_test.py
@@ -1,16 +1,8 @@
-import pytest
-
 from hlink.linking.core.classifier import choose_classifier
-
-try:
-    import xgboost  # noqa: F401
-except ModuleNotFoundError:
-    xgboost_available = False
-else:
-    xgboost_available = True
+from hlink.tests.markers import requires_xgboost
 
 
-@pytest.mark.skipif(not xgboost_available, reason="requires the xgboost library")
+@requires_xgboost
 def test_choose_classifier_supports_xgboost():
     """
     If the xgboost module is installed, then choose_classifier() supports a model
diff --git a/hlink/tests/markers.py b/hlink/tests/markers.py
new file mode 100644
index 0000000..cc264b1
--- /dev/null
+++ b/hlink/tests/markers.py
@@ -0,0 +1,14 @@
+import pytest
+
+try:
+    import xgboost  # noqa: F401
+except ModuleNotFoundError:
+    xgboost_available = False
+else:
+    xgboost_available = True
+
+requires_xgboost = pytest.mark.skipif(
+    not xgboost_available, reason="requires the xgboost library"
+)
+"""For tests which require the xgboost library. This checks whether xgboost is
+installed and skips the test if it is not."""
diff --git a/hlink/tests/training_test.py b/hlink/tests/training_test.py
index 94be4f4..7ca5908 100644
--- a/hlink/tests/training_test.py
+++ b/hlink/tests/training_test.py
@@ -6,13 +6,7 @@
 import pytest
 from pyspark.ml import Pipeline
 import hlink.linking.core.pipeline as pipeline_core
-
-try:
-    import xgboost  # noqa: F401
-except ModuleNotFoundError:
-    xgboost_available = False
-else:
-    xgboost_available = True
+from hlink.tests.markers import requires_xgboost
 
 
 @pytest.mark.quickcheck
@@ -439,7 +433,7 @@ def test_step_3_with_probit_model(
     )
 
 
-@pytest.mark.skipif(not xgboost_available, reason="requires the xgboost library")
+@requires_xgboost
 def test_step_3_with_xgboost_model(
     spark, training, training_conf, datasource_training_input
 ):

From 88d719964b10e1656ed00703d4ce1e318b00dfcc Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Fri, 15 Nov 2024 15:21:06 -0600
Subject: [PATCH 11/19] [#161] Add an integration test for xgboost, set the
 post-transformer

Like some of the other models, xgboost returns an array of probabilities like
[probability_no, probability_yes]. So we extract just probability_yes as our
probability for hlink purposes.
---
 hlink/linking/core/classifier.py              |  4 +
 ...egration_score_with_trained_models_test.py | 96 +++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/hlink/linking/core/classifier.py b/hlink/linking/core/classifier.py
index bf9f898..65295fe 100644
--- a/hlink/linking/core/classifier.py
+++ b/hlink/linking/core/classifier.py
@@ -117,6 +117,10 @@ def choose_classifier(model_type, params, dep_var):
             **params_without_threshold,
             features_col=features_vector,
             label_col=dep_var,
+            probability_col="probability_array",
+        )
+        post_transformer = SQLTransformer(
+            statement="SELECT *, parseProbVector(probability_array, 1) as probability FROM __THIS__"
         )
     else:
         raise ValueError(
diff --git a/hlink/tests/integration_score_with_trained_models_test.py b/hlink/tests/integration_score_with_trained_models_test.py
index 993a497..c04819c 100644
--- a/hlink/tests/integration_score_with_trained_models_test.py
+++ b/hlink/tests/integration_score_with_trained_models_test.py
@@ -3,6 +3,8 @@
 # in this project's top-level directory, and also on-line at:
 #   https://github.com/ipums/hlink
 
+from hlink.tests.markers import requires_xgboost
+
 
 def test_apply_chosen_model_RF(
     spark,
@@ -859,6 +861,100 @@ def test_step_3_apply_chosen_model_boosted_trees(
     )
 
 
+@requires_xgboost
+def test_apply_chosen_model_xgboost(
+    spark,
+    training,
+    matching,
+    training_conf,
+    datasource_training_input,
+    potential_matches_path,
+    state_dist_path,
+    spark_test_tmp_dir_path,
+):
+    training_data_path, prepped_df_a_path, prepped_df_b_path = datasource_training_input
+    training_conf["comparison_features"] = [
+        {
+            "alias": "regionf",
+            "column_name": "region",
+            "comparison_type": "fetch_a",
+            "categorical": True,
+        },
+        {
+            "alias": "namelast_jw",
+            "column_name": "namelast",
+            "comparison_type": "jaro_winkler",
+        },
+        {
+            "alias": "state_distance",
+            "key_count": 1,
+            "column_name": "bpl",
+            "comparison_type": "geo_distance",
+            "loc_a": "statecode1",
+            "loc_b": "statecode2",
+            "distance_col": "dist",
+            "table_name": "state_distances_lookup",
+            "distances_file": state_dist_path,
+        },
+    ]
+
+    training_conf["training"]["dataset"] = training_data_path
+    training_conf["training"]["dependent_var"] = "match"
+    training_conf["training"]["independent_vars"] = [
+        "namelast_jw",
+        "regionf",
+        "state_distance",
+    ]
+    training_conf["training"]["chosen_model"] = {
+        "type": "xgboost",
+        "max_depth": 5,
+        "eta": 0.5,
+        "threshold": 0.5,
+        "threshold_ratio": 1.3,
+    }
+    training_conf["training"]["score_with_model"] = True
+    training_conf["spark_tmp_dir"] = spark_test_tmp_dir_path
+    training_conf["drop_data_from_scored_matches"] = True
+
+    prepped_df_a = spark.read.csv(prepped_df_a_path, header=True, inferSchema=True)
+    prepped_df_b = spark.read.csv(prepped_df_b_path, header=True, inferSchema=True)
+    potential_matches = spark.read.csv(
+        potential_matches_path, header=True, inferSchema=True
+    )
+    prepped_df_a.write.mode("overwrite").saveAsTable("prepped_df_a")
+    prepped_df_b.write.mode("overwrite").saveAsTable("prepped_df_b")
+    potential_matches.write.mode("overwrite").saveAsTable("potential_matches")
+
+    training.run_all_steps()
+    matching.run_step(2)
+
+    potential_matches_df = spark.table("scored_potential_matches").toPandas()
+
+    # Check one case that we expect to be a match and one case that we expect not
+    # to be a match.
+    should_be_match = potential_matches_df.query(
+        "id_a == '0202928A-AC3E-48BB-8568-3372067F35C7'"
+    )
+    assert (
+        should_be_match.shape[0] == 1
+    ), "expected exactly one potential match for 0202928A"
+    assert should_be_match["probability"].iloc[0] >= 0.5
+    assert should_be_match["prediction"].iloc[0] == 1
+
+    # In the real world, this would probably be a match, depending on how the
+    # additional features looked. But we've included so few training features
+    # for our test model that small differences in names can really hurt a
+    # potential match's chances of being classified as a match.
+    should_not_be_match = potential_matches_df.query(
+        "id_b == '033FD0FA-C523-42B5-976A-751E830F7021'"
+    )
+    assert (
+        should_not_be_match.shape[0] == 1
+    ), "expected exactly one potential match for 033FD0FA"
+    assert should_not_be_match["probability"].iloc[0] <= 0.5
+    assert should_not_be_match["prediction"].iloc[0] == 0
+
+
 def test_step_3_apply_chosen_model_RF_threshold(
     spark,
     training_conf,

From 97aa7e2f65800b2cb8ae69081f8a7880bebf0d9f Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Mon, 18 Nov 2024 10:54:19 -0600
Subject: [PATCH 12/19] [#161] Update test to check xgboost
 training_feature_importances

xgboost has a different setup for feature importances, so the current logic
ignores it. We'll need to update the save model metadata step to include logic
specifically for xgboost.
---
 hlink/tests/training_test.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/hlink/tests/training_test.py b/hlink/tests/training_test.py
index 7ca5908..ccc62d6 100644
--- a/hlink/tests/training_test.py
+++ b/hlink/tests/training_test.py
@@ -476,6 +476,14 @@ def test_step_3_with_xgboost_model(
     training.run_step(2)
     training.run_step(3)
 
+    importances_df = spark.table("training_feature_importances")
+    assert importances_df.columns == [
+        "feature_name",
+        "category",
+        "weight",
+        "average_gain_per_split",
+    ]
+
 
 def test_step_3_requires_table(training_conf, training):
     training_conf["training"]["feature_importances"] = True

From 74231695c58f62c0e3f353bce73c0d9631b78731 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Mon, 18 Nov 2024 12:16:50 -0600
Subject: [PATCH 13/19] [#161] Pull column and category logic before feature
 importances logic

---
 .../training/link_step_save_model_metadata.py | 44 +++++++++----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/hlink/linking/training/link_step_save_model_metadata.py b/hlink/linking/training/link_step_save_model_metadata.py
index 00e3922..fef5ff1 100644
--- a/hlink/linking/training/link_step_save_model_metadata.py
+++ b/hlink/linking/training/link_step_save_model_metadata.py
@@ -63,29 +63,7 @@ def _run(self):
         vector_assembler = pipeline_model.stages[0]
         classifier = pipeline_model.stages[1]
 
-        print("Retrieving model feature importances or coefficients...")
-        try:
-            feature_imp = classifier.coefficients
-        except:
-            try:
-                feature_imp = classifier.featureImportances
-            except:
-                print(
-                    "This model doesn't contain a coefficient or feature importances parameter -- check chosen model type."
-                )
-                return
-            else:
-                label = "Feature importances"
-        else:
-            label = "Coefficients"
-
         column_names = vector_assembler.getInputCols()
-        # We need to convert from numpy float64s to Python floats to avoid type
-        # issues when creating the DataFrame below.
-        feature_importances = [
-            float(importance) for importance in feature_imp.toArray()
-        ]
-
         tf_prepped = self.task.spark.table(f"{table_prefix}training_features_prepped")
         tf_prepped_schema = dict(tf_prepped.dtypes)
         tf_prepped_row = tf_prepped.head()
@@ -108,6 +86,28 @@ def _run(self):
                 base_col = col.removesuffix("_imp")
                 true_cols.append((base_col, None))
 
+        print("Retrieving model feature importances or coefficients...")
+        try:
+            feature_imp = classifier.coefficients
+        except:
+            try:
+                feature_imp = classifier.featureImportances
+            except:
+                print(
+                    "This model doesn't contain a coefficient or feature importances parameter -- check chosen model type."
+                )
+                return
+            else:
+                label = "Feature importances"
+        else:
+            label = "Coefficients"
+
+        # We need to convert from numpy float64s to Python floats to avoid type
+        # issues when creating the DataFrame below.
+        feature_importances = [
+            float(importance) for importance in feature_imp.toArray()
+        ]
+
         true_column_names = [column_name for (column_name, _) in true_cols]
         true_categories = [category for (_, category) in true_cols]
 

From ffba81a0ff829bf7316c82e6ad9bb4763ae222cd Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Mon, 18 Nov 2024 12:49:04 -0600
Subject: [PATCH 14/19] [#161] Support saving model metadata for xgboost

This is really different from the Spark models, so I've made it a special case
instead of trying to integrate it with the previous logic closely. This section
might be due for some refactoring now.
---
 .../training/link_step_save_model_metadata.py | 65 ++++++++++++-------
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/hlink/linking/training/link_step_save_model_metadata.py b/hlink/linking/training/link_step_save_model_metadata.py
index fef5ff1..ed98274 100644
--- a/hlink/linking/training/link_step_save_model_metadata.py
+++ b/hlink/linking/training/link_step_save_model_metadata.py
@@ -86,35 +86,52 @@ def _run(self):
                 base_col = col.removesuffix("_imp")
                 true_cols.append((base_col, None))
 
+        true_column_names = [column_name for (column_name, _) in true_cols]
+        true_categories = [category for (_, category) in true_cols]
+        model_type = config[training_conf]["chosen_model"]["type"]
+
         print("Retrieving model feature importances or coefficients...")
-        try:
-            feature_imp = classifier.coefficients
-        except:
+
+        if model_type == "xgboost":
+            raw_weights = classifier.get_feature_importances("weight")
+            raw_gains = classifier.get_feature_importances("gain")
+            keys = [f"f{index}" for index in range(len(true_cols))]
+
+            weights = [raw_weights.get(key, 0.0) for key in keys]
+            gains = [raw_gains.get(key, 0.0) for key in keys]
+            label = "Feature importances (weights and gain)"
+
+            features_df = self.task.spark.createDataFrame(
+                zip(true_column_names, true_categories, weights, gains),
+                "feature_name: string, category: int, weight: double, average_gain_per_split: double",
+            ).sort("feature_name", "category")
+        else:
             try:
-                feature_imp = classifier.featureImportances
+                feature_imp = classifier.coefficients
             except:
-                print(
-                    "This model doesn't contain a coefficient or feature importances parameter -- check chosen model type."
-                )
-                return
+                try:
+                    feature_imp = classifier.featureImportances
+                except:
+                    print(
+                        "This model doesn't contain a coefficient or feature importances parameter -- check chosen model type."
+                    )
+                    return
+                else:
+                    label = "Feature importances"
             else:
-                label = "Feature importances"
-        else:
-            label = "Coefficients"
+                label = "Coefficients"
 
-        # We need to convert from numpy float64s to Python floats to avoid type
-        # issues when creating the DataFrame below.
-        feature_importances = [
-            float(importance) for importance in feature_imp.toArray()
-        ]
-
-        true_column_names = [column_name for (column_name, _) in true_cols]
-        true_categories = [category for (_, category) in true_cols]
-
-        features_df = self.task.spark.createDataFrame(
-            zip(true_column_names, true_categories, feature_importances, strict=True),
-            "feature_name: string, category: int, coefficient_or_importance: double",
-        ).sort("feature_name", "category")
+            # We need to convert from numpy float64s to Python floats to avoid type
+            # issues when creating the DataFrame below.
+            feature_importances = [
+                float(importance) for importance in feature_imp.toArray()
+            ]
+            features_df = self.task.spark.createDataFrame(
+                zip(
+                    true_column_names, true_categories, feature_importances, strict=True
+                ),
+                "feature_name: string, category: int, coefficient_or_importance: double",
+            ).sort("feature_name", "category")
 
         feature_importances_table = (
             f"{self.task.table_prefix}training_feature_importances"

From 0277d7d8a4d06e3c4fd2ab9fad495e3e0dd35f0d Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Mon, 18 Nov 2024 12:53:03 -0600
Subject: [PATCH 15/19] [#161] Rename a variable in training step 3

---
 .../training/link_step_save_model_metadata.py     | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/hlink/linking/training/link_step_save_model_metadata.py b/hlink/linking/training/link_step_save_model_metadata.py
index ed98274..88f82a4 100644
--- a/hlink/linking/training/link_step_save_model_metadata.py
+++ b/hlink/linking/training/link_step_save_model_metadata.py
@@ -58,10 +58,9 @@ def _run(self):
 
             raise new_error from e
 
-        # The pipeline model has three stages: vector assembler, classifier, post
-        # transformer.
+        # The pipeline model has three stages: vector assembler, model, and post transformer.
         vector_assembler = pipeline_model.stages[0]
-        classifier = pipeline_model.stages[1]
+        model = pipeline_model.stages[1]
 
         column_names = vector_assembler.getInputCols()
         tf_prepped = self.task.spark.table(f"{table_prefix}training_features_prepped")
@@ -93,13 +92,13 @@ def _run(self):
         print("Retrieving model feature importances or coefficients...")
 
         if model_type == "xgboost":
-            raw_weights = classifier.get_feature_importances("weight")
-            raw_gains = classifier.get_feature_importances("gain")
+            raw_weights = model.get_feature_importances("weight")
+            raw_gains = model.get_feature_importances("gain")
             keys = [f"f{index}" for index in range(len(true_cols))]
 
             weights = [raw_weights.get(key, 0.0) for key in keys]
             gains = [raw_gains.get(key, 0.0) for key in keys]
-            label = "Feature importances (weights and gain)"
+            label = "Feature importances (weights and gains)"
 
             features_df = self.task.spark.createDataFrame(
                 zip(true_column_names, true_categories, weights, gains),
@@ -107,10 +106,10 @@ def _run(self):
             ).sort("feature_name", "category")
         else:
             try:
-                feature_imp = classifier.coefficients
+                feature_imp = model.coefficients
             except:
                 try:
-                    feature_imp = classifier.featureImportances
+                    feature_imp = model.featureImportances
                 except:
                     print(
                         "This model doesn't contain a coefficient or feature importances parameter -- check chosen model type."

From 3ca39529d918a97bf089f342ec698856156a0b7d Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Mon, 18 Nov 2024 15:13:50 -0600
Subject: [PATCH 16/19] [#161] Make the "xgboost is missing" error message more
 helpful

---
 hlink/linking/core/classifier.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hlink/linking/core/classifier.py b/hlink/linking/core/classifier.py
index 65295fe..8ad400f 100644
--- a/hlink/linking/core/classifier.py
+++ b/hlink/linking/core/classifier.py
@@ -106,7 +106,9 @@ def choose_classifier(model_type, params, dep_var):
     elif model_type == "xgboost":
         if not _xgboost_available:
             raise ModuleNotFoundError(
-                "model_type 'xgboost' requires the xgboost library"
+                "To use the experimental 'xgboost' model type, you need to install "
+                "the xgboost library and its dependencies. Try installing hlink with "
+                "the xgboost extra: 'pip install hlink[xgboost]'."
             )
         params_without_threshold = {
             key: val

From b992ba50eb14f9289259dcb4c407d88b6863648a Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Mon, 18 Nov 2024 15:57:36 -0600
Subject: [PATCH 17/19] [#161] Update the README with information on XGBoost

---
 README.md | 45 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index c020e7a..d2ee155 100755
--- a/README.md
+++ b/README.md
@@ -26,19 +26,52 @@ We do our best to make hlink compatible with Python 3.10-3.12. If you have a
 problem using hlink on one of these versions of Python, please open an issue
 through GitHub. Versions of Python older than 3.10 are not supported.
 
-Note that pyspark 3.5 does not yet officially support Python 3.12. If you
-encounter pyspark-related import errors while running hlink on Python 3.12, try
+Note that PySpark 3.5 does not yet officially support Python 3.12. If you
+encounter PySpark-related import errors while running hlink on Python 3.12, try
 
 - Installing the setuptools package. The distutils package was deleted from the
-  standard library in Python 3.12, but some versions of pyspark still import
+  standard library in Python 3.12, but some versions of PySpark still import
   it. The setuptools package provides a hacky stand-in distutils library which
-  should fix some import errors in pyspark. We install setuptools in our
+  should fix some import errors in PySpark. We install setuptools in our
   development and test dependencies so that our tests work on Python 3.12.
 
-- Downgrading Python to 3.10 or 3.11. Pyspark officially supports these
-  versions of Python. So you should have better chances getting pyspark to work
+- Downgrading Python to 3.10 or 3.11. PySpark officially supports these
+  versions of Python. So you should have better chances getting PySpark to work
   well on Python 3.10 or 3.11.
 
+### XGBoost Support
+
+[XGBoost](https://xgboost.readthedocs.io/en/stable/index.html) is a highly
+performant gradient boosting machine learning library. hlink includes optional
+support for XGBoost through the xgboost Python package. This support is
+experimental and may change since the XGBoost-PySpark integration provided by
+the xgboost package is currently unstable.
+
+To install the xgboost package and its Python dependencies, run
+`pip install hlink[xgboost]`. This may be enough to get xgboost running on some
+machines. If you run into further errors, you might need to install the libomp
+package, which xgboost requires.
+
+After installing xgboost, you can use it as a model type in training and model
+exploration. xgboost has a large list of available parameters, which are
+documented in the [XGBoost parameter reference](https://xgboost.readthedocs.io/en/stable/parameter.html).
+hlink passes parameters defined in your config file through to the xgboost
+library.
+
+```toml
+# max_depth, eta, and gamma are parameters for xgboost. threshold and
+# threshold_ratio are hlink-specific settings that apply to all model types.
+chosen_model = {
+    type = "xgboost",
+    max_depth = 5,
+    eta = 0.5,
+    gamma = 0.05,
+    threshold = 0.5,
+    threshold_ratio = 2.0
+}
+```
+
+
 ## Docs
 
 The documentation site can be found at [hlink.docs.ipums.org](https://hlink.docs.ipums.org).

From 3065310e5a78fe8a41a0b37b76d0961576e26828 Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Mon, 18 Nov 2024 16:22:01 -0600
Subject: [PATCH 18/19] [#161] Add information about xgboost to models.md

---
 sphinx-docs/models.md | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/sphinx-docs/models.md b/sphinx-docs/models.md
index a1c9996..357056d 100644
--- a/sphinx-docs/models.md
+++ b/sphinx-docs/models.md
@@ -87,3 +87,38 @@ chosen_model = {
     threshold_ratio = 1.3
 }
 ```
+
+## xgboost
+
+*Added in version 3.8.0.*
+
+This is an alternate, high-performance implementation of gradient boosting.
+It uses [xgboost.spark.SparkXGBClassifier](https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.spark.SparkXGBClassifier).
+Since the XGBoost-PySpark integration which the xgboost Python package provides
+is currently unstable, support for the xgboost model type is disabled in hlink
+by default. hlink will stop with an error if you try to use this model type
+without enabling support for it. To enable support for xgboost, install hlink
+with the `xgboost` extra.
+
+```
+pip install hlink[xgboost]
+```
+
+This installs the xgboost package and its Python dependencies. Depending on
+your machine and operating system, you may also need to install the libomp
+library, which is another dependency of xgboost. xgboost should raise a helpful
+error if it detects that you need to install libomp.
+
+You can view a list of xgboost's parameters
+[here](https://xgboost.readthedocs.io/en/latest/parameter.html).
+
+```
+chosen_model = {
+    type = "xgboost",
+    max_depth = 5,
+    eta = 0.5,
+    gamma = 0.05,
+    threshold = 0.8,
+    threshold_ratio = 1.5
+}
+```

From ab1d83a228cdea0995072b8fde305420a203e85b Mon Sep 17 00:00:00 2001
From: rileyh <rileyh@umn.edu>
Date: Tue, 19 Nov 2024 11:16:37 -0600
Subject: [PATCH 19/19] [#161] Regenerate Sphinx docs

This also updates Alabaster to 1.0.0.
---
 docs/_sources/models.md.txt            |  35 ++++++++
 docs/_static/alabaster.css             | 115 ++++++++-----------------
 docs/_static/github-banner.svg         |   5 ++
 docs/column_mappings.html              |  26 +++---
 docs/comparison_features.html          |  26 +++---
 docs/comparisons.html                  |  26 +++---
 docs/config.html                       |  26 +++---
 docs/feature_selection_transforms.html |  26 +++---
 docs/genindex.html                     |  26 +++---
 docs/index.html                        |  27 +++---
 docs/installation.html                 |  26 +++---
 docs/introduction.html                 |  26 +++---
 docs/link_tasks.html                   |  26 +++---
 docs/models.html                       |  57 +++++++++---
 docs/pipeline_features.html            |  26 +++---
 docs/running_the_program.html          |  26 +++---
 docs/search.html                       |   5 +-
 docs/searchindex.js                    |   2 +-
 docs/substitutions.html                |  26 +++---
 docs/use_examples.html                 |  26 +++---
 20 files changed, 306 insertions(+), 278 deletions(-)
 create mode 100644 docs/_static/github-banner.svg

diff --git a/docs/_sources/models.md.txt b/docs/_sources/models.md.txt
index a1c9996..357056d 100644
--- a/docs/_sources/models.md.txt
+++ b/docs/_sources/models.md.txt
@@ -87,3 +87,38 @@ chosen_model = {
     threshold_ratio = 1.3
 }
 ```
+
+## xgboost
+
+*Added in version 3.8.0.*
+
+This is an alternate, high-performance implementation of gradient boosting.
+It uses [xgboost.spark.SparkXGBClassifier](https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.spark.SparkXGBClassifier).
+Since the XGBoost-PySpark integration which the xgboost Python package provides
+is currently unstable, support for the xgboost model type is disabled in hlink
+by default. hlink will stop with an error if you try to use this model type
+without enabling support for it. To enable support for xgboost, install hlink
+with the `xgboost` extra.
+
+```
+pip install hlink[xgboost]
+```
+
+This installs the xgboost package and its Python dependencies. Depending on
+your machine and operating system, you may also need to install the libomp
+library, which is another dependency of xgboost. xgboost should raise a helpful
+error if it detects that you need to install libomp.
+
+You can view a list of xgboost's parameters
+[here](https://xgboost.readthedocs.io/en/latest/parameter.html).
+
+```
+chosen_model = {
+    type = "xgboost",
+    max_depth = 5,
+    eta = 0.5,
+    gamma = 0.05,
+    threshold = 0.8,
+    threshold_ratio = 1.5
+}
+```
diff --git a/docs/_static/alabaster.css b/docs/_static/alabaster.css
index e3174bf..7e75bf8 100644
--- a/docs/_static/alabaster.css
+++ b/docs/_static/alabaster.css
@@ -1,5 +1,3 @@
-@import url("basic.css");
-
 /* -- page layout ----------------------------------------------------------- */
 
 body {
@@ -160,8 +158,8 @@ div.sphinxsidebar input {
     font-size: 1em;
 }
 
-div.sphinxsidebar #searchbox input[type="text"] {
-    width: 160px;
+div.sphinxsidebar #searchbox {
+    margin: 1em 0;
 }
 
 div.sphinxsidebar .search > div {
@@ -263,10 +261,6 @@ div.admonition p.last {
     margin-bottom: 0;
 }
 
-div.highlight {
-    background-color: #fff;
-}
-
 dt:target, .highlight {
     background: #FAF3E8;
 }
@@ -454,7 +448,7 @@ ul, ol {
 }
 
 pre {
-    background: #EEE;
+    background: unset;
     padding: 7px 30px;
     margin: 15px 0px;
     line-height: 1.3em;
@@ -485,15 +479,15 @@ a.reference {
     border-bottom: 1px dotted #004B6B;
 }
 
+a.reference:hover {
+    border-bottom: 1px solid #6D4100;
+}
+
 /* Don't put an underline on images */
 a.image-reference, a.image-reference:hover {
     border-bottom: none;
 }
 
-a.reference:hover {
-    border-bottom: 1px solid #6D4100;
-}
-
 a.footnote-reference {
     text-decoration: none;
     font-size: 0.7em;
@@ -509,68 +503,7 @@ a:hover tt, a:hover code {
     background: #EEE;
 }
 
-
-@media screen and (max-width: 870px) {
-
-    div.sphinxsidebar {
-    	display: none;
-    }
-
-    div.document {
-       width: 100%;
-
-    }
-
-    div.documentwrapper {
-    	margin-left: 0;
-    	margin-top: 0;
-    	margin-right: 0;
-    	margin-bottom: 0;
-    }
-
-    div.bodywrapper {
-    	margin-top: 0;
-    	margin-right: 0;
-    	margin-bottom: 0;
-    	margin-left: 0;
-    }
-
-    ul {
-    	margin-left: 0;
-    }
-
-	li > ul {
-        /* Matches the 30px from the "ul, ol" selector above */
-		margin-left: 30px;
-	}
-
-    .document {
-    	width: auto;
-    }
-
-    .footer {
-    	width: auto;
-    }
-
-    .bodywrapper {
-    	margin: 0;
-    }
-
-    .footer {
-    	width: auto;
-    }
-
-    .github {
-        display: none;
-    }
-
-
-
-}
-
-
-
-@media screen and (max-width: 875px) {
+@media screen and (max-width: 940px) {
 
     body {
         margin: 0;
@@ -580,12 +513,16 @@ a:hover tt, a:hover code {
     div.documentwrapper {
         float: none;
         background: #fff;
+        margin-left: 0;
+        margin-top: 0;
+        margin-right: 0;
+        margin-bottom: 0;
     }
 
     div.sphinxsidebar {
         display: block;
         float: none;
-        width: 102.5%;
+        width: unset;
         margin: 50px -30px -20px -30px;
         padding: 10px 20px;
         background: #333;
@@ -620,8 +557,14 @@ a:hover tt, a:hover code {
 
     div.body {
         min-height: 0;
+        min-width: auto; /* fixes width on small screens, breaks .hll */
         padding: 0;
     }
+    
+    .hll {
+        /* "fixes" the breakage */
+        width: max-content;
+    }
 
     .rtd_doc_footer {
         display: none;
@@ -635,13 +578,18 @@ a:hover tt, a:hover code {
         width: auto;
     }
 
-    .footer {
-        width: auto;
-    }
-
     .github {
         display: none;
     }
+
+    ul {
+        margin-left: 0;
+    }
+
+    li > ul {
+       /* Matches the 30px from the "ul, ol" selector above */
+        margin-left: 30px;
+    }
 }
 
 
@@ -705,4 +653,11 @@ nav#breadcrumbs li+li:before {
     div.related {
         display: none;
     }
+}
+
+img.github  {
+    position: absolute;
+    top: 0;
+    border: 0;
+    right: 0;
 }
\ No newline at end of file
diff --git a/docs/_static/github-banner.svg b/docs/_static/github-banner.svg
new file mode 100644
index 0000000..c47d9dc
--- /dev/null
+++ b/docs/_static/github-banner.svg
@@ -0,0 +1,5 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="80" height="80" viewBox="0 0 250 250" fill="#fff">
+    <path d="M0 0l115 115h15l12 27 108 108V0z" fill="#151513"/>
+    <path d="M128 109c-15-9-9-19-9-19 3-7 2-11 2-11-1-7 3-2 3-2 4 5 2 11 2 11-3 10 5 15 9 16"/>
+    <path d="M115 115s4 2 5 0l14-14c3-2 6-3 8-3-8-11-15-24 2-41 5-5 10-7 16-7 1-2 3-7 12-11 0 0 5 3 7 16 4 2 8 5 12 9s7 8 9 12c14 3 17 7 17 7-4 8-9 11-11 11 0 6-2 11-7 16-16 16-30 10-41 2 0 3-1 7-5 11l-12 11c-1 1 1 5 1 5z"/>
+</svg>
diff --git a/docs/column_mappings.html b/docs/column_mappings.html
index 623d185..c191199 100644
--- a/docs/column_mappings.html
+++ b/docs/column_mappings.html
@@ -7,7 +7,8 @@
 
     <title>Column Mappings &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -369,7 +370,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -403,16 +413,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -430,7 +430,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/column_mappings.md.txt"
diff --git a/docs/comparison_features.html b/docs/comparison_features.html
index ebbfbda..d251a28 100644
--- a/docs/comparison_features.html
+++ b/docs/comparison_features.html
@@ -7,7 +7,8 @@
 
     <title>Comparison Features &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -1267,7 +1268,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -1302,16 +1312,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -1329,7 +1329,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/comparison_features.md.txt"
diff --git a/docs/comparisons.html b/docs/comparisons.html
index 1f214d4..244760e 100644
--- a/docs/comparisons.html
+++ b/docs/comparisons.html
@@ -7,7 +7,8 @@
 
     <title>Comparisons &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -164,7 +165,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -198,16 +208,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -225,7 +225,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/comparisons.md.txt"
diff --git a/docs/config.html b/docs/config.html
index 84e7b11..07bb31c 100644
--- a/docs/config.html
+++ b/docs/config.html
@@ -7,7 +7,8 @@
 
     <title>Configuration &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -912,7 +913,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -959,16 +969,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -986,7 +986,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/config.md.txt"
diff --git a/docs/feature_selection_transforms.html b/docs/feature_selection_transforms.html
index fe86f9a..4a58650 100644
--- a/docs/feature_selection_transforms.html
+++ b/docs/feature_selection_transforms.html
@@ -7,7 +7,8 @@
 
     <title>Feature Selection Transforms &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -184,7 +185,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -221,16 +231,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -248,7 +248,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/feature_selection_transforms.md.txt"
diff --git a/docs/genindex.html b/docs/genindex.html
index e3f4867..3de05ad 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -6,7 +6,8 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>Index &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -52,7 +53,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -79,16 +89,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -106,7 +106,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
     </div>
 
diff --git a/docs/index.html b/docs/index.html
index fa2a68c..d33d6db 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -7,7 +7,8 @@
 
     <title>Welcome to hlink’s documentation! &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -134,6 +135,7 @@ <h1>Configuration API<a class="headerlink" href="#configuration-api" title="Link
 <li class="toctree-l2"><a class="reference internal" href="models.html#logistic-regression">logistic_regression</a></li>
 <li class="toctree-l2"><a class="reference internal" href="models.html#decision-tree">decision_tree</a></li>
 <li class="toctree-l2"><a class="reference internal" href="models.html#gradient-boosted-trees">gradient_boosted_trees</a></li>
+<li class="toctree-l2"><a class="reference internal" href="models.html#xgboost">xgboost</a></li>
 </ul>
 </li>
 </ul>
@@ -156,7 +158,16 @@ <h1 class="logo"><a href="#">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -184,16 +195,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -211,7 +212,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/index.rst.txt"
diff --git a/docs/installation.html b/docs/installation.html
index 1d241e5..6cd06c5 100644
--- a/docs/installation.html
+++ b/docs/installation.html
@@ -7,7 +7,8 @@
 
     <title>Installation &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -75,7 +76,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1 current"><a class="current reference internal" href="#">Installation</a><ul>
@@ -109,16 +119,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -136,7 +136,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/installation.md.txt"
diff --git a/docs/introduction.html b/docs/introduction.html
index 749729b..bf6781f 100644
--- a/docs/introduction.html
+++ b/docs/introduction.html
@@ -7,7 +7,8 @@
 
     <title>Introduction &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -89,7 +90,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul class="current">
 <li class="toctree-l1 current"><a class="current reference internal" href="#">Introduction</a><ul>
 <li class="toctree-l2"><a class="reference internal" href="#overview">Overview</a></li>
@@ -121,16 +131,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -148,7 +148,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/introduction.md.txt"
diff --git a/docs/link_tasks.html b/docs/link_tasks.html
index a739318..7a7ab55 100644
--- a/docs/link_tasks.html
+++ b/docs/link_tasks.html
@@ -7,7 +7,8 @@
 
     <title>Link Tasks &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -237,7 +238,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -274,16 +284,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -301,7 +301,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/link_tasks.md.txt"
diff --git a/docs/models.html b/docs/models.html
index 96bf7de..93506b8 100644
--- a/docs/models.html
+++ b/docs/models.html
@@ -7,7 +7,8 @@
 
     <title>Models &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -132,6 +133,36 @@ <h2>gradient_boosted_trees<a class="headerlink" href="#gradient-boosted-trees" t
 </pre></div>
 </div>
 </section>
+<section id="xgboost">
+<h2>xgboost<a class="headerlink" href="#xgboost" title="Link to this heading">¶</a></h2>
+<p><em>Added in version 3.8.0.</em></p>
+<p>This is an alternate, high-performance implementation of gradient boosting.
+It uses <a class="reference external" href="https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.spark.SparkXGBClassifier">xgboost.spark.SparkXGBClassifier</a>.
+Since the XGBoost-PySpark integration which the xgboost Python package provides
+is currently unstable, support for the xgboost model type is disabled in hlink
+by default. hlink will stop with an error if you try to use this model type
+without enabling support for it. To enable support for xgboost, install hlink
+with the <code class="docutils literal notranslate"><span class="pre">xgboost</span></code> extra.</p>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pip</span> <span class="n">install</span> <span class="n">hlink</span><span class="p">[</span><span class="n">xgboost</span><span class="p">]</span>
+</pre></div>
+</div>
+<p>This installs the xgboost package and its Python dependencies. Depending on
+your machine and operating system, you may also need to install the libomp
+library, which is another dependency of xgboost. xgboost should raise a helpful
+error if it detects that you need to install libomp.</p>
+<p>You can view a list of xgboost’s parameters
+<a class="reference external" href="https://xgboost.readthedocs.io/en/latest/parameter.html">here</a>.</p>
+<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">chosen_model</span> <span class="o">=</span> <span class="p">{</span>
+    <span class="nb">type</span> <span class="o">=</span> <span class="s2">&quot;xgboost&quot;</span><span class="p">,</span>
+    <span class="n">max_depth</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span>
+    <span class="n">eta</span> <span class="o">=</span> <span class="mf">0.5</span><span class="p">,</span>
+    <span class="n">gamma</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span>
+    <span class="n">threshold</span> <span class="o">=</span> <span class="mf">0.8</span><span class="p">,</span>
+    <span class="n">threshold_ratio</span> <span class="o">=</span> <span class="mf">1.5</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+</section>
 </section>
 
 
@@ -150,7 +181,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -173,6 +213,7 @@ <h3>Navigation</h3>
 <li class="toctree-l2"><a class="reference internal" href="#logistic-regression">logistic_regression</a></li>
 <li class="toctree-l2"><a class="reference internal" href="#decision-tree">decision_tree</a></li>
 <li class="toctree-l2"><a class="reference internal" href="#gradient-boosted-trees">gradient_boosted_trees</a></li>
+<li class="toctree-l2"><a class="reference internal" href="#xgboost">xgboost</a></li>
 </ul>
 </li>
 </ul>
@@ -185,16 +226,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -212,7 +243,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/models.md.txt"
diff --git a/docs/pipeline_features.html b/docs/pipeline_features.html
index d4a50f8..931dac7 100644
--- a/docs/pipeline_features.html
+++ b/docs/pipeline_features.html
@@ -7,7 +7,8 @@
 
     <title>Pipeline generated features &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -99,7 +100,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -131,16 +141,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -158,7 +158,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/pipeline_features.md.txt"
diff --git a/docs/running_the_program.html b/docs/running_the_program.html
index 5c066c4..085b800 100644
--- a/docs/running_the_program.html
+++ b/docs/running_the_program.html
@@ -7,7 +7,8 @@
 
     <title>Running hlink &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -285,7 +286,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -318,16 +328,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -345,7 +345,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/running_the_program.md.txt"
diff --git a/docs/search.html b/docs/search.html
index 6e582ed..30fa50f 100644
--- a/docs/search.html
+++ b/docs/search.html
@@ -6,7 +6,8 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>Search &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
@@ -123,7 +124,7 @@ <h3>Related Topics</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
     </div>
 
diff --git a/docs/searchindex.js b/docs/searchindex.js
index 149159e..85c193f 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles": {"1:1 substitution by data table": [[12, "substitution-by-data-table"]], "Advanced Config File": [[3, "advanced-config-file"]], "Advanced Usage": [[0, "advanced-usage"]], "Advanced Workflow Examples": [[13, null]], "Aggregate Features": [[1, "aggregate-features"]], "Basic Config File": [[3, "basic-config-file"]], "Basic Usage": [[0, "basic-usage"]], "Blocking": [[3, "blocking"]], "Column Mappings": [[0, null], [3, "column-mappings"]], "Comparison Features": [[1, null], [3, "comparison-features"]], "Comparison Types": [[1, "comparison-types"], [2, "comparison-types"]], "Comparisons": [[2, null], [3, "comparisons"]], "Configuration": [[3, null]], "Configuration API": [[5, "configuration-api"], [5, null]], "Data sources": [[3, "data-sources"]], "Defining Multiple Comparisons": [[2, "defining-multiple-comparisons"]], "Example interactive mode workflow": [[11, "example-interactive-mode-workflow"]], "Example model exploration and FP/FN export workflow": [[13, "example-model-exploration-and-fp-fn-export-workflow"]], "Example training data export with generated ML features": [[13, "example-training-data-export-with-generated-ml-features"]], "Export training data after generating features to reuse in different linking years": [[13, "export-training-data-after-generating-features-to-reuse-in-different-linking-years"]], "Feature Selection Transforms": [[4, null]], "Feature Selections": [[3, "feature-selections"]], "Feature add-ons": [[1, "feature-add-ons"]], "Filter": [[3, "filter"]], "Household Aggregate Features": [[1, "household-aggregate-features"]], "Household Comparisons": [[3, "household-comparisons"]], "Household Matching": [[8, "household-matching"]], "Household training and models": [[3, "household-training-and-models"]], "Installation": [[6, null]], "Installing from PyPI": [[6, "installing-from-pypi"]], "Installing from source": [[6, "installing-from-source"]], "Interactive Mode": [[11, "interactive-mode"]], "Introduction": [[7, 
null]], "Link Tasks": [[8, null]], "ML model exploration and export of lists of potential false positives/negatives in training data": [[13, "ml-model-exploration-and-export-of-lists-of-potential-false-positives-negatives-in-training-data"]], "Matching": [[8, "matching"]], "Model Exploration and Household Model Exploration": [[8, "model-exploration-and-household-model-exploration"]], "Models": [[9, null]], "Multiple Comparisons": [[3, "multiple-comparisons"]], "Overview": [[2, "overview"], [7, "overview"], [8, "overview"], [8, "id1"], [8, "id4"], [8, "id7"], [8, "id10"], [8, "id13"]], "Pipeline generated features": [[10, null]], "Pipeline-generated Features": [[3, "pipeline-generated-features"]], "Potential Matches Universe": [[3, "potential-matches-universe"]], "Preprocessing": [[8, "preprocessing"]], "Related Configuration Sections": [[8, "related-configuration-sections"], [8, "id3"], [8, "id6"], [8, "id9"], [8, "id12"], [8, "id15"]], "Reporting": [[8, "reporting"]], "Requirements": [[6, "requirements"]], "Running Linking Tasks and Steps": [[11, "running-linking-tasks-and-steps"]], "Running hlink": [[11, null]], "Single Comparison": [[3, "single-comparison"]], "Starting the program": [[11, "starting-the-program"]], "Substitution Columns": [[3, "substitution-columns"]], "Substitution by regex word replace": [[12, "substitution-by-regex-word-replace"]], "Substitutions": [[12, null]], "Task steps": [[8, "task-steps"], [8, "id2"], [8, "id5"], [8, "id8"], [8, "id11"], [8, "id14"]], "Top level configs": [[3, "top-level-configs"]], "Training and Household Training": [[8, "training-and-household-training"]], "Training and models": [[3, "training-and-models"]], "Transformer types": [[10, "transformer-types"]], "Transforms": [[0, "transforms"]], "Using hlink as a Library": [[11, "using-hlink-as-a-library"]], "Welcome to hlink\u2019s documentation!": [[5, null]], "abs_diff": [[1, "abs-diff"]], "add_to_a": [[0, "add-to-a"]], "alias": [[1, "alias"]], "all_equals": [[1, 
"all-equals"]], "and": [[1, "and"]], "any_equals": [[1, "any-equals"]], "array": [[4, "array"]], "array_index": [[0, "array-index"]], "b_minus_a": [[1, "b-minus-a"]], "bigrams": [[4, "bigrams"]], "btwn_threshold": [[1, "btwn-threshold"]], "bucketizer": [[10, "bucketizer"]], "caution_comp_3": [[1, "caution-comp-3"]], "caution_comp_3_012": [[1, "caution-comp-3-012"]], "caution_comp_4": [[1, "caution-comp-4"]], "caution_comp_4_012": [[1, "caution-comp-4-012"]], "concat_to_a": [[0, "concat-to-a"]], "concat_to_b": [[0, "concat-to-b"]], "concat_two_cols": [[0, "concat-two-cols"]], "condense_strip_whitespace": [[0, "condense-strip-whitespace"]], "decision_tree": [[9, "decision-tree"]], "divide_by_int": [[0, "divide-by-int"]], "either_are_0": [[1, "either-are-0"]], "either_are_1": [[1, "either-are-1"]], "equals": [[1, "equals"]], "equals_as_int": [[1, "equals-as-int"]], "exact_mult": [[1, "exact-mult"]], "extra_children": [[1, "extra-children"]], "f1_match": [[1, "f1-match"]], "f2_match": [[1, "f2-match"]], "fetch_a": [[1, "fetch-a"]], "fetch_b": [[1, "fetch-b"]], "geo_distance": [[1, "geo-distance"]], "get_floor": [[0, "get-floor"]], "gradient_boosted_trees": [[9, "gradient-boosted-trees"]], "gt_threshold": [[1, "gt-threshold"]], "hits": [[1, "hits"]], "hits2": [[1, "hits2"]], "interaction": [[10, "interaction"]], "jaro_winkler": [[1, "jaro-winkler"]], "jaro_winkler_rate": [[1, "jaro-winkler-rate"]], "jaro_winkler_street": [[1, "jaro-winkler-street"]], "jw_max_a": [[1, "jw-max-a"]], "jw_max_b": [[1, "jw-max-b"]], "length_b": [[1, "length-b"]], "logistic_regression": [[9, "logistic-regression"]], "look_at_addl_var": [[1, "look-at-addl-var"]], "lower_threshold": [[1, "lower-threshold"]], "lowercase_strip": [[0, "lowercase-strip"]], "mapping": [[0, "mapping"]], "max_jaro_winkler": [[1, "max-jaro-winkler"]], "maximum_jaro_winkler": [[1, "maximum-jaro-winkler"]], "multi_jaro_winkler_search": [[1, "multi-jaro-winkler-search"]], "neither_are_null": [[1, "neither-are-null"]], 
"not_equals": [[1, "not-equals"]], "not_zero_and_not_equals": [[1, "not-zero-and-not-equals"]], "or": [[1, "or"]], "power": [[1, "power"], [4, "power"]], "present_and_equal_categorical_in_universe": [[1, "present-and-equal-categorical-in-universe"]], "present_and_matching_categorical": [[1, "present-and-matching-categorical"]], "present_and_not_equal": [[1, "present-and-not-equal"]], "present_both_years": [[1, "present-both-years"]], "probit": [[9, "probit"]], "random_forest": [[9, "random-forest"]], "rationalize_name_words": [[0, "rationalize-name-words"]], "rel_jaro_winkler": [[1, "rel-jaro-winkler"]], "remove_alternate_names": [[0, "remove-alternate-names"]], "remove_one_letter_names": [[0, "remove-one-letter-names"]], "remove_prefixes": [[0, "remove-prefixes"]], "remove_punctuation": [[0, "remove-punctuation"]], "remove_qmark_hyphen": [[0, "remove-qmark-hyphen"]], "remove_stop_words": [[0, "remove-stop-words"]], "remove_suffixes": [[0, "remove-suffixes"]], "replace_apostrophe": [[0, "replace-apostrophe"]], "second_gen_imm": [[1, "second-gen-imm"]], "soundex": [[4, "soundex"]], "split": [[0, "split"]], "sql_condition": [[1, "sql-condition"], [4, "sql-condition"]], "substring": [[0, "substring"]], "sum": [[1, "sum"]], "threshold": [[1, "threshold"]], "times": [[1, "times"]], "union": [[4, "union"]], "upper_threshold": [[1, "upper-threshold"]], "when_value": [[0, "when-value"]]}, "docnames": ["column_mappings", "comparison_features", "comparisons", "config", "feature_selection_transforms", "index", "installation", "introduction", "link_tasks", "models", "pipeline_features", "running_the_program", "substitutions", "use_examples"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["column_mappings.md", 
"comparison_features.md", "comparisons.md", "config.md", "feature_selection_transforms.md", "index.rst", "installation.md", "introduction.md", "link_tasks.md", "models.md", "pipeline_features.md", "running_the_program.md", "substitutions.md", "use_examples.md"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"": [0, 1, 3, 7, 8, 11, 12], "0": [0, 1, 2, 3, 8, 9, 10, 11, 13], "005": 3, "012": 1, "05": [1, 3], "1": [0, 1, 2, 3, 5, 8, 9, 10, 11, 13], "10": [0, 3, 6, 13], "100": [0, 3, 13], "11": [0, 3, 6, 10], "12": [0, 6], "14": 1, "15": 9, "1867": 3, "1868": 3, "1869": 3, "1870": 3, "1871": 3, "1872": 3, "1873": 3, "1900": [3, 13], "1900_1910_potential_fn": 13, "1900_1910_potential_fp": 13, "1900_1910_training_data_20191023": 3, "1900_1910_training_result": 13, "1910": [1, 3, 13], "1920": 13, "1930": [3, 13], "1940": [3, 13], "1999": [0, 3], "2": [0, 1, 2, 3, 4, 8, 9, 12, 13], "20": 9, "25": 1, "3": [0, 1, 2, 3, 6, 8, 9, 10, 13], "300": 0, "301": 0, "302": 0, "303": 0, "4": [0, 1, 9], "5": [0, 1, 2, 3, 9, 10, 11, 13], "50": [3, 13], "50g": 13, "53": 3, "5g": 11, "6": [0, 3, 9, 10, 13], "65": 3, "7": [0, 1, 3, 9, 13], "75": [3, 9, 13], "79": [2, 3], "7th": 12, "8": [1, 3, 6, 11], "80": 3, "84": [2, 3], "85": [9, 11], "9": 1, "95": 1, "99": [1, 3], "9998": 0, "9999": [0, 3, 10], "A": [0, 1, 2, 3, 4, 10, 11], "AND": [1, 2, 3], "As": [0, 2], "At": [7, 8], "But": [3, 6], "By": [0, 3, 11], "For": [0, 1, 2, 3, 8, 11, 13], "If": [0, 1, 3, 4, 8, 9, 11, 12], "In": [0, 1, 2, 7, 11, 13], "It": [0, 1, 2, 3, 7, 11, 13], "NOT": 1, "OR": [1, 2, 3], "THEN": 1, "The": [0, 1, 2, 3, 4, 6, 8, 9, 10, 11, 13], "Then": [0, 6], "There": [1, 3, 8], "These": [0, 1, 2, 3, 7, 8, 9, 10], "To": [0, 1, 6, 8, 11], "Will": 3, "With": [0, 10], "_": [0, 1, 2, 3, 4, 5, 9, 10, 11], "_a": 1, "_bpl": 1, "_namefrst": 1, "_sex": 1, "a304bt": 3, "ab": 1, "abbrevi": [8, 12], "abl": 3, "about": [3, 11, 13], "abov": [1, 2, 6], "absolut": 1, "accept": [1, 3, 13], "access": [11, 13], 
"accord": 1, "across": 1, "ad": [0, 1, 2, 3], "add": [0, 2, 5], "add_to_a": 3, "addit": [0, 1, 3, 6, 7, 11], "addl": 1, "addl_var": 1, "adjust": 11, "adopt": 0, "advanc": 5, "affect": 4, "after": [1, 2, 3, 5, 9, 11], "ag": [0, 1, 3, 4], "against": [1, 3, 12], "age_2": 3, "age_at_dataset_b": 0, "age_threshold": 1, "aggreg": 5, "ah": 0, "ahead": 8, "akin": 1, "algorithm": [1, 2, 7, 8], "alia": [0, 3, 8], "all": [0, 1, 3, 4, 8, 9, 10, 11], "allow": [1, 3, 8, 13], "along": 1, "alpha": 9, "alphabet": 0, "alphanumer": 3, "also": [0, 1, 2, 3, 6, 7, 8, 10, 11, 13], "altern": [0, 3], "although": 3, "alwai": 2, "among": 1, "amount": 3, "an": [0, 1, 2, 3, 7, 9, 11], "analysi": 13, "analyz": [11, 13], "ani": [1, 3, 4, 6, 9], "anoth": [0, 1, 3, 4, 8], "anyon": 8, "anywher": 12, "apach": 6, "apart": 0, "api": [3, 7, 10], "apostroph": 0, "appear": [0, 1], "appli": [0, 2, 3, 4, 8, 13], "apply_model": 3, "appropri": 3, "ar": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 13], "arbitrari": 0, "area": 3, "aren": 1, "arg": 13, "argument": [1, 11, 13], "around": 3, "arrai": [0, 3, 5, 9, 10], "array_index": 3, "ask": 13, "aspect": [8, 11], "assert": [1, 11], "assum": 3, "attach_vari": 3, "attempt": 3, "attribut": [0, 1, 2, 3, 4, 8, 9, 10, 11, 12], "auto": 9, "automat": [0, 3, 6, 8], "av": 12, "avail": [0, 1, 3, 4, 6, 8, 9, 10, 13], "avenu": [0, 12], "b": [0, 1, 2, 3, 4, 11], "b200": 3, "back": 1, "backup": 1, "base": [1, 3, 8], "basic": 5, "becaus": 2, "been": 7, "befor": [0, 1, 3, 4, 6, 8], "begin": 11, "behind": 0, "being": [1, 8], "belong": 3, "below": [0, 1, 3, 4, 9, 10, 11], "best": [3, 8], "beta": [3, 9], "better": [0, 7], "between": [0, 1, 2, 3, 7, 8, 11, 13], "beyond": 1, "bigint": 3, "bigram": [3, 5], "bin": 9, "binomi": 9, "birth": 1, "birthplac": [0, 3], "birthyr": [0, 3], "birthyr_3": 3, "birthyr_col": 1, "block": [2, 5, 8], "blvd": 0, "boolean": [1, 3, 4, 12, 13], "boost": [5, 9], "born": 1, "borrow_t": 11, "both": [0, 1, 2, 3, 8, 13], "boundari": 1, "bpl": [0, 1, 3], "bpl1": 3, 
"bpl2": 3, "bpl2_str": 3, "bpl3": 3, "bpl_clean": 3, "bpl_orig": 3, "bpl_root": 0, "bpl_str": 3, "bplmatch": 3, "broken": 7, "btwn": 1, "bucket": [3, 8], "built": 6, "builtin": 1, "byrdifcat": 3, "byrdiff": [1, 3, 13], "c": 1, "c201": 3, "calcul": [1, 13], "call": 0, "can": [0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 13], "cannot": 3, "care": 2, "cartesian": 1, "case": [0, 1, 2, 3, 4, 7], "cast": 1, "categor": [1, 3, 9, 10], "categori": [0, 9], "caus": 9, "caution": [1, 10], "censu": [0, 8, 13], "census": 7, "chang": [0, 1, 3, 6, 11, 13], "charact": 0, "characterist": [7, 8], "check": [1, 11], "check_val_expr": 1, "checkpoint": 4, "child": [0, 9], "children": 1, "chines": 0, "choic": 11, "chosen": 8, "chosen_model": [3, 9, 13], "circl": 0, "circumst": 1, "class": 11, "classif": [8, 9], "claus": [0, 1], "clean": [0, 7], "clean_birthyr": [0, 3, 4], "clone": 6, "code": [0, 1, 3, 6], "coeffici": [3, 8], "col": [0, 1], "col_to_add": 3, "col_to_join_on": 3, "col_typ": 3, "column": [1, 4, 5, 8, 10, 11, 12, 13], "column_map": [0, 3, 8], "column_nam": [0, 1, 3, 12], "column_to_append": 0, "combin": [1, 2, 3, 4, 8], "come": 1, "command": [3, 7, 11, 13], "comment": 4, "common": [7, 8, 13], "commonli": 9, "comp": 1, "comp_a": [1, 2, 3], "comp_b": [1, 2, 3], "comp_c": 1, "comp_d": 1, "compar": [0, 1, 3, 7, 8], "comparis": 3, "comparison": [5, 8], "comparison_featur": [1, 2, 3, 8], "comparison_typ": [1, 2, 3], "complet": 11, "complex": [3, 4], "comput": [1, 4, 8], "concat": 0, "concaten": [0, 1], "condens": 0, "condense_strip_whitespac": 3, "condit": [0, 1, 2, 3, 4, 5, 8], "conf": [11, 13], "config": [1, 4, 5, 8, 11, 13], "configur": [0, 1, 2, 7, 11, 13], "conjuct": 3, "conjunct": 3, "connect": [2, 3], "consid": [1, 2, 9], "consider": 1, "consol": 11, "constraint": [1, 2], "construct": 8, "contain": [0, 1, 3, 12], "context": 10, "continu": [9, 10, 13], "conveni": 11, "convert": [0, 1, 3], "convert_ints_to_long": 3, "copi": [4, 13], "core": [1, 8, 11, 13], "correspond": [7, 8], "could": [0, 
2, 3], "count": [1, 11, 13], "counti": [0, 1], "county_1900_1910_distances_km": 1, "county_a": 1, "county_b": 1, "county_dist": [1, 3, 13], "county_distance_lookup": 1, "county_distance_squar": [1, 3, 13], "county_state_dist": 1, "court": 0, "cover": 10, "coverag": 3, "cpu": 11, "creat": [0, 3, 7, 8, 10, 11, 12, 13], "creation": 3, "crosswalk": 8, "csv": [1, 3, 8, 11, 12, 13], "current": [1, 2, 3, 11], "d": 1, "d425": 3, "data": [0, 1, 5, 7, 8, 11], "databas": 11, "datafram": [8, 11, 13], "dataset": [0, 1, 3, 4, 7, 8, 11, 13], "datasourc": [1, 3, 11], "datasource_a": [3, 8], "datasource_b": [3, 8], "de": 9, "decis": [3, 5, 9, 13], "decisiontreeclassifi": 9, "default": [0, 1, 2, 3, 8, 9, 11], "defin": [1, 3, 5, 8, 9, 10, 11], "definit": [3, 8], "demograph": 8, "depend": [1, 2, 3, 6, 13], "dependent_var": [3, 13], "depth": 9, "deriv": 13, "derived_from": 3, "desc": 11, "describ": [0, 1, 2, 3, 11], "descript": [3, 9, 11], "detail": [0, 3, 11], "determin": [1, 8], "determinist": [7, 8], "dev": 6, "develop": [6, 7], "df": [3, 11], "dictionari": 11, "diff": 1, "differ": [0, 1, 3, 5, 7, 8], "digit": 0, "dir": 12, "directli": 2, "directori": [6, 11, 13], "discard": 9, "discret": 9, "discuss": 3, "dist": 1, "dist_tabl": 1, "distanc": [1, 9], "distance_col": 1, "distance_km": 1, "distances_fil": 1, "distinct": 1, "divid": 0, "divide_by_int": 3, "do": [0, 1, 2, 4, 11, 13], "doc": [9, 10], "document": [1, 3, 9, 11, 13], "doe": [1, 4, 8, 13], "don": [3, 4], "doubl": 10, "down": [0, 7, 13], "drastic": 8, "drop": [0, 3, 11], "drop_al": 11, "drop_all_prc": 11, "drop_all_temp": 11, "drop_data_from_scored_match": 3, "drop_duplicate_a": 3, "drop_duplicate_with_threshold_ratio": [3, 13], "duplic": [3, 9], "durat": 1, "dure": [1, 2, 3, 8], "durmarr": [1, 3], "e": 6, "each": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11], "easiest": [6, 11], "easili": 11, "edit": 6, "effect": [2, 4], "either": [0, 1, 2, 3, 7, 12], "either_0": 1, "either_1": 1, "element": 0, "elig": 8, "els": [1, 3, 4], "else_v": 
1, "else_valu": 0, "empti": 1, "enabl": [3, 8, 11], "encod": [3, 4], "end": [0, 1, 3, 4, 12], "ensur": 3, "enter": 11, "entir": [0, 3], "enum_dist": 1, "enumer": 11, "equal": [2, 3, 12], "equal_and_not_null_templ": 1, "equival": 3, "error": [3, 10], "especi": 3, "etc": 13, "eval": 3, "evalu": [1, 3, 7, 8, 9], "even": 1, "everi": [1, 4], "ex": 1, "exact": [1, 3], "exact_mult": [3, 13], "exampl": [0, 1, 2, 3], "except": [3, 10], "exclud": 1, "excute_command": 11, "execut": 11, "execute_command": 11, "execute_task": 11, "executor": 11, "executor_memori": [11, 13], "exist": [1, 3, 11], "exit": 11, "expand": 3, "expand_length": 3, "expect": 1, "experi": 7, "experiment": [8, 11], "explan": 9, "explicitli": [1, 10], "explod": [2, 3, 8], "exploded_df_a": 11, "exploded_df_b": 11, "explor": [3, 5, 7, 11], "expon": 4, "exponenti": 1, "export": [5, 8, 11], "express": [0, 1, 3], "extend": 1, "extens": 8, "extra": 1, "extract": 3, "f": [1, 11], "f1": 1, "f1_match": 3, "f2": 1, "f2_match": 3, "f_caution": [3, 13], "f_interacted_jw_f": [3, 13], "f_pre": [3, 13], "factori": 11, "fail": 3, "fallback": 1, "fals": [1, 3, 4, 5, 7, 11], "famili": 9, "father_namefrst": 1, "favor": 1, "fbpl": 1, "fbpl_nomatch": 1, "fbplmatch": [3, 13], "featur": [2, 5, 7, 8, 9, 11], "feature_import": [3, 8, 13], "feature_nam": [2, 3], "feature_select": [2, 3, 4, 8], "featuresubsetstrategi": 9, "fed": [3, 8], "femal": [3, 12], "fetch": 1, "fetch_a": 3, "few": 4, "fewer": [1, 9], "fi": 1, "file": [1, 4, 5, 7, 8, 11, 12, 13], "filepath": 11, "fill": 1, "filter": [1, 2, 5, 8, 12], "final": [1, 3, 13], "find": [1, 8, 13], "finish": 11, "first": [0, 1, 3, 6, 8, 11, 12], "first_init_col": 1, "first_nam": 0, "five": 11, "fix": 8, "flag": [1, 2, 3, 10, 11, 13], "flexibl": [1, 2], "float": [1, 3, 9], "floor": 0, "focus": [2, 8], "follow": [0, 1, 2, 7, 11, 12, 13], "foreign": 1, "forest": [5, 9], "form": [1, 3, 8, 12], "format": 0, "four": 1, "framework": 13, "from": [0, 1, 3, 5, 7, 8, 9, 10, 11, 13], 
"from_icpsrctyi": 1, "from_statefip": 1, "fsoundex": [3, 13], "full": [3, 8, 13], "full_count_1870_1880": 11, "full_count_1900_1910": 13, "fullcount_1870_1880": 11, "function": [0, 1, 3, 7, 11], "further": [7, 13], "gbtclassifi": 9, "gen": 1, "gener": [0, 1, 5, 7, 8, 11], "generalizedlinearregress": 9, "geo": 1, "geograph": 1, "get": [0, 1, 2, 3, 11], "get_floor": 3, "get_set": 11, "get_step": 11, "get_tabl": 11, "get_task": 11, "github": 6, "give": [0, 3], "given": [0, 1, 2, 3, 4, 9, 13], "go": [3, 11], "good": 1, "gradient": [5, 9], "greater": [1, 2, 6], "greatest": 1, "group": [3, 8], "gt": 1, "h": 11, "ha": [0, 1, 2, 3, 4, 7, 11, 13], "handl": 11, "harmon": 0, "have": [0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 13], "haven": 3, "head": 0, "header": [3, 10, 12], "help": [3, 8, 11], "here": [2, 3, 8, 11, 13], "hh": [1, 2], "hh_blocked_match": 11, "hh_col": 3, "hh_comparison": [2, 3, 8], "hh_match": [3, 11], "hh_model_eval_repeat_fn": 11, "hh_model_eval_repeat_fp": 11, "hh_model_eval_training_data": 11, "hh_model_eval_training_featur": 11, "hh_model_eval_training_result": 11, "hh_model_eval_training_vector": 11, "hh_model_explor": 11, "hh_potential_match": [2, 11], "hh_potential_matchs_prep": 11, "hh_predicted_match": 11, "hh_repeat_fn": 13, "hh_repeat_fp": 13, "hh_scored_potential_match": 11, "hh_train": [1, 3, 8, 11, 13], "hh_training_data": 11, "hh_training_data_1900_1910": 3, "hh_training_featur": [11, 13], "hh_training_result": 13, "hidden": 11, "hierarch": [7, 11], "hierarchi": 11, "high": 11, "highest": [1, 3], "highli": [7, 8], "histid": [1, 3, 13], "histid_col": 1, "hit": [3, 11, 13], "hits2": [3, 13], "hlink": [0, 1, 2, 3, 4, 6, 7, 8, 13], "hold": 10, "hot": 3, "household": [0, 2, 5, 7, 9, 11, 13], "how": [1, 3, 8], "howev": [4, 7, 13], "hundr": 0, "hyper": [3, 7, 13], "hyperparamet": [9, 13], "hyphen": 0, "i": [0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13], "id": [1, 3], "id_column": [1, 3], "ident": 13, "identifi": [3, 7, 13], "if_valu": 0, "ignor": 7, "ii": [0, 3], 
"iii": [0, 3], "imm": [1, 3, 13], "imm_interacted_bplmatch": 3, "imm_interacted_immyear_caut": [3, 13], "immigr": 1, "immyear_caut": [3, 10], "immyear_diff": [1, 3, 10, 13], "implement": 13, "implicitli": 2, "import": [3, 8, 11, 13], "improv": 8, "includ": [1, 3, 8, 10, 11], "incompar": 1, "increas": [3, 10], "independ": [0, 3], "independent_var": [1, 3, 13], "index": [0, 6], "indic": [1, 13], "individu": [1, 3, 8, 13], "inf": 10, "inform": [0, 1, 3, 11], "ingest": 8, "initi": [0, 1, 11], "input": [0, 1, 3, 4, 7, 8, 11, 12], "input_col": 4, "input_column": [3, 4, 10], "input_table_nam": 11, "inspect": 8, "instal": 5, "instanc": [0, 9], "instead": [1, 3, 4, 6, 8], "instruct": [6, 11], "int": [0, 1, 3, 4, 9], "integ": [0, 1, 3, 10], "integr": 6, "interact": [3, 5, 8, 13], "interfac": 11, "intermedi": 11, "introduct": 5, "introspect": 8, "invalid": [1, 9], "ipum": [0, 7], "ipython": 11, "isn": 13, "istemporari": 11, "item": 0, "iter": 3, "its": [0, 1, 7, 11], "itself": [1, 2], "iv": 3, "jaro": [1, 10], "jaro_winkl": 3, "java": 6, "job": 11, "join": [1, 3, 12], "join_column": [3, 12], "join_valu": [3, 12], "jr": [0, 3], "json": [3, 11], "just": [1, 3, 11, 13], "jw": 1, "jw_col_templ": 1, "jw_f": [1, 3, 13], "jw_m": [3, 10, 13], "jw_max_a": 3, "jw_max_b": 3, "jw_sp": [3, 13], "jw_street": 1, "jw_threshold": 1, "kei": [1, 8, 11], "key_count": 1, "kind": 1, "know": 3, "known": 0, "label": 9, "languag": 7, "last": [0, 1, 8, 10], "latest": 6, "launch": [11, 13], "law": 0, "lead": 0, "learn": [1, 2, 3, 7, 8, 11, 13], "least": [0, 1], "leav": 0, "left": 9, "length": [1, 3, 10], "less": [1, 3], "let": 11, "letter": 0, "level": [1, 5, 11], "leverag": 3, "librari": [5, 7], "like": [0, 2, 3, 8, 11], "limit": 3, "line": [7, 11], "link": [0, 1, 3, 5, 7, 9], "link_run": 11, "linkrun": [7, 11], "list": [0, 1, 3, 4, 5, 9, 11, 12], "liter": 3, "ll": 11, "load": 11, "load_conf_fil": 11, "load_config": 11, "loc_a": 1, "loc_a_0": 1, "loc_a_1": 1, "loc_b": 1, "loc_b_0": 1, "loc_b_1": 1, 
"locat": [1, 3, 11], "log2": 9, "logic": [0, 2, 3], "logist": [5, 9], "logistic_regress": [3, 13], "logisticregress": 9, "long": [3, 12], "longest": 8, "look": [1, 2, 11, 12], "lookup": 1, "lower": [0, 1], "lowercas": 0, "lowercase_strip": 3, "lowest": 0, "lr": 11, "lsoundex": [3, 13], "m": [0, 1], "m_caution": [1, 3, 10, 13], "m_interacted_jw_m": [3, 10, 13], "m_namefrst": 1, "m_pre": [3, 10, 13], "machin": [1, 2, 3, 7, 8, 11, 13], "made": 6, "mai": [0, 2, 3, 7, 8, 11], "main": 11, "mainli": 1, "major": [1, 11], "make": [0, 1, 2, 3, 6, 13], "male": [3, 12], "mani": [2, 8, 11], "manual": 13, "map": [5, 8, 10], "mardurmatch": [1, 3], "mark": 2, "marriag": 1, "match": [0, 1, 2, 5, 7, 11, 12, 13], "matches_df": 11, "matrix": 13, "max": [1, 9, 11], "maxbin": 9, "maxdepth": [3, 9, 13], "maximum": [1, 9], "maximum_jw": 1, "mbpl": 1, "mbplmatch": [3, 13], "mean": [0, 2, 3], "meant": 3, "meet": 1, "member": [1, 8], "memori": 11, "men": 3, "messag": 11, "metadata": 8, "method": [11, 13], "mfbplmatch": 1, "mi": [3, 13], "mi0": 1, "mi1": 1, "mid_init_col": 1, "mid_init_match": 1, "middl": [0, 1], "might": 13, "minimum": [1, 9], "mininstancespernod": 9, "minu": [1, 3], "mismatch": 1, "miss": [1, 11], "ml": [3, 5, 9, 10], "mode": [5, 6, 13], "model": [5, 7, 11], "model_eval_repeat_fn": 11, "model_eval_repeat_fp": 11, "model_eval_training_data": 11, "model_eval_training_featur": 11, "model_eval_training_result": 11, "model_eval_training_vector": 11, "model_explor": [3, 11, 13], "model_paramet": [3, 8, 9, 13], "modifi": 3, "modul": 11, "moment": 8, "momloc": 1, "more": [0, 1, 2, 3, 10, 11, 13], "most": [0, 1, 8, 11], "mother": 10, "mother_birthyr": 1, "mpre": 1, "mr": 0, "much": [1, 8], "mult": 1, "multi": 1, "multipl": [0, 1, 5, 11], "multipli": 1, "must": [0, 1, 2, 3, 4, 9, 10, 12], "my": [11, 13], "my_conf": 11, "my_fil": 3, "myriad": 3, "n": [1, 9, 10], "n_training_iter": [3, 8, 13], "name": [0, 1, 3, 4, 11, 12], "name_col": 1, "name_std": [3, 12], "namefrst": [0, 1, 3], 
"namefrst_clean": [0, 3], "namefrst_init": 1, "namefrst_jw": [1, 2, 3, 13], "namefrst_mid_init": [0, 1], "namefrst_mid_init_2": 1, "namefrst_rel": 1, "namefrst_related_row": 1, "namefrst_split": [0, 3], "namefrst_std": [3, 12], "namefrst_std_jw": [3, 13], "namefrst_unstd": [1, 3], "namefrst_unstd_bigram": 4, "namelast": [1, 3], "namelast1": 1, "namelast2": 1, "namelast3": 1, "namelast_bigram": 3, "namelast_clean": [1, 3, 4], "namelast_clean_bigram": [3, 4], "namelast_clean_soundex": 4, "namelast_equal_as_int": 1, "namelast_frst_bigram": 4, "namelast_jw": [2, 3, 13], "namelast_jw_max": 1, "namelast_neighbor": 1, "namelast_popular": 1, "namelast_popularity_sum": 1, "nativ": 1, "nbor": [1, 3, 13], "ncount": [3, 4, 13], "ncount2": [3, 4, 13], "nearest": 0, "necessari": [4, 13], "need": [0, 1, 2, 3, 8, 11, 13], "neg": [3, 5, 7], "neighbor": 1, "neighborhood": 1, "neither": 1, "nest": [2, 3, 7], "new": [0, 3, 13], "new_marr": [1, 3], "newli": 3, "niu": 1, "no_first_pad": 4, "node": 9, "non": 0, "nonzero": 1, "nor": 1, "normal": 8, "note": [2, 3], "now": [2, 11], "null": [1, 3, 4], "null_fil": 3, "num": 11, "num_col": 1, "number": [0, 1, 3, 4, 8, 9, 11], "numer": [0, 1], "numtre": [3, 9, 13], "nvl": 1, "object": [1, 3, 7, 11], "often": [2, 4, 11], "onc": [1, 11], "one": [0, 1, 3, 8], "onethird": 9, "onli": [0, 1, 2, 3, 8, 13], "ons": 5, "oper": [0, 1, 2, 3], "option": [0, 1, 3, 4, 7, 8, 9, 11, 13], "or_group": 3, "order": [0, 3, 8], "org": 6, "organ": 11, "original_valu": 3, "oth": [3, 13], "other": [1, 3, 13], "otherwis": [0, 1, 10, 13], "our": 11, "out": [3, 4, 8, 13], "output": [0, 1, 2, 3, 4, 7, 8, 11, 13], "output_col": 4, "output_column": [3, 4, 10], "output_suspicious_td": [3, 13], "output_table_nam": 11, "output_typ": 3, "outsid": 10, "overrid": [0, 3], "override_column_a": [0, 3, 4], "override_column_b": [0, 3, 4], "override_transform": [0, 3], "overview": 5, "own": [1, 3], "p": 3, "packag": 6, "page": [1, 2, 3, 11], "pair": [1, 2, 3, 13], "param": [9, 13], 
"param_grid": [3, 13], "paramet": [3, 7, 8, 9, 11, 13], "parent": [0, 1, 12], "parent_step_chang": 1, "parenthes": 3, "parquet": [3, 8], "part": [3, 8], "particular": [1, 2, 3], "particularli": 1, "partit": 13, "pass": [1, 8, 9], "path": [1, 3, 11, 12, 13], "pattern": 11, "peopl": [0, 1, 7, 11], "per": [1, 3, 9, 10, 11], "percent": 8, "percentag": 1, "perform": [1, 3, 7, 8, 12], "persist": 11, "person": [0, 1, 7], "pip": 6, "pipelin": 5, "pipeline_featur": [3, 8, 10], "piplin": 3, "placehold": 1, "pleas": [3, 11], "plu": 3, "point": [10, 13], "popul": 8, "posit": [0, 1, 3, 5, 7], "possibl": 3, "post": [3, 8], "potenti": [1, 2, 5, 8], "potential_match": [1, 2, 8, 11], "potential_matches_prep": 11, "potential_matches_univers": [3, 8], "power": 5, "predefin": 1, "predict": [3, 13], "predicted_match": 11, "prediction_col": 3, "preexist": 11, "prefer": 13, "prefix": 0, "prep": 3, "prep_step": 11, "prepar": [7, 8, 11], "prepend": 4, "prepped_df_a": 11, "prepped_df_b": 11, "preprocess": [5, 7, 11, 13], "present": [1, 3, 10], "pretti": 2, "primari": [1, 7], "primarili": [1, 8], "print": 11, "probabilist": [3, 7], "probabl": [0, 3, 9], "probit": [3, 5], "proceed": 12, "process": [2, 3, 7, 11], "produc": [0, 11], "product": [1, 7, 13], "program": [1, 3, 8, 13], "project": 6, "prompt": 11, "provid": [0, 3, 4, 7, 8, 10, 11], "pull": 8, "punctuat": 0, "put": [11, 13], "py": [1, 3], "pypi": 5, "pyspark": [6, 9, 10, 11], "python": [6, 7, 11], "q": [11, 13], "qmark": 0, "qualifi": 3, "queri": [1, 3], "race": [0, 1, 3, 10, 13], "race_interacted_srac": [3, 10, 13], "racematch": 3, "rais": [1, 4], "random": [5, 9], "random_forest": [3, 13], "randomforestclassifi": 9, "rang": [3, 10], "rate": 1, "ratio": [3, 9], "ration": 0, "rationalize_name_word": 3, "raw": [0, 3, 8, 11], "raw_df_a": 11, "raw_df_b": 11, "read": [0, 1, 3, 8, 11], "readm": 3, "real": 7, "reason": 1, "recod": 0, "record": [0, 1, 2, 3, 7, 8], "recurs": [2, 3], "reduc": [4, 8], "refer": 3, "regex": 5, 
"regex_word_replac": 12, "region": [3, 13], "region_dict": 3, "regionf": 3, "regist": [8, 11], "regress": [5, 9], "regular": 13, "rel": [1, 3, 13], "relat": [0, 1, 3], "relate_a": [3, 10], "relate_col": 1, "relate_div_100": [0, 1, 3], "related_individual_max_jw": 1, "related_individual_row": 1, "relatematch": [1, 3], "relatetyp": [3, 10], "relatetype_interacted_relatematch": 3, "relationship": 7, "relev": 13, "reload": 11, "remain": 8, "remov": 0, "remove_alternate_nam": 3, "remove_qmark_hyphen": 3, "remove_suffix": 3, "repeat_fn": 13, "repeat_fp": 13, "repeatedli": 3, "replac": [0, 1, 2, 5], "replace_apostroph": 3, "replaced_birthyr": [1, 3, 4], "report": [1, 5, 7, 11], "repositori": 6, "repres": [0, 1, 3, 10, 11], "represent": [3, 8], "reproduc": 11, "request": 8, "requir": [0, 1, 2, 3, 4, 5, 8, 10, 11, 12], "research": 7, "reshap": 8, "resourc": 4, "respect": [1, 8], "restrict": 2, "result": [0, 1, 7, 10, 11, 13], "return": [1, 4, 9, 11], "reus": 5, "right": [9, 11], "risk": 1, "road": 0, "robust": 3, "root": 6, "round": [0, 3], "row": [0, 3, 4], "rule": [2, 7, 8], "run": [5, 6, 7, 8, 13], "run_all_step": [3, 11, 13], "run_step": 11, "runtim": 8, "sai": 11, "same": [0, 1, 2, 3, 4, 7, 8, 11], "sampl": 1, "satisfi": [2, 3, 8], "save": [3, 8, 13], "scala": 1, "scale": 3, "scale_data": [3, 13], "scenario": 13, "schema": 3, "score": [1, 3, 8, 10], "score_with_model": [3, 13], "scored_potential_match": 11, "scratch": 3, "script": [3, 7, 11], "search": 1, "second": [0, 1, 3, 12], "secondari": 1, "secondary_distance_col": 1, "secondary_distances_fil": 1, "secondary_key_count": 1, "secondary_loc_a": 1, "secondary_loc_b": 1, "secondary_source_column": 1, "secondary_table_nam": 1, "section": [0, 1, 2, 3, 13], "see": [1, 3, 6, 11, 13], "seen": 1, "select": [0, 1, 5, 7, 11, 13], "separ": 8, "sequenc": 7, "seri": 11, "serialp": 3, "serv": 1, "set": [0, 1, 2, 3, 4, 7, 8, 11, 13], "set_executor_memori": 11, "set_link_task": 11, "set_loc": 11, "set_num_cor": 11, 
"set_preexisting_t": 11, "set_print_sql": 11, "set_value_column_a": [3, 4], "set_value_column_b": [3, 4], "sever": [1, 7], "sex": [1, 3, 12], "sex_equ": 3, "sex_region_interact": 3, "sex_regionf_interact": 3, "sexmatch": 3, "sgen": [1, 3, 13], "share": 7, "short": 3, "should": [1, 3, 4, 9, 10, 11], "show": 11, "showf": 11, "shut": 13, "sibl": 0, "sign": 1, "signific": 0, "similar": [1, 3], "simpli": [0, 3], "simplifi": 1, "sinc": [0, 3], "singl": [0, 4, 11, 13], "size": 1, "skip": [3, 4, 8], "sm_bpl": 1, "sm_namefrst": 1, "sm_sex": 1, "small": 3, "smaller": 7, "smallest": 3, "sn_bpl": 1, "sn_namefrst": 1, "sn_sex": 1, "so": [0, 1, 2, 3, 6, 13], "some": [0, 1, 2, 3, 4, 7, 8, 11], "someth": 11, "sometim": 3, "somewhat": 2, "soundex": 5, "sourc": [1, 5, 8, 11, 13], "source_column_a": 1, "source_column_b": 1, "sp": 1, "sp_caution": [1, 3, 13], "sp_interacted_jw_sp": [3, 13], "sp_pre": [3, 13], "space": [0, 3, 4, 12], "span": 13, "spark": [1, 3, 4, 6, 9, 10, 11, 13], "sparkfactori": 11, "sparksess": 11, "special": 1, "specif": [1, 3, 11], "specifi": [0, 1, 3, 7, 8, 10, 11, 12], "split": [3, 4, 8, 9, 10, 13], "split_by_id_a": [3, 13], "sploc": 1, "spous": 0, "spouse_birthyr": 1, "spouse_bpl": 1, "sql": [0, 1, 2, 3, 4, 5, 8, 11], "sql_condit": 3, "sqrt": 9, "squar": 1, "sr": [0, 3], "srace": [3, 10, 13], "stage": 3, "standard": [0, 1, 12], "start": [0, 12], "state": [1, 7], "state_1900_1910_distances_km": 1, "state_dist": 1, "state_distance_lookup": 1, "statecode1": 1, "statecode2": 1, "statefip": [0, 1, 3], "statefip_h": 3, "step": [0, 1, 3, 7], "stepmom": 1, "still": 12, "stop": 0, "street": [0, 1], "street_jw": [1, 3, 13], "street_unstd": 12, "strictli": 10, "string": [0, 1, 3, 4, 8, 9, 11, 12], "strip": [0, 8], "structur": [2, 3, 7], "sub": [1, 2, 3], "subhead": 12, "subset": [3, 12], "substitut": [5, 8], "substitution_column": [3, 8, 12], "substitution_fil": [3, 12], "substitutions_street_abbrev": 12, "subtract": 1, "suffix": 0, "suppli": 12, "support": [0, 2, 3, 8, 
9], "suppos": [0, 2, 3], "sure": [2, 3, 6, 11], "surnam": 1, "surround": 0, "suspect": [1, 7], "swap": 12, "syntax": 2, "system": 6, "t": [0, 1, 3, 4, 13], "tabl": [1, 2, 3, 5, 8, 11, 13], "table_nam": 1, "tablenam": 11, "tailor": 7, "take": [0, 1, 2, 3, 4, 8, 11], "taken": [0, 1], "task": [2, 3, 5, 7, 9, 13], "task_nam": 11, "tell": [1, 3, 4], "templat": 1, "ten": 1, "term": 3, "test": [3, 8, 13], "text": 11, "than": [0, 1, 2, 3, 9], "thei": [0, 1, 2, 3, 8, 11], "them": [0, 1, 3, 8], "thi": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13], "thing": 3, "those": [1, 3], "though": 1, "thought": 8, "three": [2, 3, 8], "threshold": [2, 3, 9, 13], "threshold_expr": [2, 3], "threshold_ratio": [3, 9, 13], "through": [6, 8, 10, 11], "thu": 1, "time": [0, 3, 8, 11], "to_icpsrctyi": 1, "to_statefip": 1, "togeth": [0, 1, 3], "toml": [3, 7, 11], "tool": [6, 7], "top": [1, 5], "topic": 11, "total": 8, "train": [1, 5, 7, 9, 11], "training_data": [3, 11], "training_data_1900_1910": 13, "training_data_1900_1910_hlink_featur": 13, "training_data_subset": 3, "training_featur": [11, 13], "training_result": 13, "transform": [1, 3, 5, 7, 8], "transformer_typ": [3, 10], "treat": [3, 10], "tree": [5, 9], "true": [1, 3, 4, 8, 10, 12, 13], "try": 3, "tune": [7, 13], "tutori": [3, 11], "two": [0, 1, 2, 3, 4, 7, 8, 10, 11, 13], "type": [0, 3, 4, 5, 8, 9, 11, 12, 13], "typic": [3, 8], "u": 0, "ugli": 2, "under": [1, 3], "understand": 7, "union": 5, "uniqu": 3, "unit": 7, "univers": [1, 5, 8], "unknown": 1, "unlik": 2, "unrel": 1, "unstabl": 11, "up": [1, 3, 11, 12], "updat": 13, "upper": 1, "upper_threshold": 3, "uppercas": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13], "us1900": 3, "us1900m_usa": 3, "us1910": 3, "us1910m_usa": 3, "usag": [3, 4, 5, 11], "use_potential_matches_featur": 13, "use_training_data_featur": [3, 8, 13], "user": [1, 11], "usual": [0, 8, 13], "util": 4, "v": 3, "valu": [0, 1, 2, 3, 4, 9, 10, 11, 12], "var": [1, 3], "vari": [0, 3, 4], "variabl": [0, 1, 3, 13], "variant": 12, 
"varieti": 8, "ve": 13, "vector": [3, 10], "verbos": 2, "veri": [4, 8, 13], "version": [0, 6, 13], "vi": 3, "via": [6, 7], "vii": 3, "viii": 3, "volumn": 1, "wa": [1, 13], "wai": [1, 2, 3, 6, 11], "want": [0, 1, 3, 11, 13], "washington": 3, "we": [0, 1, 11, 13], "well": 3, "were": 3, "weren": 13, "what": [1, 3, 11, 13], "when": [0, 1, 2, 3, 4, 8, 13], "where": [1, 8, 11, 13], "whether": [1, 3, 12], "which": [0, 1, 2, 3, 4, 7, 8, 10, 11, 13], "white": 0, "whitespac": [0, 8], "who": 1, "whole": [0, 7], "whose": 1, "width": 8, "winkler": [1, 10], "within": [1, 3, 7, 11, 12], "word": [0, 5], "work": [0, 1, 3, 6, 8, 11, 13], "workflow": [4, 5], "world": 7, "would": [1, 2, 3, 13], "write": [1, 11, 13], "written": [3, 7], "x": [3, 10, 11], "x_crosswalk": 11, "x_hh_tfam": 11, "x_hh_tfam_2a": 11, "x_hh_tfam_2b": 11, "x_load": 11, "x_parquet_from_csv": 11, "x_persist": 11, "x_sql": 11, "x_sqlf": 11, "x_summari": 11, "x_tab": 11, "x_tfam": 11, "x_tfam_raw": 11, "x_union": 11, "y": 10, "year": [0, 1, 3, 4, 5], "year_b": 1, "yet": 11, "you": [0, 1, 2, 3, 6, 11, 12, 13], "your": [2, 3, 4, 6, 8, 11, 13], "yrimmig": 1, "zero": 1}, "titles": ["Column Mappings", "Comparison Features", "Comparisons", "Configuration", "Feature Selection Transforms", "Welcome to hlink\u2019s documentation!", "Installation", "Introduction", "Link Tasks", "Models", "Pipeline generated features", "Running hlink", "Substitutions", "Advanced Workflow Examples"], "titleterms": {"": 5, "1": 12, "abs_diff": 1, "add": 1, "add_to_a": 0, "advanc": [0, 3, 13], "after": 13, "aggreg": 1, "alia": 1, "all_equ": 1, "any_equ": 1, "api": 5, "arrai": 4, "array_index": 0, "b_minus_a": 1, "basic": [0, 3], "bigram": 4, "block": 3, "btwn_threshold": 1, "bucket": 10, "caution_comp_3": 1, "caution_comp_3_012": 1, "caution_comp_4": 1, "caution_comp_4_012": 1, "column": [0, 3], "comparison": [1, 2, 3], "concat_to_a": 0, "concat_to_b": 0, "concat_two_col": 0, "condense_strip_whitespac": 0, "config": 3, "configur": [3, 5, 8], 
"data": [3, 12, 13], "decision_tre": 9, "defin": 2, "differ": 13, "divide_by_int": 0, "document": 5, "either_are_0": 1, "either_are_1": 1, "equal": 1, "equals_as_int": 1, "exact_mult": 1, "exampl": [11, 13], "explor": [8, 13], "export": 13, "extra_children": 1, "f1_match": 1, "f2_match": 1, "fals": 13, "featur": [1, 3, 4, 10, 13], "fetch_a": 1, "fetch_b": 1, "file": 3, "filter": 3, "fn": 13, "fp": 13, "from": 6, "gener": [3, 10, 13], "geo_dist": 1, "get_floor": 0, "gradient_boosted_tre": 9, "gt_threshold": 1, "hit": 1, "hits2": 1, "hlink": [5, 11], "household": [1, 3, 8], "instal": 6, "interact": [10, 11], "introduct": 7, "jaro_winkl": 1, "jaro_winkler_r": 1, "jaro_winkler_street": 1, "jw_max_a": 1, "jw_max_b": 1, "length_b": 1, "level": 3, "librari": 11, "link": [8, 11, 13], "list": 13, "logistic_regress": 9, "look_at_addl_var": 1, "lower_threshold": 1, "lowercase_strip": 0, "map": [0, 3], "match": [3, 8], "max_jaro_winkl": 1, "maximum_jaro_winkl": 1, "ml": 13, "mode": 11, "model": [3, 8, 9, 13], "multi_jaro_winkler_search": 1, "multipl": [2, 3], "neg": 13, "neither_are_nul": 1, "not_equ": 1, "not_zero_and_not_equ": 1, "ons": 1, "overview": [2, 7, 8], "pipelin": [3, 10], "posit": 13, "potenti": [3, 13], "power": [1, 4], "preprocess": 8, "present_and_equal_categorical_in_univers": 1, "present_and_matching_categor": 1, "present_and_not_equ": 1, "present_both_year": 1, "probit": 9, "program": 11, "pypi": 6, "random_forest": 9, "rationalize_name_word": 0, "regex": 12, "rel_jaro_winkl": 1, "relat": 8, "remove_alternate_nam": 0, "remove_one_letter_nam": 0, "remove_prefix": 0, "remove_punctu": 0, "remove_qmark_hyphen": 0, "remove_stop_word": 0, "remove_suffix": 0, "replac": 12, "replace_apostroph": 0, "report": 8, "requir": 6, "reus": 13, "run": 11, "second_gen_imm": 1, "section": 8, "select": [3, 4], "singl": 3, "soundex": 4, "sourc": [3, 6], "split": 0, "sql_condit": [1, 4], "start": 11, "step": [8, 11], "substitut": [3, 12], "substr": 0, "sum": 1, "tabl": 12, "task": 
[8, 11], "threshold": 1, "time": 1, "top": 3, "train": [3, 8, 13], "transform": [0, 4, 10], "type": [1, 2, 10], "union": 4, "univers": 3, "upper_threshold": 1, "us": 11, "usag": 0, "welcom": 5, "when_valu": 0, "word": 12, "workflow": [11, 13], "year": 13}})
\ No newline at end of file
+Search.setIndex({"alltitles": {"1:1 substitution by data table": [[12, "substitution-by-data-table"]], "Advanced Config File": [[3, "advanced-config-file"]], "Advanced Usage": [[0, "advanced-usage"]], "Advanced Workflow Examples": [[13, null]], "Aggregate Features": [[1, "aggregate-features"]], "Basic Config File": [[3, "basic-config-file"]], "Basic Usage": [[0, "basic-usage"]], "Blocking": [[3, "blocking"]], "Column Mappings": [[0, null], [3, "column-mappings"]], "Comparison Features": [[1, null], [3, "comparison-features"]], "Comparison Types": [[1, "comparison-types"], [2, "comparison-types"]], "Comparisons": [[2, null], [3, "comparisons"]], "Configuration": [[3, null]], "Configuration API": [[5, "configuration-api"], [5, null]], "Data sources": [[3, "data-sources"]], "Defining Multiple Comparisons": [[2, "defining-multiple-comparisons"]], "Example interactive mode workflow": [[11, "example-interactive-mode-workflow"]], "Example model exploration and FP/FN export workflow": [[13, "example-model-exploration-and-fp-fn-export-workflow"]], "Example training data export with generated ML features": [[13, "example-training-data-export-with-generated-ml-features"]], "Export training data after generating features to reuse in different linking years": [[13, "export-training-data-after-generating-features-to-reuse-in-different-linking-years"]], "Feature Selection Transforms": [[4, null]], "Feature Selections": [[3, "feature-selections"]], "Feature add-ons": [[1, "feature-add-ons"]], "Filter": [[3, "filter"]], "Household Aggregate Features": [[1, "household-aggregate-features"]], "Household Comparisons": [[3, "household-comparisons"]], "Household Matching": [[8, "household-matching"]], "Household training and models": [[3, "household-training-and-models"]], "Installation": [[6, null]], "Installing from PyPI": [[6, "installing-from-pypi"]], "Installing from source": [[6, "installing-from-source"]], "Interactive Mode": [[11, "interactive-mode"]], "Introduction": [[7, 
null]], "Link Tasks": [[8, null]], "ML model exploration and export of lists of potential false positives/negatives in training data": [[13, "ml-model-exploration-and-export-of-lists-of-potential-false-positives-negatives-in-training-data"]], "Matching": [[8, "matching"]], "Model Exploration and Household Model Exploration": [[8, "model-exploration-and-household-model-exploration"]], "Models": [[9, null]], "Multiple Comparisons": [[3, "multiple-comparisons"]], "Overview": [[2, "overview"], [7, "overview"], [8, "overview"], [8, "id1"], [8, "id4"], [8, "id7"], [8, "id10"], [8, "id13"]], "Pipeline generated features": [[10, null]], "Pipeline-generated Features": [[3, "pipeline-generated-features"]], "Potential Matches Universe": [[3, "potential-matches-universe"]], "Preprocessing": [[8, "preprocessing"]], "Related Configuration Sections": [[8, "related-configuration-sections"], [8, "id3"], [8, "id6"], [8, "id9"], [8, "id12"], [8, "id15"]], "Reporting": [[8, "reporting"]], "Requirements": [[6, "requirements"]], "Running Linking Tasks and Steps": [[11, "running-linking-tasks-and-steps"]], "Running hlink": [[11, null]], "Single Comparison": [[3, "single-comparison"]], "Starting the program": [[11, "starting-the-program"]], "Substitution Columns": [[3, "substitution-columns"]], "Substitution by regex word replace": [[12, "substitution-by-regex-word-replace"]], "Substitutions": [[12, null]], "Task steps": [[8, "task-steps"], [8, "id2"], [8, "id5"], [8, "id8"], [8, "id11"], [8, "id14"]], "Top level configs": [[3, "top-level-configs"]], "Training and Household Training": [[8, "training-and-household-training"]], "Training and models": [[3, "training-and-models"]], "Transformer types": [[10, "transformer-types"]], "Transforms": [[0, "transforms"]], "Using hlink as a Library": [[11, "using-hlink-as-a-library"]], "Welcome to hlink\u2019s documentation!": [[5, null]], "abs_diff": [[1, "abs-diff"]], "add_to_a": [[0, "add-to-a"]], "alias": [[1, "alias"]], "all_equals": [[1, 
"all-equals"]], "and": [[1, "and"]], "any_equals": [[1, "any-equals"]], "array": [[4, "array"]], "array_index": [[0, "array-index"]], "b_minus_a": [[1, "b-minus-a"]], "bigrams": [[4, "bigrams"]], "btwn_threshold": [[1, "btwn-threshold"]], "bucketizer": [[10, "bucketizer"]], "caution_comp_3": [[1, "caution-comp-3"]], "caution_comp_3_012": [[1, "caution-comp-3-012"]], "caution_comp_4": [[1, "caution-comp-4"]], "caution_comp_4_012": [[1, "caution-comp-4-012"]], "concat_to_a": [[0, "concat-to-a"]], "concat_to_b": [[0, "concat-to-b"]], "concat_two_cols": [[0, "concat-two-cols"]], "condense_strip_whitespace": [[0, "condense-strip-whitespace"]], "decision_tree": [[9, "decision-tree"]], "divide_by_int": [[0, "divide-by-int"]], "either_are_0": [[1, "either-are-0"]], "either_are_1": [[1, "either-are-1"]], "equals": [[1, "equals"]], "equals_as_int": [[1, "equals-as-int"]], "exact_mult": [[1, "exact-mult"]], "extra_children": [[1, "extra-children"]], "f1_match": [[1, "f1-match"]], "f2_match": [[1, "f2-match"]], "fetch_a": [[1, "fetch-a"]], "fetch_b": [[1, "fetch-b"]], "geo_distance": [[1, "geo-distance"]], "get_floor": [[0, "get-floor"]], "gradient_boosted_trees": [[9, "gradient-boosted-trees"]], "gt_threshold": [[1, "gt-threshold"]], "hits": [[1, "hits"]], "hits2": [[1, "hits2"]], "interaction": [[10, "interaction"]], "jaro_winkler": [[1, "jaro-winkler"]], "jaro_winkler_rate": [[1, "jaro-winkler-rate"]], "jaro_winkler_street": [[1, "jaro-winkler-street"]], "jw_max_a": [[1, "jw-max-a"]], "jw_max_b": [[1, "jw-max-b"]], "length_b": [[1, "length-b"]], "logistic_regression": [[9, "logistic-regression"]], "look_at_addl_var": [[1, "look-at-addl-var"]], "lower_threshold": [[1, "lower-threshold"]], "lowercase_strip": [[0, "lowercase-strip"]], "mapping": [[0, "mapping"]], "max_jaro_winkler": [[1, "max-jaro-winkler"]], "maximum_jaro_winkler": [[1, "maximum-jaro-winkler"]], "multi_jaro_winkler_search": [[1, "multi-jaro-winkler-search"]], "neither_are_null": [[1, "neither-are-null"]], 
"not_equals": [[1, "not-equals"]], "not_zero_and_not_equals": [[1, "not-zero-and-not-equals"]], "or": [[1, "or"]], "power": [[1, "power"], [4, "power"]], "present_and_equal_categorical_in_universe": [[1, "present-and-equal-categorical-in-universe"]], "present_and_matching_categorical": [[1, "present-and-matching-categorical"]], "present_and_not_equal": [[1, "present-and-not-equal"]], "present_both_years": [[1, "present-both-years"]], "probit": [[9, "probit"]], "random_forest": [[9, "random-forest"]], "rationalize_name_words": [[0, "rationalize-name-words"]], "rel_jaro_winkler": [[1, "rel-jaro-winkler"]], "remove_alternate_names": [[0, "remove-alternate-names"]], "remove_one_letter_names": [[0, "remove-one-letter-names"]], "remove_prefixes": [[0, "remove-prefixes"]], "remove_punctuation": [[0, "remove-punctuation"]], "remove_qmark_hyphen": [[0, "remove-qmark-hyphen"]], "remove_stop_words": [[0, "remove-stop-words"]], "remove_suffixes": [[0, "remove-suffixes"]], "replace_apostrophe": [[0, "replace-apostrophe"]], "second_gen_imm": [[1, "second-gen-imm"]], "soundex": [[4, "soundex"]], "split": [[0, "split"]], "sql_condition": [[1, "sql-condition"], [4, "sql-condition"]], "substring": [[0, "substring"]], "sum": [[1, "sum"]], "threshold": [[1, "threshold"]], "times": [[1, "times"]], "union": [[4, "union"]], "upper_threshold": [[1, "upper-threshold"]], "when_value": [[0, "when-value"]], "xgboost": [[9, "xgboost"]]}, "docnames": ["column_mappings", "comparison_features", "comparisons", "config", "feature_selection_transforms", "index", "installation", "introduction", "link_tasks", "models", "pipeline_features", "running_the_program", "substitutions", "use_examples"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, 
"filenames": ["column_mappings.md", "comparison_features.md", "comparisons.md", "config.md", "feature_selection_transforms.md", "index.rst", "installation.md", "introduction.md", "link_tasks.md", "models.md", "pipeline_features.md", "running_the_program.md", "substitutions.md", "use_examples.md"], "indexentries": {}, "objects": {}, "objnames": {}, "objtypes": {}, "terms": {"": [0, 1, 3, 7, 8, 9, 11, 12], "0": [0, 1, 2, 3, 8, 9, 10, 11, 13], "005": 3, "012": 1, "05": [1, 3, 9], "1": [0, 1, 2, 3, 5, 8, 9, 10, 11, 13], "10": [0, 3, 6, 13], "100": [0, 3, 13], "11": [0, 3, 6, 10], "12": [0, 6], "14": 1, "15": 9, "1867": 3, "1868": 3, "1869": 3, "1870": 3, "1871": 3, "1872": 3, "1873": 3, "1900": [3, 13], "1900_1910_potential_fn": 13, "1900_1910_potential_fp": 13, "1900_1910_training_data_20191023": 3, "1900_1910_training_result": 13, "1910": [1, 3, 13], "1920": 13, "1930": [3, 13], "1940": [3, 13], "1999": [0, 3], "2": [0, 1, 2, 3, 4, 8, 9, 12, 13], "20": 9, "25": 1, "3": [0, 1, 2, 3, 6, 8, 9, 10, 13], "300": 0, "301": 0, "302": 0, "303": 0, "4": [0, 1, 9], "5": [0, 1, 2, 3, 9, 10, 11, 13], "50": [3, 13], "50g": 13, "53": 3, "5g": 11, "6": [0, 3, 9, 10, 13], "65": 3, "7": [0, 1, 3, 9, 13], "75": [3, 9, 13], "79": [2, 3], "7th": 12, "8": [1, 3, 6, 9, 11], "80": 3, "84": [2, 3], "85": [9, 11], "9": 1, "95": 1, "99": [1, 3], "9998": 0, "9999": [0, 3, 10], "A": [0, 1, 2, 3, 4, 10, 11], "AND": [1, 2, 3], "As": [0, 2], "At": [7, 8], "But": [3, 6], "By": [0, 3, 11], "For": [0, 1, 2, 3, 8, 11, 13], "If": [0, 1, 3, 4, 8, 9, 11, 12], "In": [0, 1, 2, 7, 11, 13], "It": [0, 1, 2, 3, 7, 9, 11, 13], "NOT": 1, "OR": [1, 2, 3], "THEN": 1, "The": [0, 1, 2, 3, 4, 6, 8, 9, 10, 11, 13], "Then": [0, 6], "There": [1, 3, 8], "These": [0, 1, 2, 3, 7, 8, 9, 10], "To": [0, 1, 6, 8, 9, 11], "Will": 3, "With": [0, 10], "_": [0, 1, 2, 3, 4, 5, 9, 10, 11], "_a": 1, "_bpl": 1, "_namefrst": 1, "_sex": 1, "a304bt": 3, "ab": 1, "abbrevi": [8, 12], "abl": 3, "about": [3, 11, 13], "abov": [1, 2, 6], 
"absolut": 1, "accept": [1, 3, 13], "access": [11, 13], "accord": 1, "across": 1, "ad": [0, 1, 2, 3, 9], "add": [0, 2, 5], "add_to_a": 3, "addit": [0, 1, 3, 6, 7, 11], "addl": 1, "addl_var": 1, "adjust": 11, "adopt": 0, "advanc": 5, "affect": 4, "after": [1, 2, 3, 5, 9, 11], "ag": [0, 1, 3, 4], "against": [1, 3, 12], "age_2": 3, "age_at_dataset_b": 0, "age_threshold": 1, "aggreg": 5, "ah": 0, "ahead": 8, "akin": 1, "algorithm": [1, 2, 7, 8], "alia": [0, 3, 8], "all": [0, 1, 3, 4, 8, 9, 10, 11], "allow": [1, 3, 8, 13], "along": 1, "alpha": 9, "alphabet": 0, "alphanumer": 3, "also": [0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 13], "altern": [0, 3, 9], "although": 3, "alwai": 2, "among": 1, "amount": 3, "an": [0, 1, 2, 3, 7, 9, 11], "analysi": 13, "analyz": [11, 13], "ani": [1, 3, 4, 6, 9], "anoth": [0, 1, 3, 4, 8, 9], "anyon": 8, "anywher": 12, "apach": 6, "apart": 0, "api": [3, 7, 10], "apostroph": 0, "appear": [0, 1], "appli": [0, 2, 3, 4, 8, 13], "apply_model": 3, "appropri": 3, "ar": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 13], "arbitrari": 0, "area": 3, "aren": 1, "arg": 13, "argument": [1, 11, 13], "around": 3, "arrai": [0, 3, 5, 9, 10], "array_index": 3, "ask": 13, "aspect": [8, 11], "assert": [1, 11], "assum": 3, "attach_vari": 3, "attempt": 3, "attribut": [0, 1, 2, 3, 4, 8, 9, 10, 11, 12], "auto": 9, "automat": [0, 3, 6, 8], "av": 12, "avail": [0, 1, 3, 4, 6, 8, 9, 10, 13], "avenu": [0, 12], "b": [0, 1, 2, 3, 4, 11], "b200": 3, "back": 1, "backup": 1, "base": [1, 3, 8], "basic": 5, "becaus": 2, "been": 7, "befor": [0, 1, 3, 4, 6, 8], "begin": 11, "behind": 0, "being": [1, 8], "belong": 3, "below": [0, 1, 3, 4, 9, 10, 11], "best": [3, 8], "beta": [3, 9], "better": [0, 7], "between": [0, 1, 2, 3, 7, 8, 11, 13], "beyond": 1, "bigint": 3, "bigram": [3, 5], "bin": 9, "binomi": 9, "birth": 1, "birthplac": [0, 3], "birthyr": [0, 3], "birthyr_3": 3, "birthyr_col": 1, "block": [2, 5, 8], "blvd": 0, "boolean": [1, 3, 4, 12, 13], "boost": [5, 9], "born": 1, "borrow_t": 11, "both": 
[0, 1, 2, 3, 8, 13], "boundari": 1, "bpl": [0, 1, 3], "bpl1": 3, "bpl2": 3, "bpl2_str": 3, "bpl3": 3, "bpl_clean": 3, "bpl_orig": 3, "bpl_root": 0, "bpl_str": 3, "bplmatch": 3, "broken": 7, "btwn": 1, "bucket": [3, 8], "built": 6, "builtin": 1, "byrdifcat": 3, "byrdiff": [1, 3, 13], "c": 1, "c201": 3, "calcul": [1, 13], "call": 0, "can": [0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 13], "cannot": 3, "care": 2, "cartesian": 1, "case": [0, 1, 2, 3, 4, 7], "cast": 1, "categor": [1, 3, 9, 10], "categori": [0, 9], "caus": 9, "caution": [1, 10], "censu": [0, 8, 13], "census": 7, "chang": [0, 1, 3, 6, 11, 13], "charact": 0, "characterist": [7, 8], "check": [1, 11], "check_val_expr": 1, "checkpoint": 4, "child": [0, 9], "children": 1, "chines": 0, "choic": 11, "chosen": 8, "chosen_model": [3, 9, 13], "circl": 0, "circumst": 1, "class": 11, "classif": [8, 9], "claus": [0, 1], "clean": [0, 7], "clean_birthyr": [0, 3, 4], "clone": 6, "code": [0, 1, 3, 6], "coeffici": [3, 8], "col": [0, 1], "col_to_add": 3, "col_to_join_on": 3, "col_typ": 3, "column": [1, 4, 5, 8, 10, 11, 12, 13], "column_map": [0, 3, 8], "column_nam": [0, 1, 3, 12], "column_to_append": 0, "combin": [1, 2, 3, 4, 8], "come": 1, "command": [3, 7, 11, 13], "comment": 4, "common": [7, 8, 13], "commonli": 9, "comp": 1, "comp_a": [1, 2, 3], "comp_b": [1, 2, 3], "comp_c": 1, "comp_d": 1, "compar": [0, 1, 3, 7, 8], "comparis": 3, "comparison": [5, 8], "comparison_featur": [1, 2, 3, 8], "comparison_typ": [1, 2, 3], "complet": 11, "complex": [3, 4], "comput": [1, 4, 8], "concat": 0, "concaten": [0, 1], "condens": 0, "condense_strip_whitespac": 3, "condit": [0, 1, 2, 3, 4, 5, 8], "conf": [11, 13], "config": [1, 4, 5, 8, 11, 13], "configur": [0, 1, 2, 7, 11, 13], "conjuct": 3, "conjunct": 3, "connect": [2, 3], "consid": [1, 2, 9], "consider": 1, "consol": 11, "constraint": [1, 2], "construct": 8, "contain": [0, 1, 3, 12], "context": 10, "continu": [9, 10, 13], "conveni": 11, "convert": [0, 1, 3], "convert_ints_to_long": 3, "copi": [4, 
13], "core": [1, 8, 11, 13], "correspond": [7, 8], "could": [0, 2, 3], "count": [1, 11, 13], "counti": [0, 1], "county_1900_1910_distances_km": 1, "county_a": 1, "county_b": 1, "county_dist": [1, 3, 13], "county_distance_lookup": 1, "county_distance_squar": [1, 3, 13], "county_state_dist": 1, "court": 0, "cover": 10, "coverag": 3, "cpu": 11, "creat": [0, 3, 7, 8, 10, 11, 12, 13], "creation": 3, "crosswalk": 8, "csv": [1, 3, 8, 11, 12, 13], "current": [1, 2, 3, 9, 11], "d": 1, "d425": 3, "data": [0, 1, 5, 7, 8, 11], "databas": 11, "datafram": [8, 11, 13], "dataset": [0, 1, 3, 4, 7, 8, 11, 13], "datasourc": [1, 3, 11], "datasource_a": [3, 8], "datasource_b": [3, 8], "de": 9, "decis": [3, 5, 9, 13], "decisiontreeclassifi": 9, "default": [0, 1, 2, 3, 8, 9, 11], "defin": [1, 3, 5, 8, 9, 10, 11], "definit": [3, 8], "demograph": 8, "depend": [1, 2, 3, 6, 9, 13], "dependent_var": [3, 13], "depth": 9, "deriv": 13, "derived_from": 3, "desc": 11, "describ": [0, 1, 2, 3, 11], "descript": [3, 9, 11], "detail": [0, 3, 11], "detect": 9, "determin": [1, 8], "determinist": [7, 8], "dev": 6, "develop": [6, 7], "df": [3, 11], "dictionari": 11, "diff": 1, "differ": [0, 1, 3, 5, 7, 8], "digit": 0, "dir": 12, "directli": 2, "directori": [6, 11, 13], "disabl": 9, "discard": 9, "discret": 9, "discuss": 3, "dist": 1, "dist_tabl": 1, "distanc": [1, 9], "distance_col": 1, "distance_km": 1, "distances_fil": 1, "distinct": 1, "divid": 0, "divide_by_int": 3, "do": [0, 1, 2, 4, 11, 13], "doc": [9, 10], "document": [1, 3, 9, 11, 13], "doe": [1, 4, 8, 13], "don": [3, 4], "doubl": 10, "down": [0, 7, 13], "drastic": 8, "drop": [0, 3, 11], "drop_al": 11, "drop_all_prc": 11, "drop_all_temp": 11, "drop_data_from_scored_match": 3, "drop_duplicate_a": 3, "drop_duplicate_with_threshold_ratio": [3, 13], "duplic": [3, 9], "durat": 1, "dure": [1, 2, 3, 8], "durmarr": [1, 3], "e": 6, "each": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11], "easiest": [6, 11], "easili": 11, "edit": 6, "effect": [2, 4], "either": [0, 1, 2, 
3, 7, 12], "either_0": 1, "either_1": 1, "element": 0, "elig": 8, "els": [1, 3, 4], "else_v": 1, "else_valu": 0, "empti": 1, "enabl": [3, 8, 9, 11], "encod": [3, 4], "end": [0, 1, 3, 4, 12], "ensur": 3, "enter": 11, "entir": [0, 3], "enum_dist": 1, "enumer": 11, "equal": [2, 3, 12], "equal_and_not_null_templ": 1, "equival": 3, "error": [3, 9, 10], "especi": 3, "eta": 9, "etc": 13, "eval": 3, "evalu": [1, 3, 7, 8, 9], "even": 1, "everi": [1, 4], "ex": 1, "exact": [1, 3], "exact_mult": [3, 13], "exampl": [0, 1, 2, 3], "except": [3, 10], "exclud": 1, "excute_command": 11, "execut": 11, "execute_command": 11, "execute_task": 11, "executor": 11, "executor_memori": [11, 13], "exist": [1, 3, 11], "exit": 11, "expand": 3, "expand_length": 3, "expect": 1, "experi": 7, "experiment": [8, 11], "explan": 9, "explicitli": [1, 10], "explod": [2, 3, 8], "exploded_df_a": 11, "exploded_df_b": 11, "explor": [3, 5, 7, 11], "expon": 4, "exponenti": 1, "export": [5, 8, 11], "express": [0, 1, 3], "extend": 1, "extens": 8, "extra": [1, 9], "extract": 3, "f": [1, 11], "f1": 1, "f1_match": 3, "f2": 1, "f2_match": 3, "f_caution": [3, 13], "f_interacted_jw_f": [3, 13], "f_pre": [3, 13], "factori": 11, "fail": 3, "fallback": 1, "fals": [1, 3, 4, 5, 7, 11], "famili": 9, "father_namefrst": 1, "favor": 1, "fbpl": 1, "fbpl_nomatch": 1, "fbplmatch": [3, 13], "featur": [2, 5, 7, 8, 9, 11], "feature_import": [3, 8, 13], "feature_nam": [2, 3], "feature_select": [2, 3, 4, 8], "featuresubsetstrategi": 9, "fed": [3, 8], "femal": [3, 12], "fetch": 1, "fetch_a": 3, "few": 4, "fewer": [1, 9], "fi": 1, "file": [1, 4, 5, 7, 8, 11, 12, 13], "filepath": 11, "fill": 1, "filter": [1, 2, 5, 8, 12], "final": [1, 3, 13], "find": [1, 8, 13], "finish": 11, "first": [0, 1, 3, 6, 8, 11, 12], "first_init_col": 1, "first_nam": 0, "five": 11, "fix": 8, "flag": [1, 2, 3, 10, 11, 13], "flexibl": [1, 2], "float": [1, 3, 9], "floor": 0, "focus": [2, 8], "follow": [0, 1, 2, 7, 11, 12, 13], "foreign": 1, "forest": [5, 9], 
"form": [1, 3, 8, 12], "format": 0, "four": 1, "framework": 13, "from": [0, 1, 3, 5, 7, 8, 9, 10, 11, 13], "from_icpsrctyi": 1, "from_statefip": 1, "fsoundex": [3, 13], "full": [3, 8, 13], "full_count_1870_1880": 11, "full_count_1900_1910": 13, "fullcount_1870_1880": 11, "function": [0, 1, 3, 7, 11], "further": [7, 13], "gamma": 9, "gbtclassifi": 9, "gen": 1, "gener": [0, 1, 5, 7, 8, 11], "generalizedlinearregress": 9, "geo": 1, "geograph": 1, "get": [0, 1, 2, 3, 11], "get_floor": 3, "get_set": 11, "get_step": 11, "get_tabl": 11, "get_task": 11, "github": 6, "give": [0, 3], "given": [0, 1, 2, 3, 4, 9, 13], "go": [3, 11], "good": 1, "gradient": [5, 9], "greater": [1, 2, 6], "greatest": 1, "group": [3, 8], "gt": 1, "h": 11, "ha": [0, 1, 2, 3, 4, 7, 11, 13], "handl": 11, "harmon": 0, "have": [0, 1, 2, 3, 4, 6, 7, 8, 9, 11, 13], "haven": 3, "head": 0, "header": [3, 10, 12], "help": [3, 8, 9, 11], "here": [2, 3, 8, 9, 11, 13], "hh": [1, 2], "hh_blocked_match": 11, "hh_col": 3, "hh_comparison": [2, 3, 8], "hh_match": [3, 11], "hh_model_eval_repeat_fn": 11, "hh_model_eval_repeat_fp": 11, "hh_model_eval_training_data": 11, "hh_model_eval_training_featur": 11, "hh_model_eval_training_result": 11, "hh_model_eval_training_vector": 11, "hh_model_explor": 11, "hh_potential_match": [2, 11], "hh_potential_matchs_prep": 11, "hh_predicted_match": 11, "hh_repeat_fn": 13, "hh_repeat_fp": 13, "hh_scored_potential_match": 11, "hh_train": [1, 3, 8, 11, 13], "hh_training_data": 11, "hh_training_data_1900_1910": 3, "hh_training_featur": [11, 13], "hh_training_result": 13, "hidden": 11, "hierarch": [7, 11], "hierarchi": 11, "high": [9, 11], "highest": [1, 3], "highli": [7, 8], "histid": [1, 3, 13], "histid_col": 1, "hit": [3, 11, 13], "hits2": [3, 13], "hlink": [0, 1, 2, 3, 4, 6, 7, 8, 9, 13], "hold": 10, "hot": 3, "household": [0, 2, 5, 7, 9, 11, 13], "how": [1, 3, 8], "howev": [4, 7, 13], "hundr": 0, "hyper": [3, 7, 13], "hyperparamet": [9, 13], "hyphen": 0, "i": [0, 1, 2, 3, 4, 6, 7, 8, 
9, 11, 12, 13], "id": [1, 3], "id_column": [1, 3], "ident": 13, "identifi": [3, 7, 13], "if_valu": 0, "ignor": 7, "ii": [0, 3], "iii": [0, 3], "imm": [1, 3, 13], "imm_interacted_bplmatch": 3, "imm_interacted_immyear_caut": [3, 13], "immigr": 1, "immyear_caut": [3, 10], "immyear_diff": [1, 3, 10, 13], "implement": [9, 13], "implicitli": 2, "import": [3, 8, 11, 13], "improv": 8, "includ": [1, 3, 8, 10, 11], "incompar": 1, "increas": [3, 10], "independ": [0, 3], "independent_var": [1, 3, 13], "index": [0, 6], "indic": [1, 13], "individu": [1, 3, 8, 13], "inf": 10, "inform": [0, 1, 3, 11], "ingest": 8, "initi": [0, 1, 11], "input": [0, 1, 3, 4, 7, 8, 11, 12], "input_col": 4, "input_column": [3, 4, 10], "input_table_nam": 11, "inspect": 8, "instal": [5, 9], "instanc": [0, 9], "instead": [1, 3, 4, 6, 8], "instruct": [6, 11], "int": [0, 1, 3, 4, 9], "integ": [0, 1, 3, 10], "integr": [6, 9], "interact": [3, 5, 8, 13], "interfac": 11, "intermedi": 11, "introduct": 5, "introspect": 8, "invalid": [1, 9], "ipum": [0, 7], "ipython": 11, "isn": 13, "istemporari": 11, "item": 0, "iter": 3, "its": [0, 1, 7, 9, 11], "itself": [1, 2], "iv": 3, "jaro": [1, 10], "jaro_winkl": 3, "java": 6, "job": 11, "join": [1, 3, 12], "join_column": [3, 12], "join_valu": [3, 12], "jr": [0, 3], "json": [3, 11], "just": [1, 3, 11, 13], "jw": 1, "jw_col_templ": 1, "jw_f": [1, 3, 13], "jw_m": [3, 10, 13], "jw_max_a": 3, "jw_max_b": 3, "jw_sp": [3, 13], "jw_street": 1, "jw_threshold": 1, "kei": [1, 8, 11], "key_count": 1, "kind": 1, "know": 3, "known": 0, "label": 9, "languag": 7, "last": [0, 1, 8, 10], "latest": 6, "launch": [11, 13], "law": 0, "lead": 0, "learn": [1, 2, 3, 7, 8, 11, 13], "least": [0, 1], "leav": 0, "left": 9, "length": [1, 3, 10], "less": [1, 3], "let": 11, "letter": 0, "level": [1, 5, 11], "leverag": 3, "libomp": 9, "librari": [5, 7, 9], "like": [0, 2, 3, 8, 11], "limit": 3, "line": [7, 11], "link": [0, 1, 3, 5, 7, 9], "link_run": 11, "linkrun": [7, 11], "list": [0, 1, 3, 4, 5, 9, 11, 
12], "liter": 3, "ll": 11, "load": 11, "load_conf_fil": 11, "load_config": 11, "loc_a": 1, "loc_a_0": 1, "loc_a_1": 1, "loc_b": 1, "loc_b_0": 1, "loc_b_1": 1, "locat": [1, 3, 11], "log2": 9, "logic": [0, 2, 3], "logist": [5, 9], "logistic_regress": [3, 13], "logisticregress": 9, "long": [3, 12], "longest": 8, "look": [1, 2, 11, 12], "lookup": 1, "lower": [0, 1], "lowercas": 0, "lowercase_strip": 3, "lowest": 0, "lr": 11, "lsoundex": [3, 13], "m": [0, 1], "m_caution": [1, 3, 10, 13], "m_interacted_jw_m": [3, 10, 13], "m_namefrst": 1, "m_pre": [3, 10, 13], "machin": [1, 2, 3, 7, 8, 9, 11, 13], "made": 6, "mai": [0, 2, 3, 7, 8, 9, 11], "main": 11, "mainli": 1, "major": [1, 11], "make": [0, 1, 2, 3, 6, 13], "male": [3, 12], "mani": [2, 8, 11], "manual": 13, "map": [5, 8, 10], "mardurmatch": [1, 3], "mark": 2, "marriag": 1, "match": [0, 1, 2, 5, 7, 11, 12, 13], "matches_df": 11, "matrix": 13, "max": [1, 9, 11], "max_depth": 9, "maxbin": 9, "maxdepth": [3, 9, 13], "maximum": [1, 9], "maximum_jw": 1, "mbpl": 1, "mbplmatch": [3, 13], "mean": [0, 2, 3], "meant": 3, "meet": 1, "member": [1, 8], "memori": 11, "men": 3, "messag": 11, "metadata": 8, "method": [11, 13], "mfbplmatch": 1, "mi": [3, 13], "mi0": 1, "mi1": 1, "mid_init_col": 1, "mid_init_match": 1, "middl": [0, 1], "might": 13, "minimum": [1, 9], "mininstancespernod": 9, "minu": [1, 3], "mismatch": 1, "miss": [1, 11], "ml": [3, 5, 9, 10], "mode": [5, 6, 13], "model": [5, 7, 11], "model_eval_repeat_fn": 11, "model_eval_repeat_fp": 11, "model_eval_training_data": 11, "model_eval_training_featur": 11, "model_eval_training_result": 11, "model_eval_training_vector": 11, "model_explor": [3, 11, 13], "model_paramet": [3, 8, 9, 13], "modifi": 3, "modul": 11, "moment": 8, "momloc": 1, "more": [0, 1, 2, 3, 10, 11, 13], "most": [0, 1, 8, 11], "mother": 10, "mother_birthyr": 1, "mpre": 1, "mr": 0, "much": [1, 8], "mult": 1, "multi": 1, "multipl": [0, 1, 5, 11], "multipli": 1, "must": [0, 1, 2, 3, 4, 9, 10, 12], "my": [11, 13], 
"my_conf": 11, "my_fil": 3, "myriad": 3, "n": [1, 9, 10], "n_training_iter": [3, 8, 13], "name": [0, 1, 3, 4, 11, 12], "name_col": 1, "name_std": [3, 12], "namefrst": [0, 1, 3], "namefrst_clean": [0, 3], "namefrst_init": 1, "namefrst_jw": [1, 2, 3, 13], "namefrst_mid_init": [0, 1], "namefrst_mid_init_2": 1, "namefrst_rel": 1, "namefrst_related_row": 1, "namefrst_split": [0, 3], "namefrst_std": [3, 12], "namefrst_std_jw": [3, 13], "namefrst_unstd": [1, 3], "namefrst_unstd_bigram": 4, "namelast": [1, 3], "namelast1": 1, "namelast2": 1, "namelast3": 1, "namelast_bigram": 3, "namelast_clean": [1, 3, 4], "namelast_clean_bigram": [3, 4], "namelast_clean_soundex": 4, "namelast_equal_as_int": 1, "namelast_frst_bigram": 4, "namelast_jw": [2, 3, 13], "namelast_jw_max": 1, "namelast_neighbor": 1, "namelast_popular": 1, "namelast_popularity_sum": 1, "nativ": 1, "nbor": [1, 3, 13], "ncount": [3, 4, 13], "ncount2": [3, 4, 13], "nearest": 0, "necessari": [4, 13], "need": [0, 1, 2, 3, 8, 9, 11, 13], "neg": [3, 5, 7], "neighbor": 1, "neighborhood": 1, "neither": 1, "nest": [2, 3, 7], "new": [0, 3, 13], "new_marr": [1, 3], "newli": 3, "niu": 1, "no_first_pad": 4, "node": 9, "non": 0, "nonzero": 1, "nor": 1, "normal": 8, "note": [2, 3], "now": [2, 11], "null": [1, 3, 4], "null_fil": 3, "num": 11, "num_col": 1, "number": [0, 1, 3, 4, 8, 9, 11], "numer": [0, 1], "numtre": [3, 9, 13], "nvl": 1, "object": [1, 3, 7, 11], "often": [2, 4, 11], "onc": [1, 11], "one": [0, 1, 3, 8], "onethird": 9, "onli": [0, 1, 2, 3, 8, 13], "ons": 5, "oper": [0, 1, 2, 3, 9], "opt": [], "option": [0, 1, 3, 4, 7, 8, 9, 11, 13], "or_group": 3, "order": [0, 3, 8], "org": 6, "organ": 11, "original_valu": 3, "oth": [3, 13], "other": [1, 3, 13], "otherwis": [0, 1, 10, 13], "our": 11, "out": [3, 4, 8, 13], "output": [0, 1, 2, 3, 4, 7, 8, 11, 13], "output_col": 4, "output_column": [3, 4, 10], "output_suspicious_td": [3, 13], "output_table_nam": 11, "output_typ": 3, "outsid": 10, "overrid": [0, 3], 
"override_column_a": [0, 3, 4], "override_column_b": [0, 3, 4], "override_transform": [0, 3], "overview": 5, "own": [1, 3], "p": 3, "packag": [6, 9], "page": [1, 2, 3, 11], "pair": [1, 2, 3, 13], "param": [9, 13], "param_grid": [3, 13], "paramet": [3, 7, 8, 9, 11, 13], "parent": [0, 1, 12], "parent_step_chang": 1, "parenthes": 3, "parquet": [3, 8], "part": [3, 8], "particular": [1, 2, 3], "particularli": 1, "partit": 13, "pass": [1, 8, 9], "path": [1, 3, 11, 12, 13], "pattern": 11, "peopl": [0, 1, 7, 11], "per": [1, 3, 9, 10, 11], "percent": 8, "percentag": 1, "perform": [1, 3, 7, 8, 9, 12], "persist": 11, "person": [0, 1, 7], "pip": [6, 9], "pipelin": 5, "pipeline_featur": [3, 8, 10], "piplin": 3, "placehold": 1, "pleas": [3, 11], "plu": 3, "point": [10, 13], "popul": 8, "posit": [0, 1, 3, 5, 7], "possibl": 3, "post": [3, 8], "potenti": [1, 2, 5, 8], "potential_match": [1, 2, 8, 11], "potential_matches_prep": 11, "potential_matches_univers": [3, 8], "power": 5, "predefin": 1, "predict": [3, 13], "predicted_match": 11, "prediction_col": 3, "preexist": 11, "prefer": 13, "prefix": 0, "prep": 3, "prep_step": 11, "prepar": [7, 8, 11], "prepend": 4, "prepped_df_a": 11, "prepped_df_b": 11, "preprocess": [5, 7, 11, 13], "present": [1, 3, 10], "pretti": 2, "primari": [1, 7], "primarili": [1, 8], "print": 11, "probabilist": [3, 7], "probabl": [0, 3, 9], "probit": [3, 5], "proceed": 12, "process": [2, 3, 7, 11], "produc": [0, 11], "product": [1, 7, 13], "program": [1, 3, 8, 13], "project": 6, "prompt": 11, "provid": [0, 3, 4, 7, 8, 9, 10, 11], "pull": 8, "punctuat": 0, "put": [11, 13], "py": [1, 3], "pypi": 5, "pyspark": [6, 9, 10, 11], "python": [6, 7, 9, 11], "q": [11, 13], "qmark": 0, "qualifi": 3, "queri": [1, 3], "race": [0, 1, 3, 10, 13], "race_interacted_srac": [3, 10, 13], "racematch": 3, "rais": [1, 4, 9], "random": [5, 9], "random_forest": [3, 13], "randomforestclassifi": 9, "rang": [3, 10], "rate": 1, "ratio": [3, 9], "ration": 0, "rationalize_name_word": 3, 
"raw": [0, 3, 8, 11], "raw_df_a": 11, "raw_df_b": 11, "read": [0, 1, 3, 8, 11], "readm": 3, "real": 7, "reason": 1, "recod": 0, "record": [0, 1, 2, 3, 7, 8], "recurs": [2, 3], "reduc": [4, 8], "refer": 3, "regex": 5, "regex_word_replac": 12, "region": [3, 13], "region_dict": 3, "regionf": 3, "regist": [8, 11], "regress": [5, 9], "regular": 13, "rel": [1, 3, 13], "relat": [0, 1, 3], "relate_a": [3, 10], "relate_col": 1, "relate_div_100": [0, 1, 3], "related_individual_max_jw": 1, "related_individual_row": 1, "relatematch": [1, 3], "relatetyp": [3, 10], "relatetype_interacted_relatematch": 3, "relationship": 7, "relev": 13, "reload": 11, "remain": 8, "remov": 0, "remove_alternate_nam": 3, "remove_qmark_hyphen": 3, "remove_suffix": 3, "repeat_fn": 13, "repeat_fp": 13, "repeatedli": 3, "replac": [0, 1, 2, 5], "replace_apostroph": 3, "replaced_birthyr": [1, 3, 4], "report": [1, 5, 7, 11], "repositori": 6, "repres": [0, 1, 3, 10, 11], "represent": [3, 8], "reproduc": 11, "request": 8, "requir": [0, 1, 2, 3, 4, 5, 8, 10, 11, 12], "research": 7, "reshap": 8, "resourc": 4, "respect": [1, 8], "restrict": 2, "result": [0, 1, 7, 10, 11, 13], "return": [1, 4, 9, 11], "reus": 5, "right": [9, 11], "risk": 1, "road": 0, "robust": 3, "root": 6, "round": [0, 3], "row": [0, 3, 4], "rule": [2, 7, 8], "run": [5, 6, 7, 8, 13], "run_all_step": [3, 11, 13], "run_step": 11, "runtim": 8, "sai": 11, "same": [0, 1, 2, 3, 4, 7, 8, 11], "sampl": 1, "satisfi": [2, 3, 8], "save": [3, 8, 13], "scala": 1, "scale": 3, "scale_data": [3, 13], "scenario": 13, "schema": 3, "score": [1, 3, 8, 10], "score_with_model": [3, 13], "scored_potential_match": 11, "scratch": 3, "script": [3, 7, 11], "search": 1, "second": [0, 1, 3, 12], "secondari": 1, "secondary_distance_col": 1, "secondary_distances_fil": 1, "secondary_key_count": 1, "secondary_loc_a": 1, "secondary_loc_b": 1, "secondary_source_column": 1, "secondary_table_nam": 1, "section": [0, 1, 2, 3, 13], "see": [1, 3, 6, 11, 13], "seen": 1, "select": [0, 
1, 5, 7, 11, 13], "separ": 8, "sequenc": 7, "seri": 11, "serialp": 3, "serv": 1, "set": [0, 1, 2, 3, 4, 7, 8, 11, 13], "set_executor_memori": 11, "set_link_task": 11, "set_loc": 11, "set_num_cor": 11, "set_preexisting_t": 11, "set_print_sql": 11, "set_value_column_a": [3, 4], "set_value_column_b": [3, 4], "sever": [1, 7], "sex": [1, 3, 12], "sex_equ": 3, "sex_region_interact": 3, "sex_regionf_interact": 3, "sexmatch": 3, "sgen": [1, 3, 13], "share": 7, "short": 3, "should": [1, 3, 4, 9, 10, 11], "show": 11, "showf": 11, "shut": 13, "sibl": 0, "sign": 1, "signific": 0, "similar": [1, 3], "simpli": [0, 3], "simplifi": 1, "sinc": [0, 3, 9], "singl": [0, 4, 11, 13], "size": 1, "skip": [3, 4, 8], "sm_bpl": 1, "sm_namefrst": 1, "sm_sex": 1, "small": 3, "smaller": 7, "smallest": 3, "sn_bpl": 1, "sn_namefrst": 1, "sn_sex": 1, "so": [0, 1, 2, 3, 6, 13], "some": [0, 1, 2, 3, 4, 7, 8, 11], "someth": 11, "sometim": 3, "somewhat": 2, "soundex": 5, "sourc": [1, 5, 8, 11, 13], "source_column_a": 1, "source_column_b": 1, "sp": 1, "sp_caution": [1, 3, 13], "sp_interacted_jw_sp": [3, 13], "sp_pre": [3, 13], "space": [0, 3, 4, 12], "span": 13, "spark": [1, 3, 4, 6, 9, 10, 11, 13], "sparkfactori": 11, "sparksess": 11, "sparkxgbclassifi": 9, "special": 1, "specif": [1, 3, 11], "specifi": [0, 1, 3, 7, 8, 10, 11, 12], "split": [3, 4, 8, 9, 10, 13], "split_by_id_a": [3, 13], "sploc": 1, "spous": 0, "spouse_birthyr": 1, "spouse_bpl": 1, "sql": [0, 1, 2, 3, 4, 5, 8, 11], "sql_condit": 3, "sqrt": 9, "squar": 1, "sr": [0, 3], "srace": [3, 10, 13], "stage": 3, "standard": [0, 1, 12], "start": [0, 12], "state": [1, 7], "state_1900_1910_distances_km": 1, "state_dist": 1, "state_distance_lookup": 1, "statecode1": 1, "statecode2": 1, "statefip": [0, 1, 3], "statefip_h": 3, "step": [0, 1, 3, 7], "stepmom": 1, "still": 12, "stop": [0, 9], "street": [0, 1], "street_jw": [1, 3, 13], "street_unstd": 12, "strictli": 10, "string": [0, 1, 3, 4, 8, 9, 11, 12], "strip": [0, 8], "structur": [2, 3, 7], "sub": 
[1, 2, 3], "subhead": 12, "subset": [3, 12], "substitut": [5, 8], "substitution_column": [3, 8, 12], "substitution_fil": [3, 12], "substitutions_street_abbrev": 12, "subtract": 1, "suffix": 0, "suppli": 12, "support": [0, 2, 3, 8, 9], "suppos": [0, 2, 3], "sure": [2, 3, 6, 11], "surnam": 1, "surround": 0, "suspect": [1, 7], "swap": 12, "syntax": 2, "system": [6, 9], "t": [0, 1, 3, 4, 13], "tabl": [1, 2, 3, 5, 8, 11, 13], "table_nam": 1, "tablenam": 11, "tailor": 7, "take": [0, 1, 2, 3, 4, 8, 11], "taken": [0, 1], "task": [2, 3, 5, 7, 9, 13], "task_nam": 11, "tell": [1, 3, 4], "templat": 1, "ten": 1, "term": 3, "test": [3, 8, 13], "text": 11, "than": [0, 1, 2, 3, 9], "thei": [0, 1, 2, 3, 8, 11], "them": [0, 1, 3, 8], "thi": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 13], "thing": 3, "those": [1, 3], "though": 1, "thought": 8, "three": [2, 3, 8], "threshold": [2, 3, 9, 13], "threshold_expr": [2, 3], "threshold_ratio": [3, 9, 13], "through": [6, 8, 10, 11], "thu": 1, "time": [0, 3, 8, 11], "to_icpsrctyi": 1, "to_statefip": 1, "togeth": [0, 1, 3], "toml": [3, 7, 11], "tool": [6, 7], "top": [1, 5], "topic": 11, "total": 8, "train": [1, 5, 7, 9, 11], "training_data": [3, 11], "training_data_1900_1910": 13, "training_data_1900_1910_hlink_featur": 13, "training_data_subset": 3, "training_featur": [11, 13], "training_result": 13, "transform": [1, 3, 5, 7, 8], "transformer_typ": [3, 10], "treat": [3, 10], "tree": [5, 9], "true": [1, 3, 4, 8, 10, 12, 13], "try": [3, 9], "tune": [7, 13], "tutori": [3, 11], "two": [0, 1, 2, 3, 4, 7, 8, 10, 11, 13], "type": [0, 3, 4, 5, 8, 9, 11, 12, 13], "typic": [3, 8], "u": 0, "ugli": 2, "under": [1, 3], "understand": 7, "union": 5, "uniqu": 3, "unit": 7, "univers": [1, 5, 8], "unknown": 1, "unlik": 2, "unrel": 1, "unstabl": [9, 11], "up": [1, 3, 11, 12], "updat": 13, "upper": 1, "upper_threshold": 3, "uppercas": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13], "us1900": 3, "us1900m_usa": 3, "us1910": 3, "us1910m_usa": 3, "usag": [3, 4, 5, 11], 
"use_potential_matches_featur": 13, "use_training_data_featur": [3, 8, 13], "user": [1, 11], "usual": [0, 8, 13], "util": 4, "v": 3, "valu": [0, 1, 2, 3, 4, 9, 10, 11, 12], "var": [1, 3], "vari": [0, 3, 4], "variabl": [0, 1, 3, 13], "variant": 12, "varieti": 8, "ve": 13, "vector": [3, 10], "verbos": 2, "veri": [4, 8, 13], "version": [0, 6, 9, 13], "vi": 3, "via": [6, 7], "view": 9, "vii": 3, "viii": 3, "volumn": 1, "wa": [1, 13], "wai": [1, 2, 3, 6, 11], "want": [0, 1, 3, 11, 13], "washington": 3, "we": [0, 1, 11, 13], "well": 3, "were": 3, "weren": 13, "what": [1, 3, 11, 13], "when": [0, 1, 2, 3, 4, 8, 13], "where": [1, 8, 11, 13], "whether": [1, 3, 12], "which": [0, 1, 2, 3, 4, 7, 8, 9, 10, 11, 13], "white": 0, "whitespac": [0, 8], "who": 1, "whole": [0, 7], "whose": 1, "width": 8, "winkler": [1, 10], "within": [1, 3, 7, 11, 12], "without": 9, "word": [0, 5], "work": [0, 1, 3, 6, 8, 11, 13], "workflow": [4, 5], "world": 7, "would": [1, 2, 3, 13], "write": [1, 11, 13], "written": [3, 7], "x": [3, 10, 11], "x_crosswalk": 11, "x_hh_tfam": 11, "x_hh_tfam_2a": 11, "x_hh_tfam_2b": 11, "x_load": 11, "x_parquet_from_csv": 11, "x_persist": 11, "x_sql": 11, "x_sqlf": 11, "x_summari": 11, "x_tab": 11, "x_tfam": 11, "x_tfam_raw": 11, "x_union": 11, "xgboost": 5, "y": 10, "year": [0, 1, 3, 4, 5], "year_b": 1, "yet": 11, "you": [0, 1, 2, 3, 6, 9, 11, 12, 13], "your": [2, 3, 4, 6, 8, 9, 11, 13], "yrimmig": 1, "zero": 1}, "titles": ["Column Mappings", "Comparison Features", "Comparisons", "Configuration", "Feature Selection Transforms", "Welcome to hlink\u2019s documentation!", "Installation", "Introduction", "Link Tasks", "Models", "Pipeline generated features", "Running hlink", "Substitutions", "Advanced Workflow Examples"], "titleterms": {"": 5, "1": 12, "abs_diff": 1, "add": 1, "add_to_a": 0, "advanc": [0, 3, 13], "after": 13, "aggreg": 1, "alia": 1, "all_equ": 1, "any_equ": 1, "api": 5, "arrai": 4, "array_index": 0, "b_minus_a": 1, "basic": [0, 3], "bigram": 4, "block": 3, 
"btwn_threshold": 1, "bucket": 10, "caution_comp_3": 1, "caution_comp_3_012": 1, "caution_comp_4": 1, "caution_comp_4_012": 1, "column": [0, 3], "comparison": [1, 2, 3], "concat_to_a": 0, "concat_to_b": 0, "concat_two_col": 0, "condense_strip_whitespac": 0, "config": 3, "configur": [3, 5, 8], "data": [3, 12, 13], "decision_tre": 9, "defin": 2, "differ": 13, "divide_by_int": 0, "document": 5, "either_are_0": 1, "either_are_1": 1, "equal": 1, "equals_as_int": 1, "exact_mult": 1, "exampl": [11, 13], "explor": [8, 13], "export": 13, "extra_children": 1, "f1_match": 1, "f2_match": 1, "fals": 13, "featur": [1, 3, 4, 10, 13], "fetch_a": 1, "fetch_b": 1, "file": 3, "filter": 3, "fn": 13, "fp": 13, "from": 6, "gener": [3, 10, 13], "geo_dist": 1, "get_floor": 0, "gradient_boosted_tre": 9, "gt_threshold": 1, "hit": 1, "hits2": 1, "hlink": [5, 11], "household": [1, 3, 8], "instal": 6, "interact": [10, 11], "introduct": 7, "jaro_winkl": 1, "jaro_winkler_r": 1, "jaro_winkler_street": 1, "jw_max_a": 1, "jw_max_b": 1, "length_b": 1, "level": 3, "librari": 11, "link": [8, 11, 13], "list": 13, "logistic_regress": 9, "look_at_addl_var": 1, "lower_threshold": 1, "lowercase_strip": 0, "map": [0, 3], "match": [3, 8], "max_jaro_winkl": 1, "maximum_jaro_winkl": 1, "ml": 13, "mode": 11, "model": [3, 8, 9, 13], "multi_jaro_winkler_search": 1, "multipl": [2, 3], "neg": 13, "neither_are_nul": 1, "not_equ": 1, "not_zero_and_not_equ": 1, "ons": 1, "overview": [2, 7, 8], "pipelin": [3, 10], "posit": 13, "potenti": [3, 13], "power": [1, 4], "preprocess": 8, "present_and_equal_categorical_in_univers": 1, "present_and_matching_categor": 1, "present_and_not_equ": 1, "present_both_year": 1, "probit": 9, "program": 11, "pypi": 6, "random_forest": 9, "rationalize_name_word": 0, "regex": 12, "rel_jaro_winkl": 1, "relat": 8, "remove_alternate_nam": 0, "remove_one_letter_nam": 0, "remove_prefix": 0, "remove_punctu": 0, "remove_qmark_hyphen": 0, "remove_stop_word": 0, "remove_suffix": 0, "replac": 12, 
"replace_apostroph": 0, "report": 8, "requir": 6, "reus": 13, "run": 11, "second_gen_imm": 1, "section": 8, "select": [3, 4], "singl": 3, "soundex": 4, "sourc": [3, 6], "split": 0, "sql_condit": [1, 4], "start": 11, "step": [8, 11], "substitut": [3, 12], "substr": 0, "sum": 1, "tabl": 12, "task": [8, 11], "threshold": 1, "time": 1, "top": 3, "train": [3, 8, 13], "transform": [0, 4, 10], "type": [1, 2, 10], "union": 4, "univers": 3, "upper_threshold": 1, "us": 11, "usag": 0, "welcom": 5, "when_valu": 0, "word": 12, "workflow": [11, 13], "xgboost": 9, "year": 13}})
\ No newline at end of file
diff --git a/docs/substitutions.html b/docs/substitutions.html
index 2d12001..b3739cc 100644
--- a/docs/substitutions.html
+++ b/docs/substitutions.html
@@ -7,7 +7,8 @@
 
     <title>Substitutions &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -113,7 +114,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -146,16 +156,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -173,7 +173,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/substitutions.md.txt"
diff --git a/docs/use_examples.html b/docs/use_examples.html
index 84fbe95..94e3c6a 100644
--- a/docs/use_examples.html
+++ b/docs/use_examples.html
@@ -7,7 +7,8 @@
 
     <title>Advanced Workflow Examples &#8212; hlink 3.7.0 documentation</title>
     <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=d1102ebc" />
-    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=12dfc556" />
+    <link rel="stylesheet" type="text/css" href="_static/basic.css?v=686e5160" />
+    <link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=27fed22d" />
     <script src="_static/documentation_options.js?v=229cbe3b"></script>
     <script src="_static/doctools.js?v=9bcbadda"></script>
     <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
@@ -177,7 +178,16 @@ <h1 class="logo"><a href="index.html">hlink</a></h1>
 
 
 
-<h3>Navigation</h3>
+
+<search id="searchbox" style="display: none" role="search">
+    <div class="searchformwrapper">
+    <form class="search" action="search.html" method="get">
+      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" placeholder="Search"/>
+      <input type="submit" value="Go" />
+    </form>
+    </div>
+</search>
+<script>document.getElementById('searchbox').style.display = "block"</script><h3>Navigation</h3>
 <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="introduction.html">Introduction</a></li>
 <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
@@ -210,16 +220,6 @@ <h3>Related Topics</h3>
   </ul></li>
 </ul>
 </div>
-<search id="searchbox" style="display: none" role="search">
-  <h3 id="searchlabel">Quick search</h3>
-    <div class="searchformwrapper">
-    <form class="search" action="search.html" method="get">
-      <input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
-      <input type="submit" value="Go" />
-    </form>
-    </div>
-</search>
-<script>document.getElementById('searchbox').style.display = "block"</script>
 
 
 
@@ -237,7 +237,7 @@ <h3 id="searchlabel">Quick search</h3>
       
       |
       Powered by <a href="https://www.sphinx-doc.org/">Sphinx 8.1.3</a>
-      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 0.7.16</a>
+      &amp; <a href="https://alabaster.readthedocs.io">Alabaster 1.0.0</a>
       
       |
       <a href="_sources/use_examples.md.txt"