From 2e9674da18a99133214eaede61d7b2f80f28b8ca Mon Sep 17 00:00:00 2001 From: Zayd Simjee Date: Mon, 9 Dec 2024 15:33:16 -0800 Subject: [PATCH] import validator --- .env | 0 .github/workflows/pr_qc.yml | 19 + .github/workflows/publish_pypi.yml | 17 + .gitignore | 9 + CONTRIBUTING.md | 38 + LICENSE | 201 +++++ Makefile | 16 + README.md | 100 +++ app_inference_spec.py | 69 ++ benchmarks.json | 1238 ++++++++++++++++++++++++++++ pyproject.toml | 25 + pyrightconfig.json | 3 + tests/__init__.py | 0 tests/test_validator.py | 23 + validator/__init__.py | 3 + validator/analyzer_engine.py | 95 +++ validator/constants.py | 21 + validator/gliner_recognizer.py | 39 + validator/main.py | 190 +++++ validator/post-install.py | 5 + 20 files changed, 2111 insertions(+) create mode 100644 .env create mode 100644 .github/workflows/pr_qc.yml create mode 100644 .github/workflows/publish_pypi.yml create mode 100644 .gitignore create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 app_inference_spec.py create mode 100644 benchmarks.json create mode 100644 pyproject.toml create mode 100644 pyrightconfig.json create mode 100644 tests/__init__.py create mode 100644 tests/test_validator.py create mode 100644 validator/__init__.py create mode 100644 validator/analyzer_engine.py create mode 100644 validator/constants.py create mode 100644 validator/gliner_recognizer.py create mode 100644 validator/main.py create mode 100644 validator/post-install.py diff --git a/.env b/.env new file mode 100644 index 0000000..e69de29 diff --git a/.github/workflows/pr_qc.yml b/.github/workflows/pr_qc.yml new file mode 100644 index 0000000..7a46efe --- /dev/null +++ b/.github/workflows/pr_qc.yml @@ -0,0 +1,19 @@ +name: Pull Request Quality Checks +on: + pull_request: + types: [ opened, synchronize ] + branches: [ main ] +jobs: + run-qa: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + - name: Run qa + run: | + pip install ".[dev]" + make qa diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml new file mode 100644 index 0000000..628a135 --- /dev/null +++ b/.github/workflows/publish_pypi.yml @@ -0,0 +1,17 @@ +name: Publish to Guardrails Hub + +on: + workflow_dispatch: + push: + branches: + - main + +jobs: + setup: + runs-on: ubuntu-latest + steps: + - name: Build & Deploy + uses: guardrails-ai/guardrails/.github/actions/validator_pypi_publish@main + with: + guardrails_token: ${{ secrets.GR_GUARDRAILS_TOKEN }} + validator_id: guardrails/guardrails_pii \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c8398b0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.python-version +__pycache__/ +build +*.egg-info +.venv +.pytest_cache +.ruff_cache +.vscode +.idea \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..1d1dd29 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,38 @@ +# Guardrails Validator Template + +## How to create a Guardrails Validator +- On the top right of the page, click "Use this template", select "create a new repository" and set a name for the package. See [Naming Conventions](#naming-conventions) below. +- Clone down the new repository. 
+- Modify the class in [validator/main.py](validator/main.py) with source code for the new validator
+    - Make sure that the class still inherits from `Validator` and has the `register_validator` annotation.
+    - Set the `name` in the `register_validator` to the name of the repo prefixed with your org as a namespace and set the appropriate data type.
+- Change [validator/__init__.py](validator/__init__.py) to your new Validator classname instead of ValidatorTemplate
+- Perform a self-install with `make dev` or `pip install -e ".[dev]"`
+- Locally test the validator with the [test instructions below](#testing-and-using-your-validator)
+- Modify the README and follow the Validator Card format; you can find an example [here](https://github.com/guardrails-ai/lowercase/blob/main/README.md)
+
+* Note: This package uses a pyproject.toml file; on first run, run `make dev` to pull down and install all dependencies.
+
+### Naming Conventions
+1. Avoid using `is` and `bug`
+2. Use snake_case, i.e. `_` to separate words, e.g. `valid_address`
+3. For the description of the repo, write one sentence that says what the validator does; it should be the same as the description in the pydoc string.
+4. When annotating the class, use the `{namespace}/{validator_name}` pattern, e.g. `@register_validator(name="guardrails/valid_address")`
+
+### Testing and using your validator
+- Open [tests/test_validator.py](tests/test_validator.py) to test your new validator
+- Import your new validator and modify `ValidatorTestObject` accordingly
+- Modify the TEST_OUTPUT and TEST_FAIL_OUTPUT accordingly
+- Run `pytest ./tests` (or `make test`) from the terminal and make sure the returned output reflects the input object
+- Write advanced tests for failures, etc.
+
+## Upload your validator to the validator hub
+- Update the [pyproject.toml](pyproject.toml) file and make the necessary changes as follows:
+    - Update the `name` field to the name of your validator
+    - Update the `description` field to a short description of your validator
+    - Update the `authors` field to your name and email
+    - Add/update the `dependencies` field to include all dependencies your validator needs.
+- If there are any post-installation steps, such as downloading tokenizers or logging into Hugging Face, update the [post-install.py](validator/post-install.py) file accordingly.
+- You can add additional files to the [validator](validator) directory, but don't rename any existing files/directories.
+    - e.g. Add any environment variables (without the values, just the keys) to the [.env](.env) file.
+- Ensure that there are no other dependencies or any additional steps required to run your validator.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..62d33ec --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +dev: + pip install -e ".[dev]" + +lint: + ruff check . + +test: + pytest ./tests + +type: + pyright validator + +qa: + make lint + make type + make test diff --git a/README.md b/README.md new file mode 100644 index 0000000..de0eabc --- /dev/null +++ b/README.md @@ -0,0 +1,100 @@ +# Overview + +| Developed by | Guardrails AI | +| --- | --- | +| Date of development | September 2, 2024 | +| Validator type | Format | +| Blog | | +| License | Apache 2 | +| Input/Output | Output | + +## Description + +### Intended Use +This validator is designed to detect and anonymize Personally Identifiable Information (PII) in LLM-generated text using state-of-the-art methods. Currently a combination of Presidio and GLiNER yields the highest performing results. It supports various entity types and can be configured to focus on specific PII categories. + +Key features: +- Detects PII using both Presidio's built-in recognizers and a GLiNER-based recognizer +- Anonymizes detected PII to protect sensitive information +- Customizable entity types for targeted PII detection +- Provides detailed error spans for identified PII instances + +Use this validator to ensure that generated text does not inadvertently contain sensitive personal information, helping to maintain privacy and compliance with data protection regulations. + +### Requirements + +* Dependencies: + - guardrails-ai>=0.4.0 + - gliner + - presidio-analyzer + - presidio-anonymizer + +## Installation + +```bash +$ guardrails hub install hub://guardrails/guardrails_pii +``` + +## Usage Examples + +### Validating string output via Python + +In this example, we apply the validator to a string output generated by an LLM. + +```python +# Import Guard and Validator +from guardrails.hub import GuardrailsPII +from guardrails import Guard + +# Setup Guard +guard = Guard().use( + GuardrailsPII(entities=["DATE_TIME"], on_fail="fix") +) + +``` + +# API Reference + +**`__init__(self, entities: List[str], model_name: str = "urchade/gliner_small-v2.1", on_fail: Optional[Callable] = None)`** + +
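+Initializes a new instance of the `GuardrailsPII` validator. Parameter descriptions below are taken from the class docstring in `validator/main.py`.
+
+**Parameters**
+
+- **`entities`** *(List[str])*: The entity types to detect and anonymize, e.g. `["PERSON", "EMAIL_ADDRESS", "DATE_TIME"]`.
+- **`model_name`** *(str)*: The GLiNER model used alongside Presidio's built-in recognizers. Defaults to `"urchade/gliner_small-v2.1"`.
+- **`on_fail`** *(Optional[Callable])*: A callable (or policy name such as `"fix"`, as in the example above) to apply when validation fails. Defaults to `None`.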
+ +**`validate(self, value: Any, metadata: Dict = {}) -> ValidationResult`** + diff --git a/app_inference_spec.py b/app_inference_spec.py new file mode 100644 index 0000000..748579b --- /dev/null +++ b/app_inference_spec.py @@ -0,0 +1,69 @@ +from fastapi import HTTPException +from pydantic import BaseModel +from typing import List, Optional, cast + +from validator.main import GuardrailsPII, InferenceInput as InputRequest, InferenceOutput as OutputResponse +from models_host.base_inference_spec import BaseInferenceSpec + + +class InferenceSpec(BaseInferenceSpec): + model: Optional[GuardrailsPII] = None + model_name = "guardrails_pii" + + def load(self): + self.model = GuardrailsPII(use_local=True, entities=[ + "CREDIT_CARD", + "CRYPTO", + "DATE_TIME", + "EMAIL_ADDRESS", + "IBAN_CODE", + "IP_ADDRESS", + "NRP", + "LOCATION", + "PERSON", + "PHONE_NUMBER", + "MEDICAL_LICENSE", + "URL", + "US_BANK_NUMBER", + "US_DRIVER_LICENSE", + "US_ITIN", + "US_PASSPORT", + "US_SSN", + "UK_NHS", + "ES_NIF", + "ES_NIE", + "IT_FISCAL_CODE", + "IT_DRIVER_LICENSE", + "IT_VAT_CODE", + "IT_PASSPORT", + "IT_IDENTITY_CARD", + "PL_PESEL", + "SG_NRIC_FIN", + "SG_UEN", + "AU_ABN", + "AU_ACN", + "AU_TFN", + "AU_MEDICARE", + "IN_PAN", + "IN_AADHAAR", + "IN_VEHICLE_REGISTRATION", + "IN_VOTER", + "IN_PASSPORT", + "FI_PERSONAL_IDENTITY_CODE" + ]) + + def process_request(self, input_request: InputRequest): + text = input_request.text + entities = input_request.entities + if not text or not entities: + raise HTTPException(status_code=400, detail="Invalid input") + args = (text, entities) + kwargs = {} + return args, kwargs + + def infer(self, text: str, entities: List[str]) -> OutputResponse: + # should be loaded before this method is called + model = cast(GuardrailsPII, self.model) + return model._inference_local(InputRequest(text=text, entities=entities)) + + \ No newline at end of file diff --git a/benchmarks.json b/benchmarks.json new file mode 100644 index 0000000..a328275 --- /dev/null +++ b/benchmarks.json @@ -0,0 +1,1238 @@ +[ + { + "title": "Any", + "latency_cpu_sec": 0.5001564576890734, + "latency_cpu_stddev": 0.0791025575617722, + "latency_gpu_sec": 0.06782163196139865, + "latency_gpu_stddev": 0.011952468133168508, + "tpr_at_default": 0.7480387545262547, + "fpr_at_default": null, + "f1_at_default": 0.6519744879428565, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 236951, + "fp": 173158, + "tn": null, + "fn": 79812 + }, + { + "threshold": 0.1, + "tp": 216769, + "fp": 164169, + "tn": null, + "fn": 83164 + }, + { + "threshold": 0.2, + "tp": 214515, + "fp": 163164, + "tn": null, + "fn": 83164 + }, + { + "threshold": 0.30000000000000004, + "tp": 209469, + "fp": 134487, + "tn": null, + "fn": 79270 + }, + { + "threshold": 0.4, + "tp": 209469, + "fp": 134487, + "tn": null, + "fn": 79270 + }, + { + "threshold": 0.5, + "tp": 205309, + "fp": 132792, + "tn": null, + "fn": 82021 + }, + { + "threshold": 0.6000000000000001, + "tp": 185934, + "fp": 106385, + "tn": null, + "fn": 96165 + }, + { + "threshold": 0.7000000000000001, + "tp": 164892, + "fp": 78720, + "tn": null, + "fn": 113952 + }, + { + "threshold": 0.8, + "tp": 138328, + "fp": 52562, + "tn": null, + "fn": 138390 + }, + { + "threshold": 0.9, + "tp": 99884, + "fp": 26493, + "tn": null, + "fn": 174665 + }, + { + "threshold": 1.0, + "tp": 15553, + "fp": 936, + "tn": null, + "fn": 256729 + } + ], + "latency_curve": [ + { + "input_length_chars": 300, + "latency_seconds": 0.06662560205389984, + "latency_seconds_stddev": 0.019666412039881533, + 
"latency_seconds_min": 0.05627250671386719, + "latency_seconds_max": 0.2916872501373291 + }, + { + "input_length_chars": 400, + "latency_seconds": 0.06876428430730647, + "latency_seconds_stddev": 0.005672477485983573, + "latency_seconds_min": 0.05778670310974121, + "latency_seconds_max": 0.0882265567779541 + }, + { + "input_length_chars": 200, + "latency_seconds": 0.06019690036773682, + "latency_seconds_stddev": 0.002385315166132153, + "latency_seconds_min": 0.05768465995788574, + "latency_seconds_max": 0.06442856788635254 + }, + { + "input_length_chars": 500, + "latency_seconds": 0.06782427200904259, + "latency_seconds_stddev": 0.006735543066725786, + "latency_seconds_min": 0.05729365348815918, + "latency_seconds_max": 0.07943344116210938 + }, + { + "input_length_chars": 100, + "latency_seconds": 0.05822432041168213, + "latency_seconds_stddev": 0.0021723508834838867, + "latency_seconds_min": 0.05605196952819824, + "latency_seconds_max": 0.060396671295166016 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "US_DRIVER_LICENSE", + "latency_cpu_sec": 0.0027650482289497203, + "latency_cpu_stddev": 0.07927303849248475, + "latency_gpu_sec": 0.0003645946994020242, + "latency_gpu_stddev": 0.00482177644304074, + "tpr_at_default": 0.7544902093180283, + "fpr_at_default": null, + "f1_at_default": 0.4855938464212768, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 16761, + "fp": 30057, + "tn": null, + "fn": 5454 + }, + { + "threshold": 0.1, + "tp": 13243, + "fp": 24476, + "tn": null, + "fn": 7186 + }, + { + "threshold": 0.2, + "tp": 10993, + "fp": 24476, + "tn": null, + "fn": 7186 + }, + { + "threshold": 0.30000000000000004, + "tp": 9767, + "fp": 3292, + "tn": null, + "fn": 6497 + }, + { + "threshold": 0.4, + "tp": 9767, + "fp": 3292, + "tn": null, + "fn": 6497 + }, + { + "threshold": 0.5, + "tp": 8433, + "fp": 2229, + "tn": null, + "fn": 7531 + }, + { + "threshold": 0.6000000000000001, + "tp": 6929, + "fp": 2102, + "tn": null, + "fn": 8848 + }, + { + "threshold": 0.7000000000000001, + "tp": 3701, + "fp": 829, + "tn": null, + "fn": 11506 + }, + { + "threshold": 0.8, + "tp": 2142, + "fp": 612, + "tn": null, + "fn": 13065 + }, + { + "threshold": 0.9, + "tp": 731, + "fp": 377, + "tn": null, + "fn": 14476 + }, + { + "threshold": 1.0, + "tp": 0, + "fp": 4, + "tn": null, + "fn": 15207 + } + ], + "latency_curve": [ + { + "input_length_chars": 300, + "latency_seconds": 0.06350061098734537, + "latency_seconds_stddev": 0.0028365657735759868, + "latency_seconds_min": 0.05732417106628418, + "latency_seconds_max": 0.06853342056274414 + }, + { + "input_length_chars": 400, + "latency_seconds": 0.06732935063979205, + "latency_seconds_stddev": 0.004817657110267345, + "latency_seconds_min": 0.06043529510498047, + "latency_seconds_max": 0.07927799224853516 + }, + { + "input_length_chars": 100, + "latency_seconds": 0.05605196952819824, + "latency_seconds_stddev": 0.0, + "latency_seconds_min": 0.05605196952819824, + "latency_seconds_max": 0.05605196952819824 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "US_SSN", + "latency_cpu_sec": 0.0026060162854894243, + "latency_cpu_stddev": 0.08295561378664407, + "latency_gpu_sec": 0.00036480367900903956, + "latency_gpu_stddev": 0.005411268668113324, + "tpr_at_default": 0.8288305237808549, + "fpr_at_default": null, + "f1_at_default": 0.8414478082322604, + "auc": 
null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 22027, + "fp": 3752, + "tn": null, + "fn": 4549 + }, + { + "threshold": 0.1, + "tp": 11874, + "fp": 2385, + "tn": null, + "fn": 6403 + }, + { + "threshold": 0.2, + "tp": 11874, + "fp": 2385, + "tn": null, + "fn": 6403 + }, + { + "threshold": 0.30000000000000004, + "tp": 11874, + "fp": 589, + "tn": null, + "fn": 5318 + }, + { + "threshold": 0.4, + "tp": 11874, + "fp": 589, + "tn": null, + "fn": 5318 + }, + { + "threshold": 0.5, + "tp": 10935, + "fp": 557, + "tn": null, + "fn": 5694 + }, + { + "threshold": 0.6000000000000001, + "tp": 8966, + "fp": 364, + "tn": null, + "fn": 7096 + }, + { + "threshold": 0.7000000000000001, + "tp": 7401, + "fp": 244, + "tn": null, + "fn": 8612 + }, + { + "threshold": 0.8, + "tp": 5562, + "fp": 164, + "tn": null, + "fn": 10423 + }, + { + "threshold": 0.9, + "tp": 2524, + "fp": 79, + "tn": null, + "fn": 13267 + }, + { + "threshold": 1.0, + "tp": 0, + "fp": 3, + "tn": null, + "fn": 15791 + } + ], + "latency_curve": [ + { + "input_length_chars": 400, + "latency_seconds": 0.07059233419356807, + "latency_seconds_stddev": 0.005871802048002383, + "latency_seconds_min": 0.060651540756225586, + "latency_seconds_max": 0.0821542739868164 + }, + { + "input_length_chars": 300, + "latency_seconds": 0.06668961842854818, + "latency_seconds_stddev": 0.003372576439908064, + "latency_seconds_min": 0.060778141021728516, + "latency_seconds_max": 0.07352352142333984 + }, + { + "input_length_chars": 500, + "latency_seconds": 0.0668608546257019, + "latency_seconds_stddev": 0.0034799743984401913, + "latency_seconds_min": 0.06315016746520996, + "latency_seconds_max": 0.07257485389709473 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "DATE_TIME", + "latency_cpu_sec": 0.0010070136105612946, + "latency_cpu_stddev": 0.07450837961447598, + "latency_gpu_sec": 0.00014612826511595846, + "latency_gpu_stddev": 0.03183003269274835, + "tpr_at_default": 0.8010428713453852, + "fpr_at_default": null, + "f1_at_default": 0.7786411596489499, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 39488, + "fp": 12646, + "tn": null, + "fn": 9806 + }, + { + "threshold": 0.1, + "tp": 39481, + "fp": 12646, + "tn": null, + "fn": 9806 + }, + { + "threshold": 0.2, + "tp": 39481, + "fp": 12646, + "tn": null, + "fn": 9806 + }, + { + "threshold": 0.30000000000000004, + "tp": 39481, + "fp": 12642, + "tn": null, + "fn": 9806 + }, + { + "threshold": 0.4, + "tp": 39481, + "fp": 12642, + "tn": null, + "fn": 9806 + }, + { + "threshold": 0.5, + "tp": 39481, + "fp": 12642, + "tn": null, + "fn": 9806 + }, + { + "threshold": 0.6000000000000001, + "tp": 36871, + "fp": 10255, + "tn": null, + "fn": 12416 + }, + { + "threshold": 0.7000000000000001, + "tp": 33280, + "fp": 7955, + "tn": null, + "fn": 16007 + }, + { + "threshold": 0.8, + "tp": 27824, + "fp": 5522, + "tn": null, + "fn": 21463 + }, + { + "threshold": 0.9, + "tp": 17700, + "fp": 2740, + "tn": null, + "fn": 31587 + }, + { + "threshold": 1.0, + "tp": 0, + "fp": 38, + "tn": null, + "fn": 49287 + } + ], + "latency_curve": [ + { + "input_length_chars": 300, + "latency_seconds": 0.07724040432980186, + "latency_seconds_stddev": 0.05075409971249863, + "latency_seconds_min": 0.05984759330749512, + "latency_seconds_max": 0.2916872501373291 + }, + { + "input_length_chars": 400, + "latency_seconds": 0.06876775196620397, + "latency_seconds_stddev": 0.0053065053301728105, + "latency_seconds_min": 
0.06025385856628418, + "latency_seconds_max": 0.07941031455993652 + }, + { + "input_length_chars": 200, + "latency_seconds": 0.0630791187286377, + "latency_seconds_stddev": 0.0, + "latency_seconds_min": 0.0630791187286377, + "latency_seconds_max": 0.0630791187286377 + }, + { + "input_length_chars": 500, + "latency_seconds": 0.06901347637176514, + "latency_seconds_stddev": 0.002688765525817871, + "latency_seconds_min": 0.06632471084594727, + "latency_seconds_max": 0.07170224189758301 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "IP_ADDRESS", + "latency_cpu_sec": 0.002971611313650671, + "latency_cpu_stddev": 0.08356817204047809, + "latency_gpu_sec": 0.00039036310293980356, + "latency_gpu_stddev": 0.006398118352734842, + "tpr_at_default": 0.9719929095973664, + "fpr_at_default": null, + "f1_at_default": 0.9650282840980515, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 19192, + "fp": 4148, + "tn": null, + "fn": 3830 + }, + { + "threshold": 0.1, + "tp": 19192, + "fp": 4148, + "tn": null, + "fn": 3830 + }, + { + "threshold": 0.2, + "tp": 19192, + "fp": 4148, + "tn": null, + "fn": 3830 + }, + { + "threshold": 0.30000000000000004, + "tp": 19192, + "fp": 838, + "tn": null, + "fn": 553 + }, + { + "threshold": 0.4, + "tp": 19192, + "fp": 838, + "tn": null, + "fn": 553 + }, + { + "threshold": 0.5, + "tp": 19192, + "fp": 838, + "tn": null, + "fn": 553 + }, + { + "threshold": 0.6000000000000001, + "tp": 14253, + "fp": 538, + "tn": null, + "fn": 3566 + }, + { + "threshold": 0.7000000000000001, + "tp": 13681, + "fp": 345, + "tn": null, + "fn": 3678 + }, + { + "threshold": 0.8, + "tp": 12902, + "fp": 246, + "tn": null, + "fn": 3901 + }, + { + "threshold": 0.9, + "tp": 11593, + "fp": 202, + "tn": null, + "fn": 4219 + }, + { + "threshold": 1.0, + "tp": 0, + "fp": 8, + "tn": null, + "fn": 14111 + } + ], + "latency_curve": [ + { + "input_length_chars": 400, + "latency_seconds": 0.06989211194655474, + "latency_seconds_stddev": 0.00624749421212025, + "latency_seconds_min": 0.060031890869140625, + "latency_seconds_max": 0.08392930030822754 + }, + { + "input_length_chars": 300, + "latency_seconds": 0.06568134914744984, + "latency_seconds_stddev": 0.0036130642628351894, + "latency_seconds_min": 0.059876441955566406, + "latency_seconds_max": 0.07319164276123047 + }, + { + "input_length_chars": 200, + "latency_seconds": 0.058855414390563965, + "latency_seconds_stddev": 0.0007027387619018555, + "latency_seconds_min": 0.05815267562866211, + "latency_seconds_max": 0.05955815315246582 + }, + { + "input_length_chars": 500, + "latency_seconds": 0.07756912708282471, + "latency_seconds_stddev": 0.001826167106628418, + "latency_seconds_min": 0.07574295997619629, + "latency_seconds_max": 0.07939529418945312 + }, + { + "input_length_chars": 100, + "latency_seconds": 0.060396671295166016, + "latency_seconds_stddev": 0.0, + "latency_seconds_min": 0.060396671295166016, + "latency_seconds_max": 0.060396671295166016 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "PHONE_NUMBER", + "latency_cpu_sec": 0.0030517373741155516, + "latency_cpu_stddev": 0.06791256200432275, + "latency_gpu_sec": 0.0004287002891894208, + "latency_gpu_stddev": 0.005807757584475467, + "tpr_at_default": 0.922625188477105, + "fpr_at_default": null, + "f1_at_default": 0.7691188145011908, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, 
+ "tp": 11642, + "fp": 6941, + "tn": null, + "fn": 1767 + }, + { + "threshold": 0.1, + "tp": 11626, + "fp": 6088, + "tn": null, + "fn": 1027 + }, + { + "threshold": 0.2, + "tp": 11626, + "fp": 6088, + "tn": null, + "fn": 1027 + }, + { + "threshold": 0.30000000000000004, + "tp": 11626, + "fp": 6062, + "tn": null, + "fn": 1027 + }, + { + "threshold": 0.4, + "tp": 11626, + "fp": 6062, + "tn": null, + "fn": 1027 + }, + { + "threshold": 0.5, + "tp": 11626, + "fp": 6005, + "tn": null, + "fn": 975 + }, + { + "threshold": 0.6000000000000001, + "tp": 11478, + "fp": 4871, + "tn": null, + "fn": 1123 + }, + { + "threshold": 0.7000000000000001, + "tp": 11210, + "fp": 3851, + "tn": null, + "fn": 1391 + }, + { + "threshold": 0.8, + "tp": 10772, + "fp": 2816, + "tn": null, + "fn": 1829 + }, + { + "threshold": 0.9, + "tp": 9397, + "fp": 1546, + "tn": null, + "fn": 3204 + }, + { + "threshold": 1.0, + "tp": 0, + "fp": 10, + "tn": null, + "fn": 12601 + } + ], + "latency_curve": [ + { + "input_length_chars": 200, + "latency_seconds": 0.062042236328125, + "latency_seconds_stddev": 0.0, + "latency_seconds_min": 0.062042236328125, + "latency_seconds_max": 0.062042236328125 + }, + { + "input_length_chars": 400, + "latency_seconds": 0.07090958207845688, + "latency_seconds_stddev": 0.006409514182087027, + "latency_seconds_min": 0.06262636184692383, + "latency_seconds_max": 0.0882265567779541 + }, + { + "input_length_chars": 300, + "latency_seconds": 0.06789888441562653, + "latency_seconds_stddev": 0.003024517899545398, + "latency_seconds_min": 0.0592954158782959, + "latency_seconds_max": 0.07266116142272949 + }, + { + "input_length_chars": 500, + "latency_seconds": 0.07809114456176758, + "latency_seconds_stddev": 0.0, + "latency_seconds_min": 0.07809114456176758, + "latency_seconds_max": 0.07809114456176758 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "US_PASSPORT", + "latency_cpu_sec": 0.0017719160017270683, + "latency_cpu_stddev": 0.07632752058890187, + "latency_gpu_sec": 0.00024107366496484736, + "latency_gpu_stddev": 0.005599260245606014, + "tpr_at_default": 0.5836908610049956, + "fpr_at_default": null, + "f1_at_default": 0.6702434580148807, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 19863, + "fp": 5378, + "tn": null, + "fn": 14167 + }, + { + "threshold": 0.1, + "tp": 13707, + "fp": 4214, + "tn": null, + "fn": 14658 + }, + { + "threshold": 0.2, + "tp": 13707, + "fp": 3209, + "tn": null, + "fn": 14658 + }, + { + "threshold": 0.30000000000000004, + "tp": 9963, + "fp": 1989, + "tn": null, + "fn": 16422 + }, + { + "threshold": 0.4, + "tp": 9963, + "fp": 1989, + "tn": null, + "fn": 16422 + }, + { + "threshold": 0.5, + "tp": 8076, + "fp": 1454, + "tn": null, + "fn": 17815 + }, + { + "threshold": 0.6000000000000001, + "tp": 6318, + "fp": 942, + "tn": null, + "fn": 19570 + }, + { + "threshold": 0.7000000000000001, + "tp": 4393, + "fp": 558, + "tn": null, + "fn": 21480 + }, + { + "threshold": 0.8, + "tp": 2512, + "fp": 269, + "tn": null, + "fn": 23361 + }, + { + "threshold": 0.9, + "tp": 861, + "fp": 111, + "tn": null, + "fn": 25012 + }, + { + "threshold": 1.0, + "tp": 0, + "fp": 8, + "tn": null, + "fn": 25873 + } + ], + "latency_curve": [ + { + "input_length_chars": 400, + "latency_seconds": 0.0700726278366581, + "latency_seconds_stddev": 0.005109933895493221, + "latency_seconds_min": 0.06238150596618652, + "latency_seconds_max": 0.07960915565490723 + }, + { + "input_length_chars": 200, + 
"latency_seconds": 0.06442856788635254, + "latency_seconds_stddev": 0.0, + "latency_seconds_min": 0.06442856788635254, + "latency_seconds_max": 0.06442856788635254 + }, + { + "input_length_chars": 300, + "latency_seconds": 0.061956627028329034, + "latency_seconds_stddev": 0.0024691859390991128, + "latency_seconds_min": 0.05627250671386719, + "latency_seconds_max": 0.06612968444824219 + }, + { + "input_length_chars": 500, + "latency_seconds": 0.06936687231063843, + "latency_seconds_stddev": 0.0019287079169989572, + "latency_seconds_min": 0.0660696029663086, + "latency_seconds_max": 0.07091236114501953 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "EMAIL_ADDRESS", + "latency_cpu_sec": 0.0026668309671887204, + "latency_cpu_stddev": 0.09453657805835253, + "latency_gpu_sec": 0.0003374096178115216, + "latency_gpu_stddev": 0.005114530757871206, + "tpr_at_default": 0.9786564251086067, + "fpr_at_default": null, + "f1_at_default": 0.9643876411465442, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 16796, + "fp": 8083, + "tn": null, + "fn": 7476 + }, + { + "threshold": 0.1, + "tp": 16796, + "fp": 8083, + "tn": null, + "fn": 7476 + }, + { + "threshold": 0.2, + "tp": 16796, + "fp": 8083, + "tn": null, + "fn": 7476 + }, + { + "threshold": 0.30000000000000004, + "tp": 16796, + "fp": 7485, + "tn": null, + "fn": 6879 + }, + { + "threshold": 0.4, + "tp": 16796, + "fp": 7485, + "tn": null, + "fn": 6879 + }, + { + "threshold": 0.5, + "tp": 16796, + "fp": 7485, + "tn": null, + "fn": 6879 + }, + { + "threshold": 0.6000000000000001, + "tp": 16523, + "fp": 5161, + "tn": null, + "fn": 4609 + }, + { + "threshold": 0.7000000000000001, + "tp": 16246, + "fp": 3247, + "tn": null, + "fn": 2728 + }, + { + "threshold": 0.8, + "tp": 15970, + "fp": 1954, + "tn": null, + "fn": 1463 + }, + { + "threshold": 0.9, + "tp": 15703, + "fp": 1227, + "tn": null, + "fn": 746 + }, + { + "threshold": 1.0, + "tp": 15544, + "fp": 809, + "tn": null, + "fn": 339 + } + ], + "latency_curve": [ + { + "input_length_chars": 200, + "latency_seconds": 0.05966448783874512, + "latency_seconds_stddev": 0.0019003800108085356, + "latency_seconds_min": 0.0577542781829834, + "latency_seconds_max": 0.062256813049316406 + }, + { + "input_length_chars": 400, + "latency_seconds": 0.06689466879918025, + "latency_seconds_stddev": 0.003435500787197317, + "latency_seconds_min": 0.06200146675109863, + "latency_seconds_max": 0.07651424407958984 + }, + { + "input_length_chars": 300, + "latency_seconds": 0.06419491767883301, + "latency_seconds_stddev": 0.0035900938820868284, + "latency_seconds_min": 0.0583498477935791, + "latency_seconds_max": 0.06834125518798828 + }, + { + "input_length_chars": 500, + "latency_seconds": 0.06340786814689636, + "latency_seconds_stddev": 0.008783806693104808, + "latency_seconds_min": 0.05729365348815918, + "latency_seconds_max": 0.07943344116210938 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "LOCATION", + "latency_cpu_sec": 0.001449225129480021, + "latency_cpu_stddev": 0.07152375074788685, + "latency_gpu_sec": 0.00019719408958310936, + "latency_gpu_stddev": 0.005011772047089581, + "tpr_at_default": 0.7645484517421941, + "fpr_at_default": null, + "f1_at_default": 0.694941945188737, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 59517, + "fp": 34196, + "tn": null, + "fn": 18212 + }, + { + 
"threshold": 0.1, + "tp": 59196, + "fp": 34185, + "tn": null, + "fn": 18226 + }, + { + "threshold": 0.2, + "tp": 59196, + "fp": 34185, + "tn": null, + "fn": 18226 + }, + { + "threshold": 0.30000000000000004, + "tp": 59134, + "fp": 33710, + "tn": null, + "fn": 18211 + }, + { + "threshold": 0.4, + "tp": 59134, + "fp": 33710, + "tn": null, + "fn": 18211 + }, + { + "threshold": 0.5, + "tp": 59134, + "fp": 33705, + "tn": null, + "fn": 18211 + }, + { + "threshold": 0.6000000000000001, + "tp": 55755, + "fp": 27078, + "tn": null, + "fn": 21588 + }, + { + "threshold": 0.7000000000000001, + "tp": 51198, + "fp": 20695, + "tn": null, + "fn": 26144 + }, + { + "threshold": 0.8, + "tp": 44930, + "fp": 14466, + "tn": null, + "fn": 32412 + }, + { + "threshold": 0.9, + "tp": 34631, + "fp": 7737, + "tn": null, + "fn": 42711 + }, + { + "threshold": 1.0, + "tp": 1, + "fp": 11, + "tn": null, + "fn": 77341 + } + ], + "latency_curve": [ + { + "input_length_chars": 300, + "latency_seconds": 0.06484560668468475, + "latency_seconds_stddev": 0.0031987191985207207, + "latency_seconds_min": 0.05654311180114746, + "latency_seconds_max": 0.06931209564208984 + }, + { + "input_length_chars": 400, + "latency_seconds": 0.06910865060214338, + "latency_seconds_stddev": 0.005551538580261868, + "latency_seconds_min": 0.06056475639343262, + "latency_seconds_max": 0.07893776893615723 + }, + { + "input_length_chars": 500, + "latency_seconds": 0.06800017356872559, + "latency_seconds_stddev": 0.001433903777481523, + "latency_seconds_min": 0.06656384468078613, + "latency_seconds_max": 0.07063722610473633 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + }, + { + "title": "PERSON", + "latency_cpu_sec": 0.001364449997189952, + "latency_cpu_stddev": 0.07061747358909091, + "latency_gpu_sec": 0.00018220674562680826, + "latency_gpu_stddev": 0.004183918048782535, + "tpr_at_default": 0.6243992206105218, + "fpr_at_default": null, + "f1_at_default": 0.44334960224434117, + "auc": null, + "roc_points": [ + { + "threshold": 0.0, + "tp": 31665, + "fp": 67957, + "tn": null, + "fn": 14551 + }, + { + "threshold": 0.1, + "tp": 31654, + "fp": 67944, + "tn": null, + "fn": 14552 + }, + { + "threshold": 0.2, + "tp": 31650, + "fp": 67944, + "tn": null, + "fn": 14552 + }, + { + "threshold": 0.30000000000000004, + "tp": 31636, + "fp": 67880, + "tn": null, + "fn": 14557 + }, + { + "threshold": 0.4, + "tp": 31636, + "fp": 67880, + "tn": null, + "fn": 14557 + }, + { + "threshold": 0.5, + "tp": 31636, + "fp": 67877, + "tn": null, + "fn": 14557 + }, + { + "threshold": 0.6000000000000001, + "tp": 28841, + "fp": 55074, + "tn": null, + "fn": 17349 + }, + { + "threshold": 0.7000000000000001, + "tp": 23782, + "fp": 40996, + "tn": null, + "fn": 22406 + }, + { + "threshold": 0.8, + "tp": 15714, + "fp": 26513, + "tn": null, + "fn": 30473 + }, + { + "threshold": 0.9, + "tp": 6744, + "fp": 12474, + "tn": null, + "fn": 39443 + }, + { + "threshold": 1.0, + "tp": 8, + "fp": 45, + "tn": null, + "fn": 46179 + } + ], + "latency_curve": [ + { + "input_length_chars": 400, + "latency_seconds": 0.0648672898610433, + "latency_seconds_stddev": 0.004409805747012531, + "latency_seconds_min": 0.05778670310974121, + "latency_seconds_max": 0.07701683044433594 + }, + { + "input_length_chars": 300, + "latency_seconds": 0.06438627507951525, + "latency_seconds_stddev": 0.0033304245125647843, + "latency_seconds_min": 0.058510780334472656, + "latency_seconds_max": 0.0726161003112793 + }, + { + "input_length_chars": 
200, + "latency_seconds": 0.057857394218444824, + "latency_seconds_stddev": 0.00017273426055908203, + "latency_seconds_min": 0.05768465995788574, + "latency_seconds_max": 0.058030128479003906 + } + ], + "gpu_mem_idle_mb": 1699.310546875, + "disk_space_mb": 0.0, + "benchmark_cpu": "", + "benchmark_gpu": "", + "notes": "" + } +] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b9c5011 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,25 @@ +[project] +name = "guardrails_pii" +version = "1.0.4" +description = "Repo for the state-of-the-art Guardrails PII Validator." +authors = [{ name = "Guardrails AI", email = "contact@guardrailsai.com" }] +license = { file = "LICENSE" } +readme = "README.md" +requires-python = ">= 3.8.1" +dependencies = [ + "guardrails-ai>=0.4.0", + "gliner", + "presidio-analyzer", + "presidio-anonymizer", +] + +[project.optional-dependencies] +dev = ["pyright", "pytest", "ruff"] + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-rP" +testpaths = ["tests"] + +[tool.pyright] +include = ["validator"] diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000..865efbd --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,3 @@ +{ + "exclude": ["validator/analyzer_engine.py"] +} diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_validator.py b/tests/test_validator.py new file mode 100644 index 0000000..767c9cb --- /dev/null +++ b/tests/test_validator.py @@ -0,0 +1,23 @@ +from guardrails import Guard +from validator import GuardrailsPII + +guard = Guard.from_string( + validators=[ + GuardrailsPII(entities=["DATE_TIME"], on_fail="fix", use_local=True) + ] +) + + +def test_validator_success(): + TEST_OUTPUT = "He is a soccer player." + raw_output, guarded_output, *rest = guard.parse(TEST_OUTPUT) + assert guarded_output == TEST_OUTPUT + + +def test_validator_fail(): + TEST_FAIL_OUTPUT = "Cristiano Ronaldo dos Santos Aveiro (born 5 February 1985) is a Portuguese professional footballer." + raw_output, guarded_output, *rest = guard.parse(TEST_FAIL_OUTPUT) + assert ( + guarded_output + == "Cristiano Ronaldo dos Santos Aveiro (born ) is a Portuguese professional footballer." 
+ ) diff --git a/validator/__init__.py b/validator/__init__.py new file mode 100644 index 0000000..332dcf7 --- /dev/null +++ b/validator/__init__.py @@ -0,0 +1,3 @@ +from .main import GuardrailsPII + +__all__ = ["GuardrailsPII"] diff --git a/validator/analyzer_engine.py b/validator/analyzer_engine.py new file mode 100644 index 0000000..57adfd3 --- /dev/null +++ b/validator/analyzer_engine.py @@ -0,0 +1,95 @@ +from presidio_analyzer import ( + AnalyzerEngine as BaseAnalyzerEngine, + RecognizerResult, + EntityRecognizer, +) +from presidio_analyzer.nlp_engine import NlpArtifacts +import re +from typing import Optional, List +import json + + +class AnalyzerEngine(BaseAnalyzerEngine): + def analyze( + self, + text: str, + language: str, + entities: Optional[List[str]] = None, + correlation_id: Optional[str] = None, + score_threshold: Optional[float] = None, + return_decision_process: Optional[bool] = False, + ad_hoc_recognizers: Optional[List[EntityRecognizer]] = None, + context: Optional[List[str]] = None, + allow_list: Optional[List[str]] = None, + allow_list_match: Optional[str] = "exact", + regex_flags: Optional[int] = re.DOTALL | re.MULTILINE | re.IGNORECASE, + nlp_artifacts: Optional[NlpArtifacts] = None, + deduplicate: bool = True, + ) -> List[RecognizerResult]: + all_fields = not entities + + recognizers = self.registry.get_recognizers( + language=language, + entities=entities, + all_fields=all_fields, + ad_hoc_recognizers=ad_hoc_recognizers, + ) + + if all_fields: + # Since all_fields=True, list all entities by iterating + # over all recognizers + entities = self.get_supported_entities(language=language) + + # run the nlp pipeline over the given text, store the results in + # a NlpArtifacts instance + if not nlp_artifacts: + nlp_artifacts = self.nlp_engine.process_text(text, language) + + if self.log_decision_process and correlation_id: + self.app_tracer.trace( + correlation_id, "nlp artifacts:" + nlp_artifacts.to_json() + ) + + results = [] + for recognizer in recognizers: + # Lazy loading of the relevant recognizers + if not recognizer.is_loaded: + recognizer.load() + recognizer.is_loaded = True + + # analyze using the current recognizer and append the results + current_results = recognizer.analyze( + text=text, entities=entities, nlp_artifacts=nlp_artifacts + ) + if current_results: + # add recognizer name to recognition metadata inside results + # if not exists + super()._AnalyzerEngine__add_recognizer_id_if_not_exists( # type: ignore + current_results, recognizer + ) + results.extend(current_results) + + results = self._enhance_using_context( + text, results, nlp_artifacts, recognizers, context + ) + + if self.log_decision_process and correlation_id: + self.app_tracer.trace( + correlation_id, + json.dumps([str(result.to_dict()) for result in results]), + ) + + # Remove duplicates or low score results + if deduplicate: + results = EntityRecognizer.remove_duplicates(results) + results = super()._AnalyzerEngine__remove_low_scores(results, score_threshold) # type: ignore + + if allow_list and allow_list_match: + results = self._remove_allow_list( + results, allow_list, text, regex_flags, allow_list_match + ) + + if not return_decision_process: + results = super()._AnalyzerEngine__remove_decision_process(results) # type: ignore + + return results diff --git a/validator/constants.py b/validator/constants.py new file mode 100644 index 0000000..7f28d82 --- /dev/null +++ b/validator/constants.py @@ -0,0 +1,21 @@ +PRESIDIO_TO_GLINER = { + "LOCATION": ["location", "place", "address"], + 
"DATE_TIME": ["date", "time", "date of birth"], + "PERSON": ["person", "name"], + "PHONE_NUMBER": [ + "phone number", + ], +} + +GLINER_TO_PRESIDIO = {} +for presidio, entities in PRESIDIO_TO_GLINER.items(): + for entity in entities: + GLINER_TO_PRESIDIO[entity] = presidio + +DEFAULT_THRESHOLDS = { + "LOCATION": 0.5, + "DATE_TIME": 0.5, + "PERSON": 0.5, + "PHONE_NUMBER": 0.5, + "EMAIL_ADDRESS": 1.0, +} diff --git a/validator/gliner_recognizer.py b/validator/gliner_recognizer.py new file mode 100644 index 0000000..a1ce76a --- /dev/null +++ b/validator/gliner_recognizer.py @@ -0,0 +1,39 @@ +from presidio_analyzer import EntityRecognizer, RecognizerResult +from gliner import GLiNER +from .constants import PRESIDIO_TO_GLINER, GLINER_TO_PRESIDIO + + +class GLiNERRecognizer(EntityRecognizer): + def __init__(self, supported_entities, model_name): + self.model_name = model_name + self.supported_entities = supported_entities + + gliner_entities = set() + + for entity in supported_entities: + if entity not in PRESIDIO_TO_GLINER: + continue + gliner_entities.update(PRESIDIO_TO_GLINER[entity]) + self.gliner_entities = list(gliner_entities) + + super().__init__(supported_entities=supported_entities) + + def load(self) -> None: + """No loading required as the model is loaded in the constructor""" + self.model = GLiNER.from_pretrained(self.model_name) + + def analyze(self, text, entities=None, nlp_artifacts=None): + results = self.model.predict_entities(text, self.gliner_entities) + return [ + RecognizerResult( + entity_type=GLINER_TO_PRESIDIO[entity["label"]], + start=entity["start"], + end=entity["end"], + score=entity.get("score"), + recognition_metadata={ + RecognizerResult.RECOGNIZER_NAME_KEY: self.name, + RecognizerResult.RECOGNIZER_IDENTIFIER_KEY: self.id, + }, + ) + for entity in results + ] diff --git a/validator/main.py b/validator/main.py new file mode 100644 index 0000000..b98a553 --- /dev/null +++ b/validator/main.py @@ -0,0 +1,190 @@ +import json +from typing import Any, Callable, Dict, Optional, List, Tuple, Sequence, cast + +from guardrails.validator_base import ( + FailResult, + PassResult, + ValidationResult, + Validator, + register_validator, +) +from guardrails.validator_base import ErrorSpan +from presidio_anonymizer import AnonymizerEngine +from presidio_analyzer import ( + RecognizerRegistry, + EntityRecognizer, + RecognizerResult as AnalyzerRecognizerResult, +) +from presidio_anonymizer import RecognizerResult as AnonymizerRecognizerResult +from .analyzer_engine import AnalyzerEngine +from .gliner_recognizer import GLiNERRecognizer +from .constants import PRESIDIO_TO_GLINER, DEFAULT_THRESHOLDS + +from pydantic import BaseModel + +def get_entity_threshold(entity: str) -> float: + if entity in DEFAULT_THRESHOLDS: + return DEFAULT_THRESHOLDS[entity] + if entity in PRESIDIO_TO_GLINER: + return 0.5 + else: + return 0.0 + +class InferenceInput(BaseModel): + text: str + entities: List[str] + + +class InferenceOutputResult(BaseModel): + entity_type: str + start: int + end: int + score: float + +class InferenceOutput(BaseModel): + results: List[InferenceOutputResult] + anonymized_text: str + + +@register_validator(name="guardrails/guardrails_pii", data_type="string") +class GuardrailsPII(Validator): + def __init__( + self, + entities: List[str], + model_name: str = "urchade/gliner_small-v2.1", + get_entity_threshold: Callable = get_entity_threshold, + on_fail: Optional[Callable] = None, + use_local: bool = True, + **kwargs, + ): + """Validates that the LLM-generated text does not contain 
Personally Identifiable Information (PII). + + This validator uses Presidio and GLiNER to detect and anonymize PII in the generated text. + + **Key Properties** + + | Property | Description | + | ----------------------------- | ----------------------------------- | + | Name for `format` attribute | `guardrails/guardrails_pii` | + | Supported data types | `string` | + | Programmatic fix | Anonymized text | + + Args: + entities (List[str]): A list of entity types to detect and anonymize. + model_name (str, optional): The name of the GLiNER model to use. + Defaults to "urchade/gliner_small-v2.1". + on_fail (Optional[Callable], optional): A callable to execute when the + validation fails. Defaults to None. + """ + super().__init__( + on_fail=on_fail, + model_name=model_name, + entities=entities, + get_entity_threshold=get_entity_threshold, + use_local=use_local, + **kwargs, + ) + + self.entities = entities + self.model_name = model_name + self.get_entity_threshold = get_entity_threshold + + if self.use_local: + self.gliner_recognizer = GLiNERRecognizer( + supported_entities=entities, + model_name=model_name, + ) + registry = RecognizerRegistry() + registry.load_predefined_recognizers() + registry.add_recognizer(self.gliner_recognizer) + self.pii_analyzer = AnalyzerEngine( + registry=registry, supported_languages=["en"] + ) + self.pii_anonymizer = AnonymizerEngine() + + def _inference_local(self, model_input: InferenceInput): + + text = model_input.text + entities = model_input.entities + + results = self.pii_analyzer.analyze( + text=text, + language="en", + entities=entities, + deduplicate=False, + ) + + results = [ + r + for r in results + if ( + r.entity_type in PRESIDIO_TO_GLINER + and r.recognition_metadata[AnalyzerRecognizerResult.RECOGNIZER_NAME_KEY] + == self.gliner_recognizer.name + ) + or (r.entity_type not in PRESIDIO_TO_GLINER and r.entity_type in entities) + ] + + results = [ + r for r in results if r.score >= self.get_entity_threshold(r.entity_type) + ] + + results = EntityRecognizer.remove_duplicates(results) + + anonymizer_results: Sequence[AnonymizerRecognizerResult] = [ + AnonymizerRecognizerResult( + entity_type=r.entity_type, start=r.start, end=r.end, score=r.score + ) + for r in results + ] + + anonymized_text = self.pii_anonymizer.anonymize(text, anonymizer_results).text + + # covert to simpler pydantic schema which is json serializable and used in remote endpoint + results = [ + InferenceOutputResult( + entity_type=r.entity_type, start=r.start, end=r.end, score=r.score + ) + for r in results + ] + + return InferenceOutput(anonymized_text=anonymized_text, results=results) + + def _inference_remote(self, model_input: InferenceInput): + request_body = { + "text": model_input.text, + "entities": model_input.entities, + } + response = self._hub_inference_request(json.dumps(request_body), self.validation_endpoint) + + return InferenceOutput.model_validate(response) + + def anonymize(self, text: str, entities: list[str]) -> Tuple[str, list[ErrorSpan]]: + input_request = InferenceInput(text=text, entities=entities) + output = self._inference(input_request) + + output = cast(InferenceOutput, output) + error_spans = [ + ErrorSpan(start=r.start, end=r.end, reason=r.entity_type) for r in output.results + ] + + return output.anonymized_text, error_spans + + + def _validate(self, value: Any, metadata: Dict = {}) -> ValidationResult: + entities = metadata.get("entities", self.entities) + if entities is None: + raise ValueError( + "`entities` must be set in order to use the `GlinerPII` 
validator." + ) + + anonymized_text, error_spans = self.anonymize(text=value, entities=entities) + + if len(error_spans) == 0: + return PassResult() + else: + return FailResult( + error_message=f"The following text contains PII:\n{value}", + fix_value=anonymized_text, + error_spans=error_spans, + ) diff --git a/validator/post-install.py b/validator/post-install.py new file mode 100644 index 0000000..0905452 --- /dev/null +++ b/validator/post-install.py @@ -0,0 +1,5 @@ +from presidio_analyzer import AnalyzerEngine +from presidio_anonymizer import AnonymizerEngine + +analyzer = AnalyzerEngine() +anonymizer = AnonymizerEngine()
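Reviewer note: a minimal usage sketch of the per-call entity override that `_validate` supports via `metadata.get("entities", self.entities)`. It assumes a local editable install (`make dev`) so that `validator` is importable and that the GLiNER weights can be downloaded; the contact string and entity choices are illustrative only.

```python
# Sketch: override the configured entity list for a single parse() call.
# Assumes guardrails-ai >= 0.4 and a local install of this package.
from guardrails import Guard
from validator import GuardrailsPII

# Configured to look for DATE_TIME by default, anonymizing on failure.
guard = Guard().use(
    GuardrailsPII(entities=["DATE_TIME"], on_fail="fix", use_local=True)
)

# For this call only, target PERSON and EMAIL_ADDRESS instead;
# _validate() reads these from metadata before falling back to self.entities.
raw_output, guarded_output, *rest = guard.parse(
    "Contact Jane Doe at jane.doe@example.com for details.",  # illustrative text
    metadata={"entities": ["PERSON", "EMAIL_ADDRESS"]},
)

# With on_fail="fix", guarded_output holds the anonymized text, with detected
# spans replaced by placeholders such as <PERSON> and <EMAIL_ADDRESS>.
print(guarded_output)
```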