Skip to content

Commit

Permalink
Version 0.1.56.dev0
Browse files Browse the repository at this point in the history
Updated tests and preset usage example
Added test generator example
  • Loading branch information
emeli-dral committed Aug 16, 2022
1 parent c374512 commit e4940da
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 3 deletions.
1 change: 1 addition & 0 deletions examples/sample_notebooks/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Classification Performance (Binary) | [link](classification_performance_breast_c
Probabilistic Classification Performance (Binary) | [link](probabilistic_classification_performance_breast_cancer.ipynb) | [link](https://colab.research.google.com/drive/1sE2H4mFSgtNe34JZMAeC3eLntid6oe1g) | Breast cancer sklearn.datasets
Data Quality | [link](data_quality_bike_sharing_demand.ipynb) | [link](https://colab.research.google.com/drive/1XDxs4k2wNHU9Xbxb9WI2rOgMkZFavyRd) | Bike sharing UCI: [link](https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset)
Tests and Test Presets | [link](tests_and_test_presets.ipynb) | [link](https://colab.research.google.com/drive/1xQJs6aQlU_7ludwpGGPnRLQpwQh65omr) | Adult data set openml
Automatic Test Generation | [link](test_generators.ipynb) | [link](https://colab.research.google.com/drive/1X4Bq9_I4U6WyDeqlC3Gj_eWPyS_4_uCg) | Adult data set openml

To learn how to customize Evidently to fit your needs, refer to the [how-to questions](../how_to_questions/).

Expand Down
11 changes: 9 additions & 2 deletions examples/sample_notebooks/tests_and_test_presets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -186,14 +186,19 @@
" TestNumberOfColumns(),\n",
" TestNumberOfRows(),\n",
" TestNumberOfNulls(),\n",
" TestShareOfNulls(),\n",
" TestNumberOfColumnsWithNulls(),\n",
" TestNumberOfRowsWithNulls(),\n",
" TestShareOfColumnsWithNulls(),\n",
" TestShareOfRowsWithNulls(),\n",
" TestNumberOfDifferentNulls(),\n",
" TestNumberOfConstantColumns(),\n",
" TestNumberOfEmptyRows(),\n",
" TestNumberOfEmptyColumns(),\n",
" TestNumberOfDuplicatedRows(),\n",
" TestNumberOfDuplicatedColumns(),\n",
" TestColumnsType(),\n",
" \n",
"])\n",
"\n",
"data_integrity_dataset_tests.run(reference_data=adult_ref, current_data=adult_cur)\n",
Expand Down Expand Up @@ -383,7 +388,9 @@
"outputs": [],
"source": [
"data_integrity_column_tests = TestSuite(tests=[\n",
" TestColumnNumberOfNulls(column_name='education'),\n",
" TestColumnShareOfNulls(column_name='education'),\n",
" TestColumnNumberOfDifferentNulls(column_name='education'),\n",
" TestColumnAllConstantValues(column_name='education'),\n",
" TestColumnAllUniqueValues(column_name='education'),\n",
" TestColumnValueRegExp(column_name='education',reg_exp='^[0..9]')\n",
Expand Down Expand Up @@ -658,7 +665,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -672,7 +679,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13"
"version": "3.9.12"
}
},
"nbformat": 4,
Expand Down
270 changes: 270 additions & 0 deletions examples/tests_and_test_presets.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6e27703e",
"metadata": {},
"source": [
"# Evidently Tests and Test Presets"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c78ba61",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from datetime import datetime\n",
"from sklearn.datasets import fetch_openml\n",
"\n",
"from evidently import ColumnMapping\n",
"# The test API is imported from the top-level evidently package;\n",
"# the former evidently.v2.* paths fail with ImportError.\n",
"from evidently.test_suite import TestSuite\n",
"from evidently.tests import *\n",
"\n",
"from evidently.test_preset import NoTargetPerformance, DataQuality, DataStability, DataDrift"
]
},
{
"cell_type": "markdown",
"id": "922df2f0",
"metadata": {},
"source": [
"## Prepare Datasets"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c19c6681",
"metadata": {},
"outputs": [],
"source": [
"data = fetch_openml(name='adult', version=2, as_frame='auto')\n",
"df = data.frame\n",
"\n",
"# synthetic target and prediction: the prediction is the target plus Gaussian noise;\n",
"# a seeded generator keeps the noise (and the test results) identical on every re-run\n",
"rng = np.random.default_rng(42)\n",
"df['target'] = df['education-num']\n",
"df['prediction'] = df['education-num'].values + rng.normal(0, 6, df.shape[0])\n",
"\n",
"# rows with these education levels form the current data, all other rows the reference data\n",
"is_current = df.education.isin(['Some-college', 'HS-grad', 'Bachelors'])\n",
"\n",
"#reference data\n",
"ref = df[~is_current]\n",
"\n",
"#current data\n",
"curr = df[is_current]"
]
},
{
"cell_type": "markdown",
"id": "c64570ed",
"metadata": {},
"source": [
"## How to run tests for a dataset?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f02bf13",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"dataset_tests = TestSuite(tests=[\n",
" TestNumberOfColumns(),\n",
" TestNumberOfRows(),\n",
" TestNumberOfConstantColumns(),\n",
" TestNumberOfDuplicatedColumns(),\n",
" TestNumberOfDuplicatedRows(),\n",
" TestColumnsType(),\n",
" TestTargetFeaturesCorrelations(),\n",
" TestHighlyCorrelatedFeatures(),\n",
" TestShareOfDriftedFeatures() ,\n",
" TestNumberOfDriftedFeatures(),\n",
"])\n",
"\n",
"dataset_tests.run(reference_data=ref, current_data=curr)\n",
"dataset_tests"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f599e85",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"dataset_tests.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7adc8cc7",
"metadata": {},
"outputs": [],
"source": [
"dataset_tests.as_dict()"
]
},
{
"cell_type": "markdown",
"id": "12f2b02d",
"metadata": {},
"source": [
"## How to run tests for individual features?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f364320",
"metadata": {},
"outputs": [],
"source": [
"feature_level_tests = TestSuite(tests=[\n",
" TestMeanInNSigmas(column_name='hours-per-week'),\n",
" TestShareOfOutRangeValues(column_name='hours-per-week'),\n",
" TestColumnNANShare(column_name='education'),\n",
" TestFeatureValueDrift(column_name='education')\n",
"])\n",
"\n",
"feature_level_tests.run(reference_data=ref, current_data=curr)\n",
"feature_level_tests"
]
},
{
"cell_type": "markdown",
"id": "bcac8862",
"metadata": {},
"source": [
"## How to set test parameters?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78a285ed",
"metadata": {},
"outputs": [],
"source": [
"feature_level_tests = TestSuite(tests=[\n",
" TestMeanInNSigmas(column_name='hours-per-week', n_sigmas=3),\n",
" TestShareOfOutRangeValues(column_name='hours-per-week', lte=0),\n",
" #TestNumberOfOutListValues(column_name='education', lt=0),\n",
" TestColumnNANShare(column_name='education', lt=0.2),\n",
"])\n",
"\n",
"feature_level_tests.run(reference_data=ref, current_data=curr)\n",
"feature_level_tests"
]
},
{
"cell_type": "markdown",
"id": "c5e6225e",
"metadata": {},
"source": [
"## How to use presets?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb1e4bd2",
"metadata": {},
"outputs": [],
"source": [
"no_target_performance = TestSuite(tests=[\n",
" NoTargetPerformance(most_important_features=['education-num', 'hours-per-week']),\n",
"])\n",
"\n",
"no_target_performance.run(reference_data=ref,current_data=curr)\n",
"no_target_performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8137bdf8",
"metadata": {},
"outputs": [],
"source": [
"data_drift = TestSuite(tests=[\n",
" DataDrift(),\n",
"])\n",
"\n",
"data_drift.run(reference_data=ref, current_data=curr)\n",
"data_drift"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ad087be",
"metadata": {},
"outputs": [],
"source": [
"data_stability = TestSuite(tests=[\n",
" DataStability(),\n",
"])\n",
"\n",
"data_stability.run(reference_data=ref, current_data=curr)\n",
"data_stability"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ccadf8ae",
"metadata": {},
"outputs": [],
"source": [
"data_quality = TestSuite(tests=[\n",
" DataQuality(),\n",
"])\n",
"\n",
"data_quality.run(reference_data=ref,current_data=curr)\n",
"data_quality"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion src/evidently/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# coding: utf-8

version_info = (0, 1, 55, 'dev0')
version_info = (0, 1, 56, 'dev0')
__version__ = ".".join(map(str, version_info))

0 comments on commit e4940da

Please sign in to comment.