Skip to content

Commit

Permalink
Added Prediction Probability Distribution widget to Probabilistic Classification Report
Browse files Browse the repository at this point in the history

Updated metrics calculation for Regression Performance Report
  • Loading branch information
emeli-dral committed Apr 21, 2021
1 parent 2cf009c commit c794c5f
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 24 deletions.
5 changes: 4 additions & 1 deletion evidently/tabs/prob_classification_performance_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
from evidently.widgets.prob_class_prod_metrics_matrix_widget import ProbClassProdMetricsMatrixWidget
from evidently.widgets.prob_class_ref_prediction_cloud_widget import ProbClassRefPredictionCloudWidget
from evidently.widgets.prob_class_prod_prediction_cloud_widget import ProbClassProdPredictionCloudWidget
from evidently.widgets.prob_class_ref_pred_distr_widget import ProbClassRefPredDistrWidget
from evidently.widgets.prob_class_prod_pred_distr_widget import ProbClassProdPredDistrWidget
from evidently.widgets.prob_class_ref_roc_curve_widget import ProbClassRefRocCurveWidget
from evidently.widgets.prob_class_prod_roc_curve_widget import ProbClassProdRocCurveWidget
from evidently.widgets.prob_class_ref_pr_curve_widget import ProbClassRefPRCurveWidget
from evidently.widgets.prob_class_prod_pr_curve_widget import ProbClassProdPRCurveWidget
from evidently.widgets.prob_class_ref_pr_table_widget import ProbClassRefPRTableWidget
from evidently.widgets.prob_class_prod_pr_table_widget import ProbClassProdPRTableWidget
#from evidently.widgets.tabs_widget import TabsWidget
from evidently.widgets.prob_class_confusion_based_feature_distr_table_widget import ProbClassConfusionBasedFeatureDistrTable

from evidently.widgets.widget import Widget
Expand All @@ -42,6 +43,8 @@ def _get_widgets(self) -> List[Widget]:
ProbClassProdMetricsMatrixWidget("Current: Quality Metrics by Class"),
ProbClassRefPredictionCloudWidget("Reference: Class Separation Quality"),
ProbClassProdPredictionCloudWidget("Current: Class Separation Quality"),
ProbClassRefPredDistrWidget("Reference: Probability Distribution"),
ProbClassProdPredDistrWidget("Current: Probability Distribution"),
ProbClassRefRocCurveWidget("Reference: ROC Curve"),
ProbClassProdRocCurveWidget("Current: ROC Curve"),
ProbClassRefPRCurveWidget("Reference: Precision-Recall Curve"),
Expand Down
127 changes: 127 additions & 0 deletions evidently/widgets/prob_class_prod_pred_distr_widget.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env python
# coding: utf-8

import json
import pandas as pd

import numpy as np

from sklearn import metrics, preprocessing
from pandas.api.types import is_numeric_dtype

import plotly.graph_objs as go
import plotly.figure_factory as ff

from evidently.model.widget import BaseWidgetInfo, AlertStats, AdditionalGraphInfo
from evidently.widgets.widget import Widget

red = "#ed0400"
grey = "#4d4d4d"


class ProbClassProdPredDistrWidget(Widget):
    """Tabbed widget: per-class predicted-probability distributions for the current (production) data.

    For probabilistic classification the prediction mapping is a list of
    per-class probability columns (one column per class label). For each
    class this widget plots the distribution of that class's predicted
    probability, split into rows whose true target IS that class vs. all
    other rows.
    """

    def __init__(self, title: str):
        super().__init__()
        self.title = title

    def get_info(self) -> BaseWidgetInfo:
        # calculate() leaves self.wi as None when production data or the
        # target/prediction mapping is missing; fail loudly instead of
        # returning None (restores the guard that was commented out, and
        # matches the behavior of the Reference counterpart widget).
        if self.wi:
            return self.wi
        raise ValueError("No prediction or target data provided")

    def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
        """Build the tabbed distribution graphs from production_data.

        column_mapping: optional dict with 'datetime', 'id', 'target',
        'prediction', 'numerical_features' and 'categorical_features' keys;
        'prediction' is expected to be a list of per-class probability
        column names. Sets self.wi on success, leaves it None otherwise.
        """
        if column_mapping:
            date_column = column_mapping.get('datetime')
            id_column = column_mapping.get('id')
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = column_mapping.get('numerical_features')
            if num_feature_names is None:
                num_feature_names = []
            else:
                num_feature_names = [name for name in num_feature_names if is_numeric_dtype(reference_data[name])]

            cat_feature_names = column_mapping.get('categorical_features')
            if cat_feature_names is None:
                cat_feature_names = []
            else:
                # NOTE(review): filtering categorical features with
                # is_numeric_dtype looks suspicious (drops string-typed
                # categoricals) — same pattern in the sibling widgets;
                # confirm intent before changing.
                cat_feature_names = [name for name in cat_feature_names if is_numeric_dtype(reference_data[name])]

        else:
            date_column = 'datetime' if 'datetime' in reference_data.columns else None
            id_column = None
            target_column = 'target' if 'target' in reference_data.columns else None
            prediction_column = 'prediction' if 'prediction' in reference_data.columns else None

            utility_columns = [date_column, id_column, target_column, prediction_column]

            # np.object is deprecated (NumPy 1.20) and removed in 1.24;
            # the builtin `object` is the documented equivalent.
            num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
            cat_feature_names = list(set(reference_data.select_dtypes([object]).columns) - set(utility_columns))

        if production_data is not None and target_column is not None and prediction_column is not None:
            # NOTE(review): this mutates the caller's DataFrame in place —
            # confirm downstream widgets expect the cleaned frame.
            production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
            production_data.dropna(axis=0, how='any', inplace=True)

            # (removed dead locals: argmax-derived prediction labels were
            # computed here but never used by the plots below)

            graphs = []

            # one tab per class: probability distribution for rows whose
            # target equals the class ("label") vs. everything else ("other")
            for label in prediction_column:
                pred_distr = ff.create_distplot(
                    [
                        production_data[production_data[target_column] == label][label],
                        production_data[production_data[target_column] != label][label]
                    ],
                    [str(label), "other"],
                    colors=[red, grey],
                    bin_size=0.05,
                    show_curve=False,
                    show_rug=True)

                pred_distr.update_layout(
                    xaxis_title="Probability",
                    yaxis_title="Share",
                    legend=dict(
                        orientation="h",
                        yanchor="bottom",
                        y=1.02,
                        xanchor="right",
                        x=1
                    )
                )

                pred_distr_json = json.loads(pred_distr.to_json())

                graphs.append({
                    "id": "tab_" + str(label),
                    "title": str(label),
                    "graph": {
                        "data": pred_distr_json["data"],
                        "layout": pred_distr_json["layout"],
                    }
                })

            self.wi = BaseWidgetInfo(
                title=self.title,
                type="tabbed_graph",
                details="",
                alertStats=AlertStats(),
                alerts=[],
                alertsPosition="row",
                insights=[],
                size=1,
                params={
                    "graphs": graphs
                },
                additionalGraphs=[],
            )
        else:
            self.wi = None

129 changes: 129 additions & 0 deletions evidently/widgets/prob_class_ref_pred_distr_widget.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/usr/bin/env python
# coding: utf-8

import json
import pandas as pd

import numpy as np

from sklearn import metrics, preprocessing
from pandas.api.types import is_numeric_dtype

import plotly.graph_objs as go
import plotly.figure_factory as ff

from evidently.model.widget import BaseWidgetInfo, AlertStats, AdditionalGraphInfo
from evidently.widgets.widget import Widget

red = "#ed0400"
grey = "#4d4d4d"


class ProbClassRefPredDistrWidget(Widget):
    """Tabbed widget: per-class predicted-probability distributions for the reference data.

    For probabilistic classification the prediction mapping is a list of
    per-class probability columns (one column per class label). For each
    class this widget plots the distribution of that class's predicted
    probability, split into rows whose true target IS that class vs. all
    other rows.
    """

    def __init__(self, title: str):
        super().__init__()
        self.title = title

    def get_info(self) -> BaseWidgetInfo:
        # self.wi is only set when calculate() had target and prediction
        # columns available; otherwise signal the missing data explicitly.
        if self.wi:
            return self.wi
        raise ValueError("No prediction or target data provided")

    def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
        """Build the tabbed distribution graphs from reference_data.

        column_mapping: optional dict with 'datetime', 'id', 'target',
        'prediction', 'numerical_features' and 'categorical_features' keys;
        'prediction' is expected to be a list of per-class probability
        column names. Sets self.wi on success, leaves it None otherwise.
        """
        if column_mapping:
            date_column = column_mapping.get('datetime')
            id_column = column_mapping.get('id')
            target_column = column_mapping.get('target')
            prediction_column = column_mapping.get('prediction')
            num_feature_names = column_mapping.get('numerical_features')
            if num_feature_names is None:
                num_feature_names = []
            else:
                num_feature_names = [name for name in num_feature_names if is_numeric_dtype(reference_data[name])]

            cat_feature_names = column_mapping.get('categorical_features')
            if cat_feature_names is None:
                cat_feature_names = []
            else:
                # NOTE(review): filtering categorical features with
                # is_numeric_dtype looks suspicious (drops string-typed
                # categoricals) — same pattern in the sibling widgets;
                # confirm intent before changing.
                cat_feature_names = [name for name in cat_feature_names if is_numeric_dtype(reference_data[name])]

        else:
            date_column = 'datetime' if 'datetime' in reference_data.columns else None
            id_column = None
            target_column = 'target' if 'target' in reference_data.columns else None
            prediction_column = 'prediction' if 'prediction' in reference_data.columns else None

            utility_columns = [date_column, id_column, target_column, prediction_column]

            # np.object is deprecated (NumPy 1.20) and removed in 1.24;
            # the builtin `object` is the documented equivalent.
            num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
            cat_feature_names = list(set(reference_data.select_dtypes([object]).columns) - set(utility_columns))

        if target_column is not None and prediction_column is not None:
            # NOTE(review): this mutates the caller's DataFrame in place —
            # confirm downstream widgets expect the cleaned frame.
            reference_data.replace([np.inf, -np.inf], np.nan, inplace=True)
            reference_data.dropna(axis=0, how='any', inplace=True)

            # (removed dead locals: argmax-derived prediction labels were
            # computed here but never used by the plots below)

            graphs = []

            # one tab per class: probability distribution for rows whose
            # target equals the class ("label") vs. everything else ("other")
            for label in prediction_column:
                pred_distr = ff.create_distplot(
                    [
                        reference_data[reference_data[target_column] == label][label],
                        reference_data[reference_data[target_column] != label][label]
                    ],
                    [str(label), "other"],
                    colors=[red, grey],
                    bin_size=0.05,
                    show_curve=False,
                    show_rug=True
                )

                pred_distr.update_layout(
                    xaxis_title="Probability",
                    yaxis_title="Share",
                    legend=dict(
                        orientation="h",
                        yanchor="bottom",
                        y=1.02,
                        xanchor="right",
                        x=1
                    )
                )

                pred_distr_json = json.loads(pred_distr.to_json())

                graphs.append({
                    "id": "tab_" + str(label),
                    "title": str(label),
                    "graph": {
                        "data": pred_distr_json["data"],
                        "layout": pred_distr_json["layout"],
                    }
                })

            self.wi = BaseWidgetInfo(
                title=self.title,
                type="tabbed_graph",
                details="",
                alertStats=AlertStats(),
                alerts=[],
                alertsPosition="row",
                insights=[],
                # half-width when shown next to the production widget,
                # full-width when there is no production data
                size=1 if production_data is not None else 2,
                params={
                    "graphs": graphs
                },
                additionalGraphs=[],
            )
        else:
            self.wi = None

13 changes: 2 additions & 11 deletions evidently/widgets/reg_prod_quality_metrics_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,14 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
me = np.mean(production_data[prediction_column] - production_data[target_column])
sde = np.std(production_data[prediction_column] - production_data[target_column], ddof = 1)

abs_err = list(map(lambda x : abs(x[0] - x[1]),
zip(production_data[prediction_column], production_data[target_column])))
abs_err = np.abs(production_data[prediction_column] - production_data[target_column])
mae = np.mean(abs_err)
sdae = np.std(abs_err, ddof = 1)

abs_perc_err = list(map(lambda x : 100*abs(x[0] - x[1])/x[0],
zip(production_data[prediction_column], production_data[target_column])))
abs_perc_err = 100.*np.abs(production_data[prediction_column] - production_data[target_column])/production_data[target_column]
mape = np.mean(abs_perc_err)
sdape = np.std(abs_perc_err, ddof = 1)

#sqrt_err = list(map(lambda x : (x[0] - x[1])**2,
# zip(production_data[prediction_column], production_data[target_column])))
#mse = np.mean(sqrt_err)
#sdse = np.std(sqrt_err, ddof = 1)

#error_norm_json = json.loads(error_norm.to_json())

self.wi = BaseWidgetInfo(
title=self.title,
type="counter",
Expand Down
15 changes: 3 additions & 12 deletions evidently/widgets/reg_ref_quality_metrics_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,24 +66,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#calculate quality metrics
me = np.mean(reference_data[prediction_column] - reference_data[target_column])
sde = np.std(reference_data[prediction_column] - reference_data[target_column], ddof = 1)

abs_err = list(map(lambda x : abs(x[0] - x[1]),
zip(reference_data[target_column], reference_data[prediction_column])))

abs_err = np.abs(reference_data[prediction_column] - reference_data[target_column])
mae = np.mean(abs_err)
sdae = np.std(abs_err, ddof = 1)

abs_perc_err = list(map(lambda x : 100*abs(x[0] - x[1])/x[0],
zip(reference_data[target_column], reference_data[prediction_column])))
abs_perc_err = 100.*np.abs(reference_data[prediction_column] - reference_data[target_column])/reference_data[target_column]
mape = np.mean(abs_perc_err)
sdape = np.std(abs_perc_err, ddof = 1)

#sqrt_err = list(map(lambda x : (x[0] - x[1])**2,
# zip(reference_data[target_column], reference_data[prediction_column])))
#mse = np.mean(sqrt_err)
#sdse = np.std(sqrt_err, ddof = 1)

#error_norm_json = json.loads(error_norm.to_json())

self.wi = BaseWidgetInfo(
title="Reference: Model Quality (+/- std)",
type="counter",
Expand Down

0 comments on commit c794c5f

Please sign in to comment.