Added option to pass arguments to classifier via dict #698

Open · wants to merge 1 commit into base: master
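In substance, the single commit threads an optional `classifier_args` dict from `explain_instance` (and `data_labels`) through to the user's `classifier_fn`, so extra options can reach the prediction function at explanation time. Below is a minimal sketch of the intended image-side usage against this branch; the `temperature` option and the stand-in classifier are illustrative, not part of the PR:

```python
import numpy as np
from lime.lime_image import LimeImageExplainer

def classify(images, classifier_args=None):
    # With this patch, LIME forwards the dict given to explain_instance
    # as classifier_args=...; it is None when the caller omits it.
    opts = classifier_args or {}
    temperature = opts.get('temperature', 1.0)  # illustrative option
    # Stand-in scorer: mean brightness decides between two classes.
    score = images.reshape(len(images), -1).mean(axis=1)
    p1 = 1.0 / (1.0 + np.exp(-(score - 0.5) / temperature))
    return np.stack([1.0 - p1, p1], axis=1)

rng = np.random.RandomState(0)
image = rng.uniform(size=(64, 64, 3))  # toy RGB image in [0, 1]

explainer = LimeImageExplainer()
explanation = explainer.explain_instance(
    image, classify, labels=(1,), num_samples=64,
    classifier_args={'temperature': 0.7})
```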
38 changes: 17 additions & 21 deletions lime/lime_image.py
@@ -6,9 +6,10 @@

import numpy as np
import sklearn
import sklearn.preprocessing
from sklearn.utils import check_random_state
from skimage.color import gray2rgb
from tqdm.auto import tqdm
from tqdm import tqdm


from . import lime_base
@@ -27,8 +28,7 @@ def __init__(self, image, segments):
self.segments = segments
self.intercept = {}
self.local_exp = {}
self.local_pred = {}
self.score = {}
self.local_pred = None

def get_image_and_mask(self, label, positive_only=True, negative_only=False, hide_rest=False,
num_features=5, min_weight=0.):
@@ -134,7 +134,7 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
distance_metric='cosine',
model_regressor=None,
random_seed=None,
progress_bar=True):
classifier_args=None):
"""Generates explanations for a prediction.

First, we generate neighborhood data by randomly perturbing features
@@ -149,14 +149,13 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
takes a numpy array and outputs prediction probabilities. For
ScikitClassifiers , this is classifier.predict_proba.
labels: iterable with labels to be explained.
hide_color: If not None, will hide superpixels with this color.
Otherwise, use the mean pixel color of the image.
hide_color: TODO
top_labels: if not None, ignore labels and produce explanations for
the K labels with highest prediction probabilities, where K is
this parameter.
num_features: maximum number of features present in explanation
num_samples: size of the neighborhood to learn the linear model
batch_size: batch size for model predictions
batch_size: TODO
distance_metric: the distance metric to use for weights.
model_regressor: sklearn regressor to use in explanation. Defaults
to Ridge regression in LimeBase. Must have model_regressor.coef_
@@ -166,7 +165,6 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
random_seed: integer used as random seed for the segmentation
algorithm. If None, a random integer, between 0 and 1000,
will be generated using the internal random number generator.
progress_bar: if True, show tqdm progress bar.

Returns:
An ImageExplanation object (see lime_image.py) with the corresponding
@@ -181,7 +179,10 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
segmentation_fn = SegmentationAlgorithm('quickshift', kernel_size=4,
max_dist=200, ratio=0.2,
random_seed=random_seed)
segments = segmentation_fn(image)
try:
segments = segmentation_fn(image)
except ValueError as e:
raise e

fudged_image = image.copy()
if hide_color is None:
@@ -194,12 +195,10 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
fudged_image[:] = hide_color

top = labels

data, labels = self.data_labels(image, fudged_image, segments,
classifier_fn, num_samples,
batch_size=batch_size,
progress_bar=progress_bar)

batch_size=batch_size, classifier_args=classifier_args)
distances = sklearn.metrics.pairwise_distances(
data,
data[0].reshape(1, -1),
@@ -214,8 +213,7 @@ def explain_instance(self, image, classifier_fn, labels=(1,),
for label in top:
(ret_exp.intercept[label],
ret_exp.local_exp[label],
ret_exp.score[label],
ret_exp.local_pred[label]) = self.base.explain_instance_with_data(
ret_exp.score, ret_exp.local_pred) = self.base.explain_instance_with_data(
data, labels, distances, label, num_features,
model_regressor=model_regressor,
feature_selection=self.feature_selection)
@@ -228,7 +226,7 @@ def data_labels(self,
classifier_fn,
num_samples,
batch_size=10,
progress_bar=True):
classifier_args=None):
"""Generates images and predictions in the neighborhood of this image.

Args:
@@ -240,7 +238,6 @@ def data_labels(self,
matrix of prediction probabilities
num_samples: size of the neighborhood to learn the linear model
batch_size: classifier_fn will be called on batches of this size.
progress_bar: if True, show tqdm progress bar.

Returns:
A tuple (data, labels), where:
@@ -253,8 +250,7 @@ def data_labels(self,
labels = []
data[0, :] = 1
imgs = []
rows = tqdm(data) if progress_bar else data
for row in rows:
for row in data:
temp = copy.deepcopy(image)
zeros = np.where(row == 0)[0]
mask = np.zeros(segments.shape).astype(bool)
@@ -263,10 +259,10 @@ def data_labels(self,
temp[mask] = fudged_image[mask]
imgs.append(temp)
if len(imgs) == batch_size:
preds = classifier_fn(np.array(imgs))
preds = classifier_fn(np.array(imgs), classifier_args=classifier_args)
labels.extend(preds)
imgs = []
if len(imgs) > 0:
preds = classifier_fn(np.array(imgs))
preds = classifier_fn(np.array(imgs), classifier_args=classifier_args)
labels.extend(preds)
return data, np.array(labels)
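A side effect of the new call sites above: `classifier_fn` is now always invoked as `classifier_fn(batch, classifier_args=...)`, even when no dict was supplied, so a prediction function written for the old one-argument signature would raise a `TypeError`. A thin adapter (a hypothetical helper, not part of the patch) would keep such classifiers working:

```python
def tolerate_classifier_args(classifier_fn):
    """Wrap a one-argument batch predictor so it accepts the
    classifier_args keyword that this patch always passes."""
    def wrapped(images, classifier_args=None):
        # Ignore the extra keyword for classifiers that don't use it.
        return classifier_fn(images)
    return wrapped

# e.g. explainer.explain_instance(image, tolerate_classifier_args(predict_proba))
```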
53 changes: 13 additions & 40 deletions lime/lime_tabular.py
@@ -12,8 +12,6 @@
import sklearn
import sklearn.preprocessing
from sklearn.utils import check_random_state
from pyDOE2 import lhs
from scipy.stats.distributions import norm

from lime.discretize import QuartileDiscretizer
from lime.discretize import DecileDiscretizer
@@ -139,7 +137,7 @@ def __init__(self,
discretizer='quartile',
sample_around_instance=False,
random_state=None,
training_data_stats=None):
training_data_stats=None,):
"""Init function.

Args:
@@ -208,11 +206,10 @@ def __init__(self,
if discretize_continuous and not sp.sparse.issparse(training_data):
# Set the discretizer if training data stats are provided
if self.training_data_stats:
discretizer = StatsDiscretizer(
training_data, self.categorical_features,
self.feature_names, labels=training_labels,
data_stats=self.training_data_stats,
random_state=self.random_state)
discretizer = StatsDiscretizer(training_data, self.categorical_features,
self.feature_names, labels=training_labels,
data_stats=self.training_data_stats,
random_state=self.random_state)

if discretizer == 'quartile':
self.discretizer = QuartileDiscretizer(
@@ -305,7 +302,7 @@ def explain_instance(self,
num_samples=5000,
distance_metric='euclidean',
model_regressor=None,
sampling_method='gaussian'):
classifier_args=None):
"""Generates explanations for a prediction.

First, we generate neighborhood data by randomly perturbing features
@@ -333,8 +330,6 @@ def explain_instance(self,
model_regressor: sklearn regressor to use in explanation. Defaults
to Ridge regression in LimeBase. Must have model_regressor.coef_
and 'sample_weight' as a parameter to model_regressor.fit()
sampling_method: Method to sample synthetic data. Defaults to Gaussian
sampling. Can also use Latin Hypercube Sampling.

Returns:
An Explanation object (see explanation.py) with the corresponding
@@ -343,7 +338,7 @@ def explain_instance(self,
if sp.sparse.issparse(data_row) and not sp.sparse.isspmatrix_csr(data_row):
# Preventative code: if sparse, convert to csr format if not in csr format already
data_row = data_row.tocsr()
data, inverse = self.__data_inverse(data_row, num_samples, sampling_method)
data, inverse = self.__data_inverse(data_row, num_samples)
if sp.sparse.issparse(data):
# Note in sparse case we don't subtract mean since data would become dense
scaled_data = data.multiply(self.scaler.scale_)
@@ -358,7 +353,7 @@ def explain_instance(self,
metric=distance_metric
).ravel()

yss = predict_fn(inverse)
yss = predict_fn(inverse, classifier_args)

# for classification, the model needs to provide a list of tuples - classes
# along with prediction probabilities
@@ -455,8 +450,7 @@ def explain_instance(self,
for label in labels:
(ret_exp.intercept[label],
ret_exp.local_exp[label],
ret_exp.score[label],
ret_exp.local_pred[label]) = self.base.explain_instance_with_data(
ret_exp.score, ret_exp.local_pred) = self.base.explain_instance_with_data(
scaled_data,
yss,
distances,
@@ -474,8 +468,7 @@ def __data_inverse(self,

def __data_inverse(self,
data_row,
num_samples,
sampling_method):
num_samples):
"""Generates a neighborhood around a prediction.

For numerical features, perturb them by sampling from a Normal(0,1) and
Expand All @@ -488,7 +481,6 @@ def __data_inverse(self,
Args:
data_row: 1d numpy array, corresponding to a row
num_samples: size of the neighborhood to learn the linear model
sampling_method: 'gaussian' or 'lhs'

Returns:
A tuple (data, inverse), where:
Expand Down Expand Up @@ -517,26 +509,9 @@ def __data_inverse(self,
instance_sample = data_row[:, non_zero_indexes]
scale = scale[non_zero_indexes]
mean = mean[non_zero_indexes]

if sampling_method == 'gaussian':
data = self.random_state.normal(0, 1, num_samples * num_cols
).reshape(num_samples, num_cols)
data = np.array(data)
elif sampling_method == 'lhs':
data = lhs(num_cols, samples=num_samples
).reshape(num_samples, num_cols)
means = np.zeros(num_cols)
stdvs = np.array([1]*num_cols)
for i in range(num_cols):
data[:, i] = norm(loc=means[i], scale=stdvs[i]).ppf(data[:, i])
data = np.array(data)
else:
warnings.warn('''Invalid input for sampling_method.
Defaulting to Gaussian sampling.''', UserWarning)
data = self.random_state.normal(0, 1, num_samples * num_cols
).reshape(num_samples, num_cols)
data = np.array(data)

data = self.random_state.normal(
0, 1, num_samples * num_cols).reshape(
num_samples, num_cols)
if self.sample_around_instance:
data = data * scale + instance_sample
else:
@@ -643,8 +618,6 @@ def __init__(self, training_data, mode="classification",
n_samples, n_timesteps * n_features)
self.n_timesteps = n_timesteps
self.n_features = n_features
if feature_names is None:
feature_names = ['feature%d' % i for i in range(n_features)]

# Update the feature names
feature_names = ['{}_t-{}'.format(n, n_timesteps - (i + 1))
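Note that the tabular path forwards the dict positionally, `yss = predict_fn(inverse, classifier_args)`, rather than by keyword as in lime_image.py, so a tabular prediction function needs a second parameter either way. A minimal sketch against this branch; the scikit-learn model and the `clip` option are illustrative:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 4))
y = (X[:, 0] + X[:, 1] > 0).astype(int)
model = RandomForestClassifier(n_estimators=25, random_state=0).fit(X, y)

def predict(data, classifier_args=None):
    # Receives the dict as a second positional argument.
    opts = classifier_args or {}
    probs = model.predict_proba(data)
    if opts.get('clip'):  # illustrative option
        probs = np.clip(probs, 1e-6, 1.0 - 1e-6)
    return probs

explainer = LimeTabularExplainer(X, mode='classification')
exp = explainer.explain_instance(X[0], predict, num_samples=500,
                                 classifier_args={'clip': True})
```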