diff --git a/outrank/algorithms/feature_ranking/ranking_mi_numba.py b/outrank/algorithms/feature_ranking/ranking_mi_numba.py index ca24ef2..deff335 100644 --- a/outrank/algorithms/feature_ranking/ranking_mi_numba.py +++ b/outrank/algorithms/feature_ranking/ranking_mi_numba.py @@ -81,24 +81,25 @@ def compute_entropies( initial_prob = _f_value_counts / all_events x_value_subspace = np.where(X == f_values[f_index]) + Y_classes = Y[x_value_subspace] - index = 0 + Y_classes_spoofed = np.roll(Y, _f_value_counts)[x_value_subspace] + nonzero_class_counts = np.zeros(len(class_values), dtype=np.int32) + nonzero_class_counts_spoofed = np.zeros(len(class_values), dtype=np.int32) # Cache nonzero counts - for c in class_values: + for index, c in enumerate(class_values): nonzero_class_counts[index] = np.count_nonzero(Y_classes == c) - index += 1 + nonzero_class_counts_spoofed[index] = np.count_nonzero(Y_classes_spoofed == c) + conditional_entropy += compute_conditional_entropy( Y_classes, class_values, _f_value_counts, initial_prob, nonzero_class_counts, ) if cardinality_correction: - # A neat hack that seems to work fine (permutations are expensive) - Y_classes = np.roll(Y, _f_value_counts)[x_value_subspace] - background_cond_entropy += compute_conditional_entropy( - Y_classes, class_values, _f_value_counts, initial_prob, nonzero_class_counts, + Y_classes_spoofed, class_values, _f_value_counts, initial_prob, nonzero_class_counts_spoofed, ) if not cardinality_correction: diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index b0a9af6..53954d8 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -72,9 +72,9 @@ def mixed_rank_graph( # Handle cont. types prior to interaction evaluation pbar.set_description('Encoding columns') - col_dots = '.' start_enc_timer = timer() tmp_df = pd.DataFrame({k : tmp_df[k].cat.codes for k in all_columns}) + end_enc_timer = timer() out_time_struct['encoding_columns'] = end_enc_timer - start_enc_timer diff --git a/setup.py b/setup.py index 934633f..8e8ebc2 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ def _read_description(): packages = [x for x in setuptools.find_packages() if x != 'test'] setuptools.setup( name='outrank', - version='0.94.2', + version='0.95', description='OutRank: Feature ranking for massive sparse data sets.', long_description=_read_description(), long_description_content_type='text/markdown',