From bb22c651e2919f8202a0a35396f3cb572a0e9f7c Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Mon, 21 Jun 2021 16:55:36 -0600 Subject: [PATCH] Fix bias transform with disabled users/items --- lenskit/algorithms/bias.py | 8 ++++++-- tests/test_bias.py | 25 ++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/lenskit/algorithms/bias.py b/lenskit/algorithms/bias.py index c33e1182b..2fa7e7b5f 100644 --- a/lenskit/algorithms/bias.py +++ b/lenskit/algorithms/bias.py @@ -122,13 +122,15 @@ def transform(self, ratings, *, indexes=False): if self.item_offsets_ is not None: rvps = rvps.join(self.item_offsets_, on='item', how='left') rvps['rating'] -= rvps['i_off'].fillna(0) + rvps = rvps.drop(columns='i_off') if self.user_offsets_ is not None: rvps = rvps.join(self.user_offsets_, on='user', how='left') rvps['rating'] -= rvps['u_off'].fillna(0) + rvps = rvps.drop(columns='u_off') if indexes: rvps['uidx'] = self.user_offsets_.index.get_indexer(rvps['user']) rvps['iidx'] = self.item_offsets_.index.get_indexer(rvps['item']) - return rvps.drop(columns=['u_off', 'i_off']) + return rvps def inverse_transform(self, ratings): """ @@ -142,10 +144,12 @@ def inverse_transform(self, ratings): if self.item_offsets_ is not None: rvps = rvps.join(self.item_offsets_, on='item', how='left') rvps['rating'] += rvps['i_off'].fillna(0) + del rvps['i_off'] if self.user_offsets_ is not None: rvps = rvps.join(self.user_offsets_, on='user', how='left') rvps['rating'] += rvps['u_off'].fillna(0) - return rvps.drop(columns=['u_off', 'i_off']) + del rvps['u_off'] + return rvps def transform_user(self, ratings): """ diff --git a/tests/test_bias.py b/tests/test_bias.py index e506b9da3..db459427d 100644 --- a/tests/test_bias.py +++ b/tests/test_bias.py @@ -8,7 +8,7 @@ import pandas as pd import numpy as np -from pytest import approx, raises +from pytest import approx, raises, mark from lenskit.util.test import ml_test @@ -225,6 +225,29 @@ def test_bias_transform_indexes(): assert denorm['rating'].values == approx(ratings['rating'].values, 1.0e-6) +@mark.parametrize(['users', 'items'], [(True, False), (False, True), (False, False)]) +def test_bias_transform_disable(users, items): + algo = Bias(users=users, items=items) + ratings = ml_test.ratings + + normed = algo.fit_transform(ratings) + + assert all(normed['user'] == ratings['user']) + assert all(normed['item'] == ratings['item']) + denorm = algo.inverse_transform(normed) + assert denorm['rating'].values == approx(ratings['rating'], 1.0e-6) + + n2 = ratings + nr = n2.rating - algo.mean_ + if items: + n2 = n2.join(algo.item_offsets_, on='item') + nr = nr - n2.i_off + if users: + n2 = n2.join(algo.user_offsets_, on='user') + nr = nr - n2.u_off + assert normed['rating'].values == approx(nr.values) + + def test_bias_item_damp(): algo = Bias(users=False, damping=5) algo.fit(simple_df)