From 848e340d4d167fae4134deb891280b4e01ac5562 Mon Sep 17 00:00:00 2001 From: ejolly Date: Thu, 29 Jul 2021 22:36:38 -0400 Subject: [PATCH] add note about why this isn't working right now --- neighbors/_fit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/neighbors/_fit.py b/neighbors/_fit.py index a3bc4ed..bf9b0a1 100644 --- a/neighbors/_fit.py +++ b/neighbors/_fit.py @@ -147,6 +147,8 @@ def mult(X, M, W, H, data_range, eps, tol, n_iterations, verbose): # The np.multiply's below have the effect of only using observed (non-missing) # ratings when performing the factor matrix updates + # NOTE: Current issue seems to be that this *dramatically over-fits* compared to just filling in missing values with 0. Training RMSE goes way down < 1%, but testing RMSE increases substantially because some predictions aren't even on the right scale! This seems to be dataset dependent as this binary masking works decently well for other datasets. Triple-checked the implementation, but can't seem to figure out why this occurs for some data and not others. All I can see is that for the datasets in which it occurs, the item x factor matrix is usually almost all 0s, with a few exceptionally large values (in the thousands). Try 'BestOfTimes' from the moth dataset as an example + # Update H (factor x item) numer = W.T @ np.multiply(M, X) denom = W.T @ np.multiply(M, W @ H) + eps