commit 2b88ca6 (1 parent: fd3f027)
amazingDD committed Jul 30, 2022
Showing 11 changed files with 427 additions and 208 deletions.
@@ -1 +1 @@
-__version__ = 'v2.1.3'
+__version__ = 'v2.1.4'
@@ -0,0 +1,6 @@
+factors: 64
+lr: 0.01
+reg_1: 0
+reg_2: 0
+epochs: 30
+num_layers: 2
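
A minimal sketch of how a config like the one above might be assembled for the model added in this commit. The file name lightgcn.yaml is an assumption, and the extra keys mirror exactly what LightGCN.__init__ below reads from its config dict (inter_matrix is expected to come from utils.get_inter_matrix, per the comment in the model):

import yaml

# hyperparameters from the YAML above ('lightgcn.yaml' is an assumed file name)
with open('lightgcn.yaml') as f:
    config = yaml.safe_load(f)

# runtime fields that LightGCN.__init__ also expects; the values here are
# illustrative placeholders, not part of this commit
config.update({
    'user_num': 943,
    'item_num': 1682,
    'topk': 10,
    'loss_type': 'BPR',
    'optimizer': 'default',
    'initializer': 'default',
    'early_stop': True,
    'inter_matrix': None,  # scipy COO matrix built by utils.get_inter_matrix
})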
@@ -1,8 +1,8 @@
 factors: 36
-node_dropout: 0.5
-mess_dropout: 0.5
+node_dropout: 0.0
+mess_dropout: 0.1
 lr: 0.01
 reg_1: 0
 reg_2: 0
 epochs: 30
-node_dropout_flag: 1
+hidden_size_list: ~
@@ -0,0 +1,211 @@
'''
@inproceedings{he2020lightgcn,
  title={LightGCN: Simplifying and powering graph convolution network for recommendation},
  author={He, Xiangnan and Deng, Kuan and Wang, Xiang and Li, Yan and Zhang, Yongdong and Wang, Meng},
  booktitle={Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages={639--648},
  year={2020}
}
'''
import torch
import torch.nn as nn

import numpy as np
import scipy.sparse as sp

from daisy.model.AbstractRecommender import GeneralRecommender

class LightGCN(GeneralRecommender):
    def __init__(self, config):
        '''
        LightGCN Recommender Class

        Parameters
        ----------
        user_num : int, the number of users
        item_num : int, the number of items
        factors : int, embedding dimension
        num_layers : int, number of propagation layers
        epochs : int, number of training epochs
        lr : float, learning rate
        reg_1 : float, first-order regularization term
        reg_2 : float, second-order regularization term
        loss_type : str, loss function type
        optimizer : str, optimization method for training the algorithm
        initializer : str, parameter initializer
        gpuid : str, GPU ID
        early_stop : bool, whether to activate the early-stop mechanism
        '''
        super(LightGCN, self).__init__(config)

        self.epochs = config['epochs']
        self.lr = config['lr']
        self.topk = config['topk']
        self.user_num = config['user_num']
        self.item_num = config['item_num']

        # get this matrix from utils.get_inter_matrix and add it to config
        self.interaction_matrix = config['inter_matrix']

        self.factors = config['factors']
        self.num_layers = config['num_layers']
        self.reg_1 = config['reg_1']
        self.reg_2 = config['reg_2']

        self.embed_user = nn.Embedding(self.user_num, self.factors)
        self.embed_item = nn.Embedding(self.item_num, self.factors)

        self.loss_type = config['loss_type']
        self.optimizer = config['optimizer'] if config['optimizer'] != 'default' else 'adam'
        self.initializer = config['initializer'] if config['initializer'] != 'default' else 'xavier_uniform'
        self.early_stop = config['early_stop']

        # storage variables for rank evaluation acceleration
        self.restore_user_e = None
        self.restore_item_e = None

        # parameter initialization
        self.apply(self._init_weight)

        # generate intermediate data
        self.norm_adj_matrix = self.get_norm_adj_mat().to(self.device)

    def get_norm_adj_mat(self):
        '''
        Get the normalized interaction matrix of users and items.
        Construct the square adjacency matrix from the training data,
        then normalize it symmetrically:

        .. math::
            A_{hat} = D^{-0.5} \times A \times D^{-0.5}

        Returns
        -------
        Sparse tensor of the normalized interaction matrix.
        '''
        # build the (user_num + item_num) x (user_num + item_num) adjacency matrix
        A = sp.dok_matrix((self.user_num + self.item_num, self.user_num + self.item_num), dtype=np.float32)
        inter_M = self.interaction_matrix
        inter_M_t = self.interaction_matrix.transpose()
        data_dict = dict(zip(zip(inter_M.row, inter_M.col + self.user_num), [1] * inter_M.nnz))
        data_dict.update(dict(zip(zip(inter_M_t.row + self.user_num, inter_M_t.col), [1] * inter_M_t.nnz)))
        A._update(data_dict)

        # normalize the adjacency matrix
        sum_arr = (A > 0).sum(axis=1)
        # add epsilon to avoid a divide-by-zero warning
        diag = np.array(sum_arr.flatten())[0] + 1e-7
        diag = np.power(diag, -0.5)
        D = sp.diags(diag)
        L = D * A * D

        # convert the normalized adjacency matrix to a torch sparse tensor
        L = sp.coo_matrix(L)
        row = L.row
        col = L.col
        i = torch.LongTensor(np.array([row, col]))
        data = torch.FloatTensor(L.data)
        SparseL = torch.sparse.FloatTensor(i, data, torch.Size(L.shape))

        return SparseL
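
    # A toy check of the normalization above (an added illustration, not part
    # of the original file): one user interacting with two items gives
    #   A = [[0, 1, 1],
    #        [1, 0, 0],
    #        [1, 0, 0]]
    # with degrees (2, 1, 1), so each nonzero entry of D^{-0.5} A D^{-0.5}
    # becomes 1 / sqrt(2 * 1) ≈ 0.7071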

    def get_ego_embeddings(self):
        ''' Get the embeddings of users and items and concatenate them into a new embedding matrix '''
        # note: the embedding layers are named embed_user / embed_item in __init__
        user_embeddings = self.embed_user.weight
        item_embeddings = self.embed_item.weight
        ego_embeddings = torch.cat([user_embeddings, item_embeddings], dim=0)

        return ego_embeddings

    def forward(self):
        all_embeddings = self.get_ego_embeddings()
        embeddings_list = [all_embeddings]

        for _ in range(self.num_layers):
            all_embeddings = torch.sparse.mm(self.norm_adj_matrix, all_embeddings)
            embeddings_list.append(all_embeddings)

        # layer combination: average the embeddings from all propagation layers
        lightgcn_all_embeddings = torch.stack(embeddings_list, dim=1)
        lightgcn_all_embeddings = torch.mean(lightgcn_all_embeddings, dim=1)

        user_embedding, item_embedding = torch.split(lightgcn_all_embeddings, [self.user_num, self.item_num])
        return user_embedding, item_embedding
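
    # For reference (an added note, not in the original file): forward()
    # implements the paper's light graph convolution,
    #   E^(k+1) = A_hat @ E^(k),    E_final = mean(E^(0), ..., E^(K)),
    # i.e. propagation with no feature transforms or nonlinear activations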

    def calc_loss(self, batch):
        # clear the storage variables when training
        if self.restore_user_e is not None or self.restore_item_e is not None:
            self.restore_user_e, self.restore_item_e = None, None

        user = batch[0].to(self.device)
        pos_item = batch[1].to(self.device)

        embed_user, embed_item = self.forward()

        u_embeddings = embed_user[user]
        pos_embeddings = embed_item[pos_item]
        pos_pred = torch.mul(u_embeddings, pos_embeddings).sum(dim=1)

        # regularize the ego (layer-0) embeddings; these are tensors, not modules
        u_ego_embeddings = self.embed_user(user)
        pos_ego_embeddings = self.embed_item(pos_item)

        if self.loss_type.upper() in ['CL', 'SL']:
            label = batch[2].to(self.device)
            loss = self.criterion(pos_pred, label)
            # add regularization terms
            loss += self.reg_1 * (u_ego_embeddings.norm(p=1) + pos_ego_embeddings.norm(p=1))
            loss += self.reg_2 * (u_ego_embeddings.norm() + pos_ego_embeddings.norm())

        elif self.loss_type.upper() in ['BPR', 'TL', 'HL']:
            neg_item = batch[2].to(self.device)
            neg_embeddings = embed_item[neg_item]
            neg_pred = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
            neg_ego_embeddings = self.embed_item(neg_item)

            loss = self.criterion(pos_pred, neg_pred)

            loss += self.reg_1 * (u_ego_embeddings.norm(p=1) + pos_ego_embeddings.norm(p=1) + neg_ego_embeddings.norm(p=1))
            loss += self.reg_2 * (u_ego_embeddings.norm() + pos_ego_embeddings.norm() + neg_ego_embeddings.norm())

        else:
            raise NotImplementedError(f'Invalid loss type: {self.loss_type}')

        return loss

    def predict(self, u, i):
        if self.restore_user_e is None or self.restore_item_e is None:
            self.restore_user_e, self.restore_item_e = self.forward()

        u_embedding = self.restore_user_e[u]
        i_embedding = self.restore_item_e[i]
        pred = torch.matmul(u_embedding, i_embedding.t())

        return pred.cpu()

    def rank(self, test_loader):
        if self.restore_user_e is None or self.restore_item_e is None:
            self.restore_user_e, self.restore_item_e = self.forward()

        rec_ids = torch.tensor([], dtype=torch.long, device=self.device)

        for us, cands_ids in test_loader:
            us = us.to(self.device)
            cands_ids = cands_ids.to(self.device)

            user_emb = self.restore_user_e[us].unsqueeze(dim=1)  # batch * 1 * factor
            # permute (not transpose) is needed to reorder all three dimensions
            item_emb = self.restore_item_e[cands_ids].permute(0, 2, 1)  # batch * factor * cand_num
            scores = torch.bmm(user_emb, item_emb).squeeze(dim=1)  # batch * cand_num

            rank_ids = torch.argsort(scores, descending=True)
            rank_list = torch.gather(cands_ids, 1, rank_ids)
            rank_list = rank_list[:, :self.topk]

            rec_ids = torch.cat((rec_ids, rank_list), 0)

        return rec_ids.cpu().numpy()

    def full_rank(self, u):
        if self.restore_user_e is None or self.restore_item_e is None:
            self.restore_user_e, self.restore_item_e = self.forward()

        user_emb = self.restore_user_e[u]  # factor
        items_emb = self.restore_item_e.data  # item * factor
        scores = torch.matmul(user_emb, items_emb.transpose(1, 0))  # item

        return torch.argsort(scores, descending=True)[:self.topk].cpu().numpy()
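
For orientation, a minimal end-to-end sketch of how this class might be driven. The dataset sizes, interaction pairs, and config values are illustrative assumptions (daisy's own pipeline builds the config dict and the COO interaction matrix), and criterion, _init_weight, and device are supplied by GeneralRecommender, which this sketch takes for granted:

import numpy as np
import scipy.sparse as sp

# toy interaction data: (user, item) pairs; sizes are made up for the example
users = np.array([0, 0, 1, 2])
items = np.array([1, 2, 0, 3])
inter_matrix = sp.coo_matrix(
    (np.ones_like(users, dtype=np.float32), (users, items)), shape=(3, 4)
)

config = {
    'user_num': 3, 'item_num': 4, 'factors': 64, 'num_layers': 2,
    'epochs': 30, 'lr': 0.01, 'reg_1': 0, 'reg_2': 0, 'topk': 2,
    'loss_type': 'BPR', 'optimizer': 'default', 'initializer': 'default',
    'early_stop': True, 'gpuid': '0', 'inter_matrix': inter_matrix,
}

model = LightGCN(config)            # LightGCN as defined in the file above
user_e, item_e = model.forward()    # propagated user/item embeddings
print(model.full_rank(0))           # top-k item ids for user 0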