From a17e6e61617ef6e656e05a20ccf7c8fbad949000 Mon Sep 17 00:00:00 2001 From: amazingDD <1172392977@gmail.com> Date: Thu, 21 Jul 2022 19:49:41 +0800 Subject: [PATCH] update 2.1.0 --- daisy/__init__.py | 2 +- daisy/model/AbstractRecommender.py | 19 ++++- daisy/model/NFMRecommender.py | 10 ++- daisy/model/NGCFRecommender.py | 3 +- daisy/model/NeuMFRecommender.py | 15 ++-- daisy/utils/config.py | 110 +---------------------------- daisy/utils/metrics.py | 80 +++++++++++++++++++-- daisy/utils/utils.py | 43 ----------- run_examples/fair_hpo.py | 75 +++++++++++++++++++- run_examples/fair_rec.py | 34 ++++++++- setup.py | 6 +- 11 files changed, 213 insertions(+), 184 deletions(-) diff --git a/daisy/__init__.py b/daisy/__init__.py index 5898bb0..87cc967 100644 --- a/daisy/__init__.py +++ b/daisy/__init__.py @@ -1 +1 @@ -__version__ = 'v2.0.8' +__version__ = 'v2.1.0' diff --git a/daisy/model/AbstractRecommender.py b/daisy/model/AbstractRecommender.py index 0a484e5..c506432 100644 --- a/daisy/model/AbstractRecommender.py +++ b/daisy/model/AbstractRecommender.py @@ -4,7 +4,6 @@ import torch.nn as nn import torch.optim as optim -from daisy.utils.config import initializer_param_config, initializer_config from daisy.utils.loss import BPRLoss, TOP1Loss, HingeLoss @@ -17,6 +16,20 @@ def __init__(self): self.lr = 0.01 self.logger = None + self.initializer_param_config = { + 'normal': {'mean':0.0, 'std':0.01}, + 'uniform': {'a':0.0, 'b':1.0}, + 'xavier_normal': {'gain':1.0}, + 'xavier_uniform': {'gain':1.0} + } + + self.initializer_config = { + 'normal': nn.init.normal_, + 'uniform': nn.init.uniform_, + 'xavier_normal': nn.init.xavier_normal_, + 'xavier_uniform': nn.init.xavier_uniform_ + } + def calc_loss(self, batch): raise NotImplementedError @@ -55,11 +68,11 @@ def _build_optimizer(self, **kwargs): def _init_weight(self, m): if isinstance(m, nn.Linear): - initializer_config[self.initializer](m.weight, **initializer_param_config[self.initializer]) + 
self.initializer_config[self.initializer](m.weight, **self.initializer_param_config[self.initializer]) if m.bias is not None: nn.init.constant_(m.bias.data, 0.) elif isinstance(m, nn.Embedding): - initializer_config[self.initializer](m.weight, **initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](m.weight, **self.initializer_param_config[self.initializer]) else: pass diff --git a/daisy/model/NFMRecommender.py b/daisy/model/NFMRecommender.py index 5e6a7f4..a18e37d 100644 --- a/daisy/model/NFMRecommender.py +++ b/daisy/model/NFMRecommender.py @@ -11,8 +11,6 @@ import torch.nn as nn from daisy.model.AbstractRecommender import GeneralRecommender -from daisy.utils.config import initializer_param_config, initializer_config - class NFM(GeneralRecommender): def __init__(self, config): @@ -93,8 +91,8 @@ def __init__(self, config): self._init_weight() def _init_weight(self): - initializer_config[self.initializer](self.embed_user.weight, **initializer_param_config[self.initializer]) - initializer_config[self.initializer](self.embed_item.weight, **initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](self.embed_user.weight, **self.initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](self.embed_item.weight, **self.initializer_param_config[self.initializer]) nn.init.constant_(self.u_bias.weight, 0.0) nn.init.constant_(self.i_bias.weight, 0.0) @@ -102,8 +100,8 @@ def _init_weight(self): if self.num_layers > 0: # len(self.layers) for m in self.deep_layers: if isinstance(m, nn.Linear): - initializer_config[self.initializer](m.weight, **initializer_param_config[self.initializer]) - initializer_config[self.initializer](self.prediction.weight, **initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](m.weight, **self.initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](self.prediction.weight, 
**self.initializer_param_config[self.initializer]) else: nn.init.constant_(self.prediction.weight, 1.0) diff --git a/daisy/model/NGCFRecommender.py b/daisy/model/NGCFRecommender.py index 092b431..e9e5a1e 100644 --- a/daisy/model/NGCFRecommender.py +++ b/daisy/model/NGCFRecommender.py @@ -12,7 +12,6 @@ import torch.nn.functional as F from daisy.model.AbstractRecommender import GeneralRecommender -from daisy.utils.config import initializer_config class NGCF(GeneralRecommender): @@ -62,7 +61,7 @@ def __init__(self, config): self.sparse_norm_adj.to(self.device) def init_weight(self): - initializer = initializer_config[self.initializer] + initializer = self.initializer_config[self.initializer] embedding_dict = nn.ParameterDict({ 'user_emb': nn.Parameter(initializer(torch.empty(self.n_user, self.emb_size))), diff --git a/daisy/model/NeuMFRecommender.py b/daisy/model/NeuMFRecommender.py index cff0a44..52daabd 100644 --- a/daisy/model/NeuMFRecommender.py +++ b/daisy/model/NeuMFRecommender.py @@ -11,7 +11,6 @@ import torch.nn as nn from daisy.model.AbstractRecommender import GeneralRecommender -from daisy.utils.config import initializer_param_config, initializer_config class NeuMF(GeneralRecommender): @@ -80,17 +79,17 @@ def __init__(self, config): def _init_weight(self): if not self.model == 'NeuMF-pre': - initializer_config[self.initializer](self.embed_user_GMF.weight, **initializer_param_config[self.initializer]) - initializer_config[self.initializer](self.embed_item_GMF.weight, **initializer_param_config[self.initializer]) - initializer_config[self.initializer](self.embed_user_MLP.weight, **initializer_param_config[self.initializer]) - initializer_config[self.initializer](self.embed_item_MLP.weight, **initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](self.embed_user_GMF.weight, **self.initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](self.embed_item_GMF.weight, 
**self.initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](self.embed_user_MLP.weight, **self.initializer_param_config[self.initializer]) + self.initializer_config[self.initializer](self.embed_item_MLP.weight, **self.initializer_param_config[self.initializer]) for m in self.MLP_layers: if isinstance(m, nn.Linear): - initializer_config[self.initializer](m.weight) - initializer_config[self.initializer]( + self.initializer_config[self.initializer](m.weight) + self.initializer_config[self.initializer]( self.predict_layer.weight, - **initializer_param_config[self.initializer]) + **self.initializer_param_config[self.initializer]) for m in self.modules(): if isinstance(m, nn.Linear) and m.bias is not None: m.bias.data.zero_() diff --git a/daisy/utils/config.py b/daisy/utils/config.py index 44d83b5..59e303c 100644 --- a/daisy/utils/config.py +++ b/daisy/utils/config.py @@ -1,29 +1,13 @@ import os import re import yaml +import torch import random import logging import colorlog import numpy as np from colorama import init -import torch -import torch.nn as nn - -from daisy.model.KNNCFRecommender import ItemKNNCF -from daisy.model.PureSVDRecommender import PureSVD -from daisy.model.SLiMRecommender import SLiM -from daisy.model.PopRecommender import MostPop -from daisy.model.MFRecommender import MF -from daisy.model.FMRecommender import FM -from daisy.model.Item2VecRecommender import Item2Vec -from daisy.model.NeuMFRecommender import NeuMF -from daisy.model.NFMRecommender import NFM -from daisy.model.NGCFRecommender import NGCF -from daisy.model.VAECFRecommender import VAECF -from daisy.model.EASERecommender import EASE - -from daisy.utils.metrics import Precision, Recall, NDCG, MRR, MAP, HR, F1, AUC, Coverage, Diversity, Popularity from daisy.utils.parser import parse_args from daisy.utils.utils import ensure_dir, get_local_time @@ -34,98 +18,6 @@ 'CRITICAL': 'red', } -tune_params_config = { - 'mostpop': [], - 'itemknn': ['maxk'], - 
'puresvd': ['factors'], - 'slim': ['alpha', 'elastic'], - 'mf': ['num_ng', 'factors', 'lr', 'batch_size', 'reg_1', 'reg_2'], - 'fm': ['num_ng', 'factors', 'lr', 'batch_size', 'reg_1', 'reg_2'], - 'neumf': ['num_ng', 'factors', 'num_layers', 'dropout', 'lr', 'batch_size', 'reg_1', 'reg_2'], - 'nfm': ['num_ng', 'factors', 'num_layers', 'dropout', 'lr', 'batch_size', 'reg_1', 'reg_2'], - 'ngcf': ['num_ng', 'factors', 'node_dropout', 'mess_dropout', 'batch_size', 'lr', 'reg_1', 'reg_2'], - 'multi-vae': ['latent_dim', 'dropout','batch_size', 'lr', 'anneal_cap'], - 'ease': ['reg'], - 'item2vec': ['context_window', 'rho', 'lr', 'factors'], -} - -param_type_config = { - 'num_layers': 'int', - 'maxk': 'int', - 'factors': 'int', - 'alpha': 'float', - 'elastic': 'float', - 'num_ng': 'int', - 'lr': 'float', - 'batch_size': 'int', - 'reg_1': 'float', - 'reg_2': 'float', - 'dropout': 'float', - 'node_dropout': 'float', - 'mess_dropout': 'float', - 'latent_dim': 'int', - 'anneal_cap': 'float', - 'reg': 'float', - 'context_window': 'int', - 'rho': 'float' -} - -metrics_config = { - "recall": Recall, - "mrr": MRR, - "ndcg": NDCG, - "hr": HR, - "map": MAP, - "precision": Precision, - "f1": F1, - "auc": AUC, - "coverage": Coverage, - "diversity": Diversity, - "popularity": Popularity, -} - -metrics_name_config = { - "recall": 'Recall', - "mrr": 'MRR', - "ndcg": 'NDCG', - "hr": 'Hit Ratio', - "precision": 'Precision', - "f1": 'F1-score', - "auc": 'AUC', - "coverage": 'Coverage', - "diversity": 'Diversity', - "popularity": 'Average Popularity', -} - -model_config = { - 'mostpop': MostPop, - 'slim': SLiM, - 'itemknn': ItemKNNCF, - 'puresvd': PureSVD, - 'mf': MF, - 'fm': FM, - 'ngcf': NGCF, - 'neumf': NeuMF, - 'nfm': NFM, - 'multi-vae': VAECF, - 'item2vec': Item2Vec, - 'ease': EASE, -} - -initializer_param_config = { - 'normal': {'mean':0.0, 'std':0.01}, - 'uniform': {'a':0.0, 'b':1.0}, - 'xavier_normal': {'gain':1.0}, - 'xavier_uniform': {'gain':1.0} -} - -initializer_config = { - 
'normal': nn.init.normal_, - 'uniform': nn.init.uniform_, - 'xavier_normal': nn.init.xavier_normal_, - 'xavier_uniform': nn.init.xavier_uniform_ -} - def init_seed(seed, reproducibility): ''' init random seed for random functions in numpy, torch, cuda and cudnn diff --git a/daisy/utils/metrics.py b/daisy/utils/metrics.py index 4fb684d..818e298 100644 --- a/daisy/utils/metrics.py +++ b/daisy/utils/metrics.py @@ -1,6 +1,60 @@ +import os import numpy as np +import pandas as pd + +metrics_name_config = { + "recall": 'Recall', + "mrr": 'MRR', + "ndcg": 'NDCG', + "hr": 'Hit Ratio', + "precision": 'Precision', + "f1": 'F1-score', + "auc": 'AUC', + "coverage": 'Coverage', + "diversity": 'Diversity', + "popularity": 'Average Popularity', +} + +def calc_ranking_results(test_ur, pred_ur, test_u, config): + ''' + calculate metrics with prediction results and candidates sets + + Parameters + ---------- + test_ur : defaultdict(set) + groud truths for user in test set + pred_ur : np.array + rank list for user in test set + test_u : list + the user in order from test set + ''' + logger = config['logger'] + path = config['res_path'] + if not os.path.exists(path): + os.makedirs(path) + + metric = Metric(config) + res = pd.DataFrame({ + 'KPI@K': [metrics_name_config[kpi_name] for kpi_name in config['metrics']] + }) + + common_ks = [1, 5, 10, 20, 30, 50] + if config['topk'] not in common_ks: + common_ks.append(config['topk']) + for topk in common_ks: + if topk > config['topk']: + continue + else: + rank_list = pred_ur[:, :topk] + kpis = metric.run(test_ur, rank_list, test_u) + if topk == 10: + for kpi_name, kpi_res in zip(config['metrics'], kpis): + kpi_name = metrics_name_config[kpi_name] + logger.info(f'{kpi_name}@{topk}: {kpi_res:.4f}') + + res[topk] = np.array(kpis) -from daisy.utils.config import metrics_config + return res class Metric(object): def __init__(self, config) -> None: @@ -13,13 +67,29 @@ def run(self, test_ur, pred_ur, test_u): res = [] for mc in self.metrics: if mc 
== "coverage": - kpi = metrics_config[mc](pred_ur, self.item_num) + kpi = Coverage(pred_ur, self.item_num) elif mc == "popularity": - kpi = metrics_config[mc](test_ur, pred_ur, test_u, self.item_pop) + kpi = Popularity(test_ur, pred_ur, test_u, self.item_pop) elif mc == "diversity": - kpi = metrics_config[mc](pred_ur, self.i_categories) + kpi = Diversity(pred_ur, self.i_categories) + elif mc == 'ndcg': + kpi = NDCG(test_ur, pred_ur, test_u) + elif mc == 'mrr': + kpi = MRR(test_ur, pred_ur, test_u) + elif mc == 'recall': + kpi = Recall(test_ur, pred_ur, test_u) + elif mc == 'precision': + kpi = Precision(test_ur, pred_ur, test_u) + elif mc == 'hr': + kpi = HR(test_ur, pred_ur, test_u) + elif mc == 'map': + kpi = MAP(test_ur, pred_ur, test_u) + elif mc == 'f1': + kpi = F1(test_ur, pred_ur, test_u) + elif mc == 'auc': + kpi = AUC(test_ur, pred_ur, test_u) else: - kpi = metrics_config[mc](test_ur, pred_ur, test_u) + raise ValueError(f'Invalid metric name {mc}') res.append(kpi) diff --git a/daisy/utils/utils.py b/daisy/utils/utils.py index 10a4d0e..d7dfc20 100644 --- a/daisy/utils/utils.py +++ b/daisy/utils/utils.py @@ -3,12 +3,9 @@ import logging import datetime import numpy as np -import pandas as pd import scipy.sparse as sp from collections import defaultdict -from daisy.utils.metrics import Metric -from daisy.utils.config import metrics_name_config def ensure_dir(path): if not os.path.exists(path): @@ -20,46 +17,6 @@ def get_local_time(): return cur -def calc_ranking_results(test_ur, pred_ur, test_u, config): - ''' - calculate metrics with prediction results and candidates sets - - Parameters - ---------- - test_ur : defaultdict(set) - groud truths for user in test set - pred_ur : np.array - rank list for user in test set - test_u : list - the user in order from test set - ''' - logger = config['logger'] - path = config['res_path'] - ensure_dir(path) - - metric = Metric(config) - res = pd.DataFrame({ - 'KPI@K': [metrics_name_config[kpi_name] for kpi_name in 
config['metrics']] - }) - - common_ks = [1, 5, 10, 20, 30, 50] - if config['topk'] not in common_ks: - common_ks.append(config['topk']) - for topk in common_ks: - if topk > config['topk']: - continue - else: - rank_list = pred_ur[:, :topk] - kpis = metric.run(test_ur, rank_list, test_u) - if topk == 10: - for kpi_name, kpi_res in zip(config['metrics'], kpis): - kpi_name = metrics_name_config[kpi_name] - logger.info(f'{kpi_name}@{topk}: {kpi_res:.4f}') - - res[topk] = np.array(kpis) - - return res - def get_ur(df): """ Method of getting user-rating pairs diff --git a/run_examples/fair_hpo.py b/run_examples/fair_hpo.py index c57bc61..e294dd0 100644 --- a/run_examples/fair_hpo.py +++ b/run_examples/fair_hpo.py @@ -3,12 +3,85 @@ import numpy as np from logging import getLogger -from daisy.utils.config import init_seed, init_config, init_logger, tune_params_config, param_type_config, model_config, metrics_config +from daisy.utils.config import init_seed, init_config, init_logger from daisy.utils.loader import RawDataReader, Preprocessor from daisy.utils.dataset import AEDataset, BasicDataset, CandidatesDataset, get_dataloader from daisy.utils.splitter import TestSplitter, ValidationSplitter from daisy.utils.utils import get_history_matrix, get_adj_mat, get_ur, build_candidates_set, ensure_dir from daisy.utils.sampler import BasicNegtiveSampler, SkipGramNegativeSampler +from daisy.utils.metrics import MAP, NDCG, Recall, Precision, HR, MRR +from daisy.model.KNNCFRecommender import ItemKNNCF +from daisy.model.PureSVDRecommender import PureSVD +from daisy.model.SLiMRecommender import SLiM +from daisy.model.PopRecommender import MostPop +from daisy.model.MFRecommender import MF +from daisy.model.FMRecommender import FM +from daisy.model.Item2VecRecommender import Item2Vec +from daisy.model.NeuMFRecommender import NeuMF +from daisy.model.NFMRecommender import NFM +from daisy.model.NGCFRecommender import NGCF +from daisy.model.VAECFRecommender import VAECF +from 
daisy.model.EASERecommender import EASE + +model_config = { + 'mostpop': MostPop, + 'slim': SLiM, + 'itemknn': ItemKNNCF, + 'puresvd': PureSVD, + 'mf': MF, + 'fm': FM, + 'ngcf': NGCF, + 'neumf': NeuMF, + 'nfm': NFM, + 'multi-vae': VAECF, + 'item2vec': Item2Vec, + 'ease': EASE, +} + +metrics_config = { + "recall": Recall, + "mrr": MRR, + "ndcg": NDCG, + "hr": HR, + "map": MAP, + "precision": Precision, +} + +tune_params_config = { + 'mostpop': [], + 'itemknn': ['maxk'], + 'puresvd': ['factors'], + 'slim': ['alpha', 'elastic'], + 'mf': ['num_ng', 'factors', 'lr', 'batch_size', 'reg_1', 'reg_2'], + 'fm': ['num_ng', 'factors', 'lr', 'batch_size', 'reg_1', 'reg_2'], + 'neumf': ['num_ng', 'factors', 'num_layers', 'dropout', 'lr', 'batch_size', 'reg_1', 'reg_2'], + 'nfm': ['num_ng', 'factors', 'num_layers', 'dropout', 'lr', 'batch_size', 'reg_1', 'reg_2'], + 'ngcf': ['num_ng', 'factors', 'node_dropout', 'mess_dropout', 'batch_size', 'lr', 'reg_1', 'reg_2'], + 'multi-vae': ['latent_dim', 'dropout','batch_size', 'lr', 'anneal_cap'], + 'ease': ['reg'], + 'item2vec': ['context_window', 'rho', 'lr', 'factors'], +} + +param_type_config = { + 'num_layers': 'int', + 'maxk': 'int', + 'factors': 'int', + 'alpha': 'float', + 'elastic': 'float', + 'num_ng': 'int', + 'lr': 'float', + 'batch_size': 'int', + 'reg_1': 'float', + 'reg_2': 'float', + 'dropout': 'float', + 'node_dropout': 'float', + 'mess_dropout': 'float', + 'latent_dim': 'int', + 'anneal_cap': 'float', + 'reg': 'float', + 'context_window': 'int', + 'rho': 'float' +} if __name__ == '__main__': ''' summarize hyper-parameter part (basic yaml + args + model yaml) ''' diff --git a/run_examples/fair_rec.py b/run_examples/fair_rec.py index d4506f1..b070e7a 100644 --- a/run_examples/fair_rec.py +++ b/run_examples/fair_rec.py @@ -2,12 +2,40 @@ from logging import getLogger from daisy.utils.splitter import TestSplitter -from daisy.utils.config import init_seed, init_config, init_logger, model_config +from daisy.utils.config import 
init_seed, init_config, init_logger from daisy.utils.loader import RawDataReader, Preprocessor from daisy.utils.sampler import BasicNegtiveSampler, SkipGramNegativeSampler from daisy.utils.dataset import get_dataloader, BasicDataset, CandidatesDataset, AEDataset -from daisy.utils.utils import get_ur, get_history_matrix, build_candidates_set, get_adj_mat, calc_ranking_results - +from daisy.utils.utils import get_ur, get_history_matrix, build_candidates_set, get_adj_mat +from daisy.utils.metrics import calc_ranking_results + +from daisy.model.KNNCFRecommender import ItemKNNCF +from daisy.model.PureSVDRecommender import PureSVD +from daisy.model.SLiMRecommender import SLiM +from daisy.model.PopRecommender import MostPop +from daisy.model.MFRecommender import MF +from daisy.model.FMRecommender import FM +from daisy.model.Item2VecRecommender import Item2Vec +from daisy.model.NeuMFRecommender import NeuMF +from daisy.model.NFMRecommender import NFM +from daisy.model.NGCFRecommender import NGCF +from daisy.model.VAECFRecommender import VAECF +from daisy.model.EASERecommender import EASE + +model_config = { + 'mostpop': MostPop, + 'slim': SLiM, + 'itemknn': ItemKNNCF, + 'puresvd': PureSVD, + 'mf': MF, + 'fm': FM, + 'ngcf': NGCF, + 'neumf': NeuMF, + 'nfm': NFM, + 'multi-vae': VAECF, + 'item2vec': Item2Vec, + 'ease': EASE, +} if __name__ == '__main__': ''' summarize hyper-parameter part (basic yaml + args + model yaml) ''' diff --git a/setup.py b/setup.py index bdc09a0..52913eb 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from os import path +# from os import path from setuptools import setup, find_packages classifiers = ["License :: OSI Approved :: MIT License"] @@ -40,14 +40,14 @@ name='daisyRec', packages=[package for package in find_packages() if package.startswith('daisy')], # packages = find_packages(exclude=['tests*']), - version='v2.0.8', # Ideally should be same as your GitHub release tag varsion + version='v2.1.0', # Ideally should be same as your GitHub 
release tag version description=('An easy-to-use library for recommender systems.'), long_description=long_description, # long_description_content_type="text/markdown", author='Yu Di', author_email='di.yu.2021@mitb.smu.edu.sg', url='https://github.com/AmazingDD/daisyRec', - download_url='https://github.com/AmazingDD/daisyRec/archive/refs/tags/v2.0.8.tar.gz', + download_url='https://github.com/AmazingDD/daisyRec/archive/refs/tags/v2.1.0.tar.gz', keywords=['ranking', 'recommendation'], include_package_data=True, install_requires=install_requires,