RUCAIBox
diff --git a/‎.github/workflows/python-package.yml‎
Lines changed: 45 additions & 0 deletions b/‎.github/workflows/python-package.yml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎recbole/data/dataloader/knowledge_dataloader.py‎
Lines changed: 2 additions & 0 deletions b/‎recbole/data/dataloader/knowledge_dataloader.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎recbole/data/dataloader/neg_sample_mixin.py‎
Lines changed: 1 addition & 1 deletion b/‎recbole/data/dataloader/neg_sample_mixin.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎recbole/data/dataloader/sequential_dataloader.py‎
Lines changed: 1 addition & 1 deletion b/‎recbole/data/dataloader/sequential_dataloader.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎recbole/data/dataset/dataset.py‎
Lines changed: 52 additions & 4 deletions b/‎recbole/data/dataset/dataset.py‎
Lines changed: 52 additions & 4 deletions
diff --git a/‎recbole/data/dataset/kg_dataset.py‎
Lines changed: 1 addition & 1 deletion b/‎recbole/data/dataset/kg_dataset.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎recbole/model/general_recommender/dmf.py‎
Lines changed: 1 addition & 2 deletions b/‎recbole/model/general_recommender/dmf.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎recbole/model/general_recommender/fism.py‎
Lines changed: 10 additions & 2 deletions b/‎recbole/model/general_recommender/fism.py‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎recbole/model/general_recommender/gcmc.py‎
Lines changed: 7 additions & 4 deletions b/‎recbole/model/general_recommender/gcmc.py‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎recbole/model/general_recommender/lightgcn.py‎
Lines changed: 7 additions & 4 deletions b/‎recbole/model/general_recommender/lightgcn.py‎
Lines changed: 7 additions & 4 deletions
@@ -0,0 +1,45 @@
+# CI workflow: installs RecBole's dependencies and runs the test suites on every pull request.
+name: RecBole tests
+
+on:
+- pull_request
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so YAML keeps the version as a string (an unquoted 3.10 would be read as 3.1).
+        python-version: ["3.8"]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pytest
+        pip install dgl
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+    # Use "python -m pytest" instead of "pytest" to fix imports
+    - name: Test metrics
+      run: |
+        python -m pytest -v tests/metrics
+    # Runs once; the workflow previously repeated this step after "Test config".
+    - name: Test evaluation_setting
+      run: |
+        python -m pytest -v tests/evaluation_setting
+    - name: Test model
+      run: |
+        python -m pytest -v tests/model/test_model_auto.py
+    - name: Test config
+      run: |
+        python -m pytest -v tests/config/test_config.py
+        export PYTHONPATH=.
+        python tests/config/test_command_line.py --use_gpu=False --valid_metric=Recall@10 --split_ratio=[0.7,0.2,0.1] --metrics=['Recall@10'] --epochs=200 --eval_setting='LO_RS' --learning_rate=0.3
+
@@ -182,6 +182,8 @@ def _next_batch_data(self):
         elif self.state == KGDataLoaderState.RS:
             return self.general_dataloader._next_batch_data()
         elif self.state == KGDataLoaderState.RSKG:
+            if self.kg_dataloader.pr >= self.kg_dataloader.pr_end:
+                self.kg_dataloader.pr = 0
             kg_data = self.kg_dataloader._next_batch_data()
             rec_data = self.general_dataloader._next_batch_data()
             rec_data.update(kg_data)
 
@@ -29,7 +29,7 @@ class NegSampleMixin(AbstractDataLoader):
         batch_size (int, optional): The batch_size of dataloader. Defaults to ``1``.
         dl_format (InputType, optional): The input type of dataloader. Defaults to
             :obj:`~recbole.utils.InputType.POINTWISE`.
-        shuffle (bool, optional): Whether the dataloader will be shuffle after a round. Defaluts to ``False``.
+        shuffle (bool, optional): Whether the dataloader will be shuffled after a round. Defaults to ``False``.
     """
     dl_type = DataLoaderType.NEGSAMPLE
 
 
@@ -143,7 +143,7 @@ def augmentation(self, uid_list, item_list_index, target_index, item_list_length
         new_dict = {
             self.uid_field: uid_list,
             self.item_list_field: np.zeros((new_length, self.max_item_list_len), dtype=np.int64),
-            self.time_list_field: np.zeros((new_length, self.max_item_list_len), dtype=np.int64),
+            self.time_list_field: np.zeros((new_length, self.max_item_list_len)),
             self.target_iid_field: self.dataset.inter_feat[self.iid_field][target_index].values,
             self.target_time_field: self.dataset.inter_feat[self.time_field][target_index].values,
             self.item_list_length_field: item_list_length,
 
@@ -55,11 +55,16 @@ class Dataset(object):
             Specially, if feature is loaded from Arg ``additional_feat_suffix``, its source has type str,
             which is the suffix of its local file (also the suffix written in Arg ``additional_feat_suffix``).
 
-        field2id_token (dict): Dict mapping feature name (str) to a list, which stores the original token of
-            this feature. For example, if ``test`` is token-like feature, ``token_a`` is remapped to 1, ``token_b``
+        field2id_token (dict): Dict mapping feature name (str) to a :class:`np.ndarray`, which stores the original token
+            of this feature. For example, if ``test`` is token-like feature, ``token_a`` is remapped to 1, ``token_b``
             is remapped to 2. Then ``field2id_token['test'] = ['[PAD]', 'token_a', 'token_b']``. (Note that 0 is
             always PADDING for token-like features.)
 
+        field2token_id (dict): Dict mapping feature name (str) to a dict, which stores the token remap table
+            of this feature. For example, if ``test`` is token-like feature, ``token_a`` is remapped to 1, ``token_b``
+            is remapped to 2. Then ``field2token_id['test'] = {'[PAD]': 0, 'token_a': 1, 'token_b': 2}``.
+            (Note that 0 is always PADDING for token-like features.)
+
         field2seqlen (dict): Dict mapping feature name (str) to its sequence length (int).
             For sequence features, their length can be either set in config,
             or set to the max sequence length of this feature.
@@ -116,6 +121,7 @@ def _get_preset(self):
         self.field2type = {}
         self.field2source = {}
         self.field2id_token = {}
+        self.field2token_id = {}
         self.field2seqlen = self.config['seq_len'] or {}
         self._preloaded_weight = {}
         self.benchmark_filename_list = self.config['benchmark_filename']
@@ -897,11 +903,13 @@ def _remap(self, remap_list):
         tokens, split_point = self._concat_remaped_tokens(remap_list)
         new_ids_list, mp = pd.factorize(tokens)
         new_ids_list = np.split(new_ids_list + 1, split_point)
-        mp = ['[PAD]'] + list(mp)
+        mp = np.array(['[PAD]'] + list(mp))
+        token_id = {t: i for i, t in enumerate(mp)}
 
         for (feat, field, ftype), new_ids in zip(remap_list, new_ids_list):
-            if (field not in self.field2id_token):
+            if field not in self.field2id_token:
                 self.field2id_token[field] = mp
+                self.field2token_id[field] = token_id
             if ftype == FeatureType.TOKEN:
                 feat[field] = new_ids
             elif ftype == FeatureType.TOKEN_SEQ:
@@ -1010,6 +1018,46 @@ def copy_field_property(self, dest_field, source_field):
         self.field2source[dest_field] = self.field2source[source_field]
         self.field2seqlen[dest_field] = self.field2seqlen[source_field]
 
+    @dlapi.set()
+    def token2id(self, field, tokens):
+        """Map external tokens to internal ids.
+
+        Args:
+            field (str): Field of external tokens.
+            tokens (str, list or np.ndarray): External tokens.
+
+        Returns:
+            int or np.ndarray: The internal ids of external tokens.
+
+        Raises:
+            ValueError: If a token is not a key of ``field2token_id[field]``.
+            TypeError: If ``tokens`` is not a str, list or np.ndarray.
+        """
+        if isinstance(tokens, str):
+            if tokens in self.field2token_id[field]:
+                return self.field2token_id[field][tokens]
+            else:
+                # Fill the placeholder so the message names the unknown token.
+                raise ValueError('token [{}] is not existed'.format(tokens))
+        elif isinstance(tokens, (list, np.ndarray)):
+            # Each element is resolved through the single-token branch above.
+            return np.array([self.token2id(field, token) for token in tokens])
+        else:
+            # Fill the placeholder with the offending type.
+            raise TypeError('The type of tokens [{}] is not supported'.format(type(tokens)))
+
+    @dlapi.set()
+    def id2token(self, field, ids):
+        """Look up the external tokens stored for internal ids of a field.
+
+        Args:
+            field (str): Field the internal ids belong to.
+            ids (int, list, np.ndarray or torch.Tensor): Internal ids to look up.
+
+        Returns:
+            str or np.ndarray: The result of indexing ``field2id_token[field]`` with ``ids``.
+
+        Raises:
+            ValueError: If that indexing raises ``IndexError``.
+        """
+        try:
+            id_token = self.field2id_token[field]
+            return id_token[ids]
+        except IndexError:
+            # A list argument gets its own message; every other input shares the generic one.
+            if isinstance(ids, list):
+                template = '[{}] is not a one-dimensional list'
+            else:
+                template = '[{}] is not a valid ids'
+            raise ValueError(template.format(ids))
+
     @property
     @dlapi.set()
     def user_num(self):
 
@@ -353,7 +353,7 @@ def _remap_ID_all(self):
         item_tokens = self._get_rec_item_token()
         super()._remap_ID_all()
         self._sort_remaped_entities(item_tokens)
-        self.field2id_token[self.relation_field].append('[UI-Relation]')
+        self.field2id_token[self.relation_field] = np.append(self.field2id_token[self.relation_field], '[UI-Relation]')
 
     @property
     @dlapi.set()
 
@@ -170,8 +170,7 @@ def get_item_embedding(self):
         col = interaction_matrix.col
         i = torch.LongTensor([row, col])
         data = torch.FloatTensor(interaction_matrix.data)
-        item_matrix = torch.sparse.FloatTensor(i, data).to(self.device).transpose(0, 1)
-        
+        item_matrix = torch.sparse.FloatTensor(i, data, torch.Size(interaction_matrix.shape)).to(self.device).transpose(0, 1)
         item = torch.sparse.mm(item_matrix, self.item_linear.weight.t())
 
         item = self.item_fc_layers(item)
 
@@ -14,12 +14,13 @@
     https://github.com/AaronHeee/Neural-Attentive-Item-Similarity-Model
 """
 
+from logging import getLogger
+
 import torch
 import torch.nn as nn
-from torch.nn.init import normal_
-
 from recbole.model.abstract_recommender import GeneralRecommender
 from recbole.utils import InputType
+from torch.nn.init import normal_
 
 
 class FISM(GeneralRecommender):
@@ -36,6 +37,8 @@ def __init__(self, config, dataset):
 
         # load dataset info
         self.LABEL = config['LABEL_FIELD']
+        self.logger = getLogger()
+
         # get all users's history interaction information.the history item 
         # matrix is padding by the maximum number of a user's interactions
         self.history_item_matrix, self.history_lens, self.mask_mat = self.get_history_info(dataset)
@@ -49,6 +52,11 @@ def __init__(self, config, dataset):
         # split the too large dataset into the specified pieces
         if self.split_to > 0:
             self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to)
+        else:
+            self.logger.warning('Pay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case, ' + \
+                                'you need to increase it \n\t\t\tuntil the error disappears. For example, ' + \
+                                'you can append it in the command line such as `--split_to=5`')
+
 
         # define layers and loss
         # construct source and destination item embedding matrix
 
@@ -143,10 +143,13 @@ def get_norm_adj_mat(self):
         # build adj matrix
         A = sp.dok_matrix((self.n_users + self.n_items,
                            self.n_users + self.n_items), dtype=np.float32)
-        A = A.tolil()
-        A[:self.n_users, self.n_users:] = self.interaction_matrix
-        A[self.n_users:, :self.n_users] = self.interaction_matrix.transpose()
-        A = A.todok()
+        inter_M = self.interaction_matrix
+        inter_M_t = self.interaction_matrix.transpose()
+        data_dict = dict(zip(zip(inter_M.row, inter_M.col+self.n_users),
+                             [1]*inter_M.nnz))
+        data_dict.update(dict(zip(zip(inter_M_t.row+self.n_users, inter_M_t.col),
+                                  [1]*inter_M_t.nnz)))
+        A._update(data_dict)
         # norm adj matrix
         sumArr = (A > 0).sum(axis=1)
         # add epsilon to avoid Devide by zero Warning
 
@@ -86,10 +86,13 @@ def get_norm_adj_mat(self):
         # build adj matrix
         A = sp.dok_matrix((self.n_users + self.n_items,
                            self.n_users + self.n_items), dtype=np.float32)
-        A = A.tolil()
-        A[:self.n_users, self.n_users:] = self.interaction_matrix
-        A[self.n_users:, :self.n_users] = self.interaction_matrix.transpose()
-        A = A.todok()
+        inter_M = self.interaction_matrix
+        inter_M_t = self.interaction_matrix.transpose()
+        data_dict = dict(zip(zip(inter_M.row, inter_M.col+self.n_users),
+                             [1]*inter_M.nnz))
+        data_dict.update(dict(zip(zip(inter_M_t.row+self.n_users, inter_M_t.col),
+                                  [1]*inter_M_t.nnz)))
+        A._update(data_dict)
         # norm adj matrix
         sumArr = (A > 0).sum(axis=1)
         # add epsilon to avoid Devide by zero Warning