From 7549700d458bf970225919f1b2f2f4b1628e6311 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Mon, 12 Apr 2021 22:05:17 +0200 Subject: [PATCH 1/8] siw plugin added --- avalanche/training/plugins/__init__.py | 1 + avalanche/training/plugins/siw.py | 119 +++++++++++++++++++++++++ examples/siw_cifar100.py | 61 +++++++++++++ 3 files changed, 181 insertions(+) create mode 100644 avalanche/training/plugins/siw.py create mode 100644 examples/siw_cifar100.py diff --git a/avalanche/training/plugins/__init__.py b/avalanche/training/plugins/__init__.py index 0b2c96703..7a5326c4a 100644 --- a/avalanche/training/plugins/__init__.py +++ b/avalanche/training/plugins/__init__.py @@ -9,3 +9,4 @@ from .replay import ReplayPlugin from .strategy_plugin import StrategyPlugin from .synaptic_intelligence import SynapticIntelligencePlugin +from .siw import SIWPlugin diff --git a/avalanche/training/plugins/siw.py b/avalanche/training/plugins/siw.py new file mode 100644 index 000000000..26cf560f7 --- /dev/null +++ b/avalanche/training/plugins/siw.py @@ -0,0 +1,119 @@ +from torch.utils.data import random_split, ConcatDataset +from avalanche.benchmarks.utils import AvalancheConcatDataset +from avalanche.training.plugins.strategy_plugin import StrategyPlugin +from avalanche.benchmarks.utils.data_loader import \ + MultiTaskJoinedBatchDataLoader +import torch +import torch.cuda as tc +from torch.autograd import Variable +import torch.nn as nn + + +class SIWPlugin(StrategyPlugin): + """ + Standardization of Initial Weights (SIW) plugin. + From https://arxiv.org/pdf/2008.13710.pdf + + Performs past class initial weights replay and state-level score + calibration. The callbacks `before_training_exp`, `after_backward`, + `after_training_exp`,`before_eval_exp`, and `after_eval_forward` + are implemented. + + The `before_training_exp` callback is implemented in order to keep + track of the classes in each experience + + The `after_backward` callback is implemented in order to freeze past + class weights in the last fully connected layer + + The `after_training_exp` callback is implemented in order to extract + new class images' scores and compute the model confidence at + each incremental state. + + The `before_eval_exp` callback is implemented in order to standardize + initial weights before inference + + The`after_eval_forward` is implemented in order to apply state-level + calibration at the inference time + + The :batch_size: and :num_workers: parameters concern the new class + scores extraction. + """ + + def __init__(self, batch_size, num_workers): + super().__init__() + self.confidences = [] + self.classes_per_experience = [] + self.num_workers = num_workers + self.batch_size = batch_size + + def before_training_exp(self, strategy, **kwargs): + """ + Keep track of the classes encountered in each experience + """ + self.classes_per_experience.append( + strategy.experience.classes_in_this_experience) + + def after_backward(self, strategy, **kwargs): + """ + Before executing the optimization step to perform + back-propagation, we zero the gradients of past class + weights and bias. 
This is equivalent to freeze past + class weights and bias, to let only the feature extractor + and the new class weights and bias evolve + """ + previous_classes = len(strategy.experience.previous_classes) + strategy.model.fc.weight.grad[:previous_classes, :] = 0 + strategy.model.fc.bias.grad[:previous_classes] = 0 + + @torch.no_grad() + def after_training_exp(self, strategy, **kwargs): + """ + Extract new class images' scores and compute the model + confidence at each incremental state + """ + strategy.model.eval() + + dataset = strategy.experience.dataset + loader = torch.utils.data.DataLoader( + dataset, batch_size=self.batch_size, + num_workers=self.num_workers) + + max_top1_scores = [] + for i, data in enumerate(loader): + inputs, targets, task_labels = data + if tc.is_available(): + inputs = inputs.to(strategy.device) + inputs = Variable(inputs) + logits = strategy.model(inputs) + max_score = torch.max(logits, dim=1)[0].tolist() + max_top1_scores.extend(max_score) + self.confidences.append(sum(max_top1_scores) / + len(max_top1_scores)) + + def before_eval_exp(self, strategy, **kwargs): + """ + Before evaluating the performance of our model, we standardize + all class weights (by subtracting their mean and dividing by + their standard deviation) + """ + previous_classes = len(strategy.experience.previous_classes) + classes_seen_so_far = len(strategy.experience.classes_seen_so_far) + + for i in range(previous_classes, classes_seen_so_far): + mu = torch.mean(strategy.model.fc.weight[i]) + std = torch.std(strategy.model.fc.weight[i]) + + strategy.model.fc.weight.data[i] -= mu + strategy.model.fc.weight.data[i] /= std + + def after_eval_forward(self, strategy, **kwargs): + """ + Rectify past class scores by multiplying them by the model's + confidence in the current state and dividing them by the + model's confidence in the initial state in which a past + class was encountered for the first time + """ + for exp in range(len(self.confidences)): + strategy.logits[:, self.classes_per_experience[exp]] *=\ + self.confidences[strategy.experience.current_experience] \ + / self.confidences[exp] diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py new file mode 100644 index 000000000..c83ba6528 --- /dev/null +++ b/examples/siw_cifar100.py @@ -0,0 +1,61 @@ +from avalanche.benchmarks.classic import SplitCIFAR100 +from torch.optim import SGD +from torch.nn import CrossEntropyLoss +from avalanche.models import SimpleMLP +from avalanche.training.strategies import BaseStrategy +from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ + AGEMPlugin, SIWPlugin +import torchvision +from avalanche.benchmarks.generators import filelist_scenario, \ + dataset_scenario, tensor_scenario, paths_scenario +from torchvision.transforms import Compose, CenterCrop, Normalize, \ + Scale, Resize, ToTensor, ToPILImage +import torchvision.transforms as transforms +import torch.nn as nn +import torch as th +import torch.cuda as tc +from torch.autograd import Variable + +################################################ +P = 10 # number of classes in each state +device = 'cuda:0' +############################################# +siw = SIWPlugin(batch_size=32, num_workers=8) +model = torchvision.models.resnet18(num_classes=100).to(device) +optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9) +criterion = CrossEntropyLoss() +strategy = BaseStrategy(model, optimizer, criterion, plugins=[siw], + device=device, train_epochs=10) + +normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], + 
std=[0.2007, 0.1999, 0.1992]) + +train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize]) + +test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize]) + +# scenario +scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, + seed=1234, train_transform=train_transform, + eval_transform=test_transform) + +# TRAINING LOOP +print('Starting experiment...') +results = [] +for i, experience in enumerate(scenario.train_stream): + print("Start of experience: ", experience.current_experience) + strategy.train(experience) + print('Training completed') + print('Computing accuracy on the test set') + res = strategy.eval(scenario.test_stream[i]) + results.append(res) + +print('Results = ' + str(results)) From 089ffe1e58578ed61827bf4652c959c43be4f6e5 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Sat, 17 Apr 2021 16:10:37 +0200 Subject: [PATCH 2/8] fix some code issues --- avalanche/training/plugins/siw.py | 47 +++++-- .../training/strategies/strategy_wrappers.py | 96 +++++++++++++- examples/siw_cifar100.py | 121 +++++++++++------- tests/test_strategies.py | 38 ++++-- 4 files changed, 234 insertions(+), 68 deletions(-) diff --git a/avalanche/training/plugins/siw.py b/avalanche/training/plugins/siw.py index 26cf560f7..9f8555ecf 100644 --- a/avalanche/training/plugins/siw.py +++ b/avalanche/training/plugins/siw.py @@ -7,6 +7,9 @@ import torch.cuda as tc from torch.autograd import Variable import torch.nn as nn +from avalanche.training.utils import get_last_fc_layer, get_layer_by_name +from typing import Optional +from torch.nn import Linear class SIWPlugin(StrategyPlugin): @@ -35,17 +38,33 @@ class weights in the last fully connected layer The`after_eval_forward` is implemented in order to apply state-level calibration at the inference time + The :siw_layer_name: parameter concerns the name of the last fully + connected layer of the network + The :batch_size: and :num_workers: parameters concern the new class scores extraction. 
""" - def __init__(self, batch_size, num_workers): + def __init__(self, model, siw_layer_name='fc', batch_size=32, + num_workers=0): super().__init__() self.confidences = [] self.classes_per_experience = [] + self.model = model + self.siw_layer_name = siw_layer_name self.num_workers = num_workers self.batch_size = batch_size + def get_siw_layer(self) -> Optional[Linear]: + result = None + if self.siw_layer_name is None: + last_fc = get_last_fc_layer(self.model) + if last_fc is not None: + result = last_fc[1] + else: + result = get_layer_by_name(self.model, self.siw_layer_name) + return result + def before_training_exp(self, strategy, **kwargs): """ Keep track of the classes encountered in each experience @@ -62,8 +81,12 @@ class weights and bias, to let only the feature extractor and the new class weights and bias evolve """ previous_classes = len(strategy.experience.previous_classes) - strategy.model.fc.weight.grad[:previous_classes, :] = 0 - strategy.model.fc.bias.grad[:previous_classes] = 0 + last_layer = self.get_siw_layer() + if last_layer is None: + raise RuntimeError('Can\'t find this Linear layer') + + last_layer.weight.grad[:previous_classes, :] = 0 + last_layer.bias.grad[:previous_classes] = 0 @torch.no_grad() def after_training_exp(self, strategy, **kwargs): @@ -83,7 +106,6 @@ def after_training_exp(self, strategy, **kwargs): inputs, targets, task_labels = data if tc.is_available(): inputs = inputs.to(strategy.device) - inputs = Variable(inputs) logits = strategy.model(inputs) max_score = torch.max(logits, dim=1)[0].tolist() max_top1_scores.extend(max_score) @@ -96,15 +118,18 @@ def before_eval_exp(self, strategy, **kwargs): all class weights (by subtracting their mean and dividing by their standard deviation) """ - previous_classes = len(strategy.experience.previous_classes) + last_layer = self.get_siw_layer() + if last_layer is None: + raise RuntimeError('Can\'t find this Linear layer') + classes_seen_so_far = len(strategy.experience.classes_seen_so_far) - for i in range(previous_classes, classes_seen_so_far): - mu = torch.mean(strategy.model.fc.weight[i]) - std = torch.std(strategy.model.fc.weight[i]) + for i in range(classes_seen_so_far): + mu = torch.mean(last_layer.weight[i]) + std = torch.std(last_layer.weight[i]) - strategy.model.fc.weight.data[i] -= mu - strategy.model.fc.weight.data[i] /= std + last_layer.weight.data[i] -= mu + last_layer.weight.data[i] /= std def after_eval_forward(self, strategy, **kwargs): """ @@ -114,6 +139,6 @@ def after_eval_forward(self, strategy, **kwargs): class was encountered for the first time """ for exp in range(len(self.confidences)): - strategy.logits[:, self.classes_per_experience[exp]] *=\ + strategy.logits[:, self.classes_per_experience[exp]] *= \ self.confidences[strategy.experience.current_experience] \ / self.confidences[exp] diff --git a/avalanche/training/strategies/strategy_wrappers.py b/avalanche/training/strategies/strategy_wrappers.py index d2c876baa..6cf59fb9e 100644 --- a/avalanche/training/strategies/strategy_wrappers.py +++ b/avalanche/training/strategies/strategy_wrappers.py @@ -16,7 +16,7 @@ from avalanche.training import default_logger from avalanche.training.plugins import StrategyPlugin, CWRStarPlugin, \ ReplayPlugin, GDumbPlugin, LwFPlugin, AGEMPlugin, GEMPlugin, EWCPlugin, \ - EvaluationPlugin, SynapticIntelligencePlugin + EvaluationPlugin, SynapticIntelligencePlugin, SIWPlugin from avalanche.training.strategies.base_strategy import BaseStrategy @@ -444,6 +444,97 @@ def __init__(self, model: Module, optimizer: 
Optimizer, criterion, eval_every=eval_every ) + def __init__(self, model: Module, optimizer: Optimizer, criterion, + patterns_per_exp: int, memory_strength: float = 0.5, + train_mb_size: int = 1, train_epochs: int = 1, + eval_mb_size: int = None, device=None, + plugins: Optional[List[StrategyPlugin]] = None, + evaluator: EvaluationPlugin = default_logger, eval_every=-1): + """ Gradient Episodic Memory (GEM) strategy. + See GEM plugin for details. + This strategy does not use task identities. + + :param model: The model. + :param optimizer: The optimizer to use. + :param criterion: The loss criterion to use. + :param patterns_per_exp: number of patterns per experience in the memory + :param memory_strength: offset to add to the projection direction + in order to favour backward transfer (gamma in original paper). + :param train_mb_size: The train minibatch size. Defaults to 1. + :param train_epochs: The number of training epochs. Defaults to 1. + :param eval_mb_size: The eval minibatch size. Defaults to 1. + :param device: The device to use. Defaults to None (cpu). + :param plugins: Plugins to be added. Defaults to None. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. + if -1: no evaluation during training. + if 0: calls `eval` after the final epoch of each training + experience. + if >0: calls `eval` every `eval_every` epochs and at the end + of all the epochs for a single experience. + """ + + gem = GEMPlugin(patterns_per_exp, memory_strength) + if plugins is None: + plugins = [gem] + else: + plugins.append(gem) + + super().__init__( + model, optimizer, criterion, + train_mb_size=train_mb_size, train_epochs=train_epochs, + eval_mb_size=eval_mb_size, device=device, plugins=plugins, + evaluator=evaluator, eval_every=eval_every) + + +class SIW(BaseStrategy): + def __init__(self, model: Module, optimizer: Optimizer, criterion, + siw_layer_name: str = 'fc', + batch_size: int = 32, num_workers: int = 0, + train_mb_size: int = 1, train_epochs: int = 1, + eval_mb_size: int = None, device=None, + plugins: Optional[List[StrategyPlugin]] = None, + evaluator: EvaluationPlugin = default_logger, eval_every=-1): + """ Standardization of Initial Weights (SIW) strategy. + See SIW plugin for details. + This strategy does not use task identities. + + :param model: The model. + :param optimizer: The optimizer to use. + :param criterion: The loss criterion to use. + :param siw_layer_name: The name of the last fully connected layer + :param num_workers: The number of workers used to load batches + :param batch_size: The batch size used to extract scores + :param train_mb_size: The train minibatch size. Defaults to 1. + :param train_epochs: The number of training epochs. Defaults to 1. + :param eval_mb_size: The eval minibatch size. Defaults to 1. + :param device: The device to use. Defaults to None (cpu). + :param plugins: Plugins to be added. Defaults to None. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. + if -1: no evaluation during training. + if 0: calls `eval` after the final epoch of each training + experience. + if >0: calls `eval` every `eval_every` epochs and at the end + of all the epochs for a single experience. 
+ """ + + siw = SIWPlugin(model, siw_layer_name, batch_size, num_workers) + if plugins is None: + plugins = [siw] + else: + plugins.append(siw) + + super().__init__( + model, optimizer, criterion, + train_mb_size=train_mb_size, train_epochs=train_epochs, + eval_mb_size=eval_mb_size, device=device, plugins=plugins, + evaluator=evaluator, eval_every=eval_every) + __all__ = [ 'Naive', @@ -454,5 +545,6 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, 'AGEM', 'GEM', 'EWC', - 'SynapticIntelligence' + 'SynapticIntelligence', + 'SIW' ] diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index c83ba6528..c661ee307 100644 --- a/examples/siw_cifar100.py +++ b/examples/siw_cifar100.py @@ -2,7 +2,7 @@ from torch.optim import SGD from torch.nn import CrossEntropyLoss from avalanche.models import SimpleMLP -from avalanche.training.strategies import BaseStrategy +from avalanche.training.strategies import Naive from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ AGEMPlugin, SIWPlugin import torchvision @@ -12,50 +12,79 @@ Scale, Resize, ToTensor, ToPILImage import torchvision.transforms as transforms import torch.nn as nn -import torch as th -import torch.cuda as tc +import torch from torch.autograd import Variable +import argparse -################################################ -P = 10 # number of classes in each state -device = 'cuda:0' -############################################# -siw = SIWPlugin(batch_size=32, num_workers=8) -model = torchvision.models.resnet18(num_classes=100).to(device) -optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9) -criterion = CrossEntropyLoss() -strategy = BaseStrategy(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=10) - -normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], - std=[0.2007, 0.1999, 0.1992]) - -train_transform = transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize]) - -test_transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize]) - -# scenario -scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, - seed=1234, train_transform=train_transform, - eval_transform=test_transform) - -# TRAINING LOOP -print('Starting experiment...') -results = [] -for i, experience in enumerate(scenario.train_stream): - print("Start of experience: ", experience.current_experience) - strategy.train(experience) - print('Training completed') - print('Computing accuracy on the test set') - res = strategy.eval(scenario.test_stream[i]) - results.append(res) - -print('Results = ' + str(results)) + +def main(args): + # check if selected GPU is available or use CPU + assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0." 
+ device = torch.device(f"cuda:{args.cuda}" + if torch.cuda.is_available() + and args.cuda >= 0 else "cpu") + print(f'Using device: {device}') + ############################################# + model = torchvision.models.resnet18(num_classes=100).to(device) + + siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, + batch_size=args.siw_batch_size, + num_workers=args.siw_num_workers) + optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) + criterion = CrossEntropyLoss() + strategy = Naive(model, optimizer, criterion, plugins=[siw], + device=device, train_epochs=args.epochs) + + normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], + std=[0.2007, 0.1999, 0.1992]) + + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize]) + + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize]) + + # scenario + scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, + seed=1234, train_transform=train_transform, + eval_transform=test_transform) + + # TRAINING LOOP + print('Starting experiment...') + results = [] + for i, experience in enumerate(scenario.train_stream): + print("Start of experience: ", experience.current_experience) + strategy.train(experience) + print('Training completed') + print('Computing accuracy on the test set') + res = strategy.eval(scenario.test_stream[:i+1]) + results.append(res) + + print('Results = ' + str(results)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') + parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') + parser.add_argument('--epochs', type=int, default=10, + help='Number of training epochs.') + parser.add_argument('--batch_size', type=int, default=128, + help='Batch size.') + parser.add_argument('--siw_batch_size', type=int, default=128, + help='Batch size used to extract scores.') + parser.add_argument('--siw_num_workers', type=int, default=8, + help='Number of workers used to extract scores.') + parser.add_argument('--siw_layer_name', type=str, default='fc', + help='Name of the last fully connected layer.') + parser.add_argument('--cuda', type=int, default=0, + help='Specify GPU id to use. 
Use CPU if -1.') + args = parser.parse_args() + + main(args) diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 1aeb8584e..ea6394554 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -30,7 +30,7 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.training.strategies import Naive, Replay, CWRStar, \ GDumb, LwF, AGEM, GEM, EWC, \ - SynapticIntelligence, JointTraining + SynapticIntelligence, JointTraining, SIW from avalanche.training.strategies.ar1 import AR1 from avalanche.training.strategies.cumulative import Cumulative from avalanche.benchmarks import nc_scenario, SplitCIFAR10 @@ -227,17 +227,17 @@ def test_gdumb(self): # SIT scenario my_nc_scenario = self.load_scenario(fast_test=self.fast_test) strategy = GDumb( - model, optimizer, criterion, - mem_size=200, train_mb_size=64, device=self.device, - eval_mb_size=50, train_epochs=2 + model, optimizer, criterion, + mem_size=200, train_mb_size=64, device=self.device, + eval_mb_size=50, train_epochs=2 ) self.run_strategy(my_nc_scenario, strategy) # MT scenario strategy = GDumb( - model, optimizer, criterion, - mem_size=200, train_mb_size=64, device=self.device, - eval_mb_size=50, train_epochs=2 + model, optimizer, criterion, + mem_size=200, train_mb_size=64, device=self.device, + eval_mb_size=50, train_epochs=2 ) scenario = self.load_scenario(fast_test=self.fast_test, use_task_labels=True) @@ -271,7 +271,7 @@ def test_lwf(self): # SIT scenario my_nc_scenario = self.load_scenario(fast_test=self.fast_test) strategy = LwF(model, optimizer, criterion, - alpha=[0, 1/2, 2*(2/3), 3*(3/4), 4*(4/5)], + alpha=[0, 1 / 2, 2 * (2 / 3), 3 * (3 / 4), 4 * (4 / 5)], temperature=2, device=self.device, train_mb_size=10, eval_mb_size=50, train_epochs=2) @@ -279,7 +279,7 @@ def test_lwf(self): # MT scenario strategy = LwF(model, optimizer, criterion, - alpha=[0, 1/2, 2*(2/3), 3*(3/4), 4*(4/5)], + alpha=[0, 1 / 2, 2 * (2 / 3), 3 * (3 / 4), 4 * (4 / 5)], temperature=2, device=self.device, train_mb_size=10, eval_mb_size=50, train_epochs=2) @@ -408,6 +408,26 @@ def test_ar1(self): self.run_strategy(my_nc_scenario, strategy) + def test_siw(self): + model = self.get_model(fast_test=self.fast_test) + optimizer = SGD(model.parameters(), lr=0.1) + criterion = CrossEntropyLoss() + + # SIT scenario + my_nc_scenario = self.load_scenario(fast_test=self.fast_test) + strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', + batch_size=32, num_workers=8, train_mb_size=128, + device=self.device, eval_mb_size=32, train_epochs=2) + self.run_strategy(my_nc_scenario, strategy) + + # MT scenario + strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', + batch_size=32, num_workers=8, train_mb_size=128, + device=self.device, eval_mb_size=32, train_epochs=2) + scenario = self.load_scenario(fast_test=self.fast_test, + use_task_labels=True) + self.run_strategy(scenario, strategy) + def load_ar1_scenario(self, fast_test=False): """ Returns a NC Scenario from a fake dataset of 10 classes, 5 experiences, From 4c29426a0b3f3b8edaa9e515a94a6a08b00b400a Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Tue, 20 Apr 2021 12:28:44 +0200 Subject: [PATCH 3/8] add examples --- examples/siw_cifar100.py | 18 ++++- examples/siw_cifar100_2.py | 135 +++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 examples/siw_cifar100_2.py diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index c661ee307..c62c377cd 100644 --- a/examples/siw_cifar100.py 
+++ b/examples/siw_cifar100.py @@ -4,7 +4,9 @@ from avalanche.models import SimpleMLP from avalanche.training.strategies import Naive from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ - AGEMPlugin, SIWPlugin + AGEMPlugin, SIWPlugin, EvaluationPlugin +from avalanche.logging import InteractiveLogger, TextLogger +from avalanche.evaluation.metrics import accuracy_metrics import torchvision from avalanche.benchmarks.generators import filelist_scenario, \ dataset_scenario, tensor_scenario, paths_scenario @@ -30,10 +32,22 @@ def main(args): siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, batch_size=args.siw_batch_size, num_workers=args.siw_num_workers) + + # log to text file + text_logger = TextLogger(open('/scratch_global/eden/avalanche/log.txt', 'a')) + + # print to stdout + interactive_logger = InteractiveLogger() + + eval_plugin = EvaluationPlugin( + accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), + loggers=[interactive_logger, text_logger] + ) + optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) criterion = CrossEntropyLoss() strategy = Naive(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=args.epochs) + device=device, train_epochs=args.epochs, evaluator=eval_plugin) normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], std=[0.2007, 0.1999, 0.1992]) diff --git a/examples/siw_cifar100_2.py b/examples/siw_cifar100_2.py new file mode 100644 index 000000000..36d185014 --- /dev/null +++ b/examples/siw_cifar100_2.py @@ -0,0 +1,135 @@ +from avalanche.benchmarks.classic import SplitCIFAR100 +from torch.optim import SGD +from torch.nn import CrossEntropyLoss +from avalanche.models import SimpleMLP +from avalanche.training.strategies import Naive +from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ + AGEMPlugin, SIWPlugin, EvaluationPlugin +from avalanche.logging import InteractiveLogger, TextLogger +from avalanche.evaluation.metrics import accuracy_metrics +import torchvision +from avalanche.benchmarks.generators import filelist_scenario, \ + dataset_scenario, tensor_scenario, paths_scenario +from torchvision.transforms import Compose, CenterCrop, Normalize, \ + Scale, Resize, ToTensor, ToPILImage +import torchvision.transforms as transforms +import torch.nn as nn +import torch +from torch.autograd import Variable +import argparse + + +def main(args): + # check if selected GPU is available or use CPU + assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0." 
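+ # fall back to the CPU when --cuda is -1 or no CUDA device is available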
+ device = torch.device(f"cuda:{args.cuda}" + if torch.cuda.is_available() + and args.cuda >= 0 else "cpu") + print(f'Using device: {device}') + ############################################# + model = torchvision.models.resnet18(num_classes=100).to(device) + + siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, + batch_size=args.siw_batch_size, + num_workers=args.siw_num_workers) + + # log to text file + text_logger = TextLogger(open('/scratch_global/eden/avalanche/log.txt', 'a')) + + # print to stdout + interactive_logger = InteractiveLogger() + + eval_plugin = EvaluationPlugin( + accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), + loggers=[interactive_logger, text_logger] + ) + + optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) + criterion = CrossEntropyLoss() + strategy = Naive(model, optimizer, criterion, plugins=[siw], + device=device, train_epochs=args.epochs, evaluator=eval_plugin) + + normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], + std=[0.2007, 0.1999, 0.1992]) + + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize]) + + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize]) + + # # scenario + # scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, + # seed=1234, train_transform=train_transform, + # eval_transform=test_transform) + + # scenario + scenario = filelist_scenario( + root="", + train_file_lists=["/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch1", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch2", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch3", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch4", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch5", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch6", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch7", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch8", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch9", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch10"], + + test_file_lists=["/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch1", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch2", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch3", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch4", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch5", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch6", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch7", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch8", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch9", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch10"], + + task_labels=list(range(0, 10)), + complete_test_set_only=False, + train_transform=train_transform, + eval_transform=test_transform, + ) + + # TRAINING LOOP + print('Starting experiment...') + results = [] + for i, experience in enumerate(scenario.train_stream): + print("Start of experience: ", 
experience.current_experience) + strategy.train(experience) + print('Training completed') + print('Computing accuracy on the test set') + res = strategy.eval(scenario.test_stream[i]) + results.append(res) + + print('Results = ' + str(results)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') + parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') + parser.add_argument('--epochs', type=int, default=10, + help='Number of training epochs.') + parser.add_argument('--batch_size', type=int, default=128, + help='Batch size.') + parser.add_argument('--siw_batch_size', type=int, default=128, + help='Batch size used to extract scores.') + parser.add_argument('--siw_num_workers', type=int, default=8, + help='Number of workers used to extract scores.') + parser.add_argument('--siw_layer_name', type=str, default='fc', + help='Name of the last fully connected layer.') + parser.add_argument('--cuda', type=int, default=0, + help='Specify GPU id to use. Use CPU if -1.') + args = parser.parse_args() + + main(args) From f86f3a49caf2dfca821366258d52b0ca891fd033 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Thu, 22 Apr 2021 10:36:57 +0200 Subject: [PATCH 4/8] test_strategies modified --- tests/test_strategies.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 7c1fecbbd..3bb8c0c4b 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -308,12 +308,8 @@ def test_ar1(self): self.run_strategy(my_nc_scenario, strategy) def test_siw(self): - model = self.get_model(fast_test=self.fast_test) - optimizer = SGD(model.parameters(), lr=0.1) - criterion = CrossEntropyLoss() - # SIT scenario - my_nc_scenario = self.load_scenario(fast_test=self.fast_test) + model, optimizer, criterion, my_nc_scenario = self.init_sit() strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', batch_size=32, num_workers=8, train_mb_size=128, device=self.device, eval_mb_size=32, train_epochs=2) @@ -323,8 +319,7 @@ def test_siw(self): strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', batch_size=32, num_workers=8, train_mb_size=128, device=self.device, eval_mb_size=32, train_epochs=2) - scenario = self.load_scenario(fast_test=self.fast_test, - use_task_labels=True) + scenario = self.load_scenario(use_task_labels=False) self.run_strategy(scenario, strategy) def load_ar1_scenario(self): @@ -370,14 +365,14 @@ def run_strategy(self, scenario, cl_strategy): print('Starting experiment...') cl_strategy.evaluator.loggers = [TextLogger(sys.stdout)] results = [] - for train_batch_info in scenario.train_stream: + for i, train_batch_info in enumerate(scenario.train_stream): print("Start of experience ", train_batch_info.current_experience) cl_strategy.train(train_batch_info) print('Training completed') print('Computing accuracy on the current test set') - results.append(cl_strategy.eval(scenario.test_stream[:])) + results.append(cl_strategy.eval(scenario.test_stream[:i+1])) if __name__ == '__main__': From ac3e171c502845a0f85ce9d76a343ef9496fd071 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Thu, 22 Apr 2021 11:04:58 +0200 Subject: [PATCH 5/8] fix unittest --- avalanche/training/plugins/siw.py | 15 ++++++++++----- examples/siw_cifar100.py | 11 ++++------- tests/test_strategies.py | 21 +++++++++++++++++---- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git 
a/avalanche/training/plugins/siw.py b/avalanche/training/plugins/siw.py index 9f8555ecf..1d71a471a 100644 --- a/avalanche/training/plugins/siw.py +++ b/avalanche/training/plugins/siw.py @@ -91,9 +91,11 @@ class weights and bias, to let only the feature extractor @torch.no_grad() def after_training_exp(self, strategy, **kwargs): """ - Extract new class images' scores and compute the model - confidence at each incremental state + Before evaluating the performance of our model, + we extract new class images' scores and compute the + model's confidence at each incremental state """ + # extract training scores strategy.model.eval() dataset = strategy.experience.dataset @@ -101,6 +103,7 @@ def after_training_exp(self, strategy, **kwargs): dataset, batch_size=self.batch_size, num_workers=self.num_workers) + # compute model's confidence max_top1_scores = [] for i, data in enumerate(loader): inputs, targets, task_labels = data @@ -112,12 +115,14 @@ def after_training_exp(self, strategy, **kwargs): self.confidences.append(sum(max_top1_scores) / len(max_top1_scores)) + @torch.no_grad() def before_eval_exp(self, strategy, **kwargs): """ - Before evaluating the performance of our model, we standardize - all class weights (by subtracting their mean and dividing by - their standard deviation) + Standardize all class weights (by subtracting their mean + and dividing by their standard deviation) """ + + # standardize last layer weights last_layer = self.get_siw_layer() if last_layer is None: raise RuntimeError('Can\'t find this Linear layer') diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index c62c377cd..edf27f01c 100644 --- a/examples/siw_cifar100.py +++ b/examples/siw_cifar100.py @@ -33,15 +33,12 @@ def main(args): batch_size=args.siw_batch_size, num_workers=args.siw_num_workers) - # log to text file - text_logger = TextLogger(open('/scratch_global/eden/avalanche/log.txt', 'a')) - # print to stdout interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), - loggers=[interactive_logger, text_logger] + loggers=[interactive_logger] ) optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) @@ -66,7 +63,7 @@ def main(args): # scenario scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, - seed=1234, train_transform=train_transform, + fixed_class_order=range(0, 100), train_transform=train_transform, eval_transform=test_transform) # TRAINING LOOP @@ -87,7 +84,7 @@ def main(args): parser = argparse.ArgumentParser() parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') - parser.add_argument('--epochs', type=int, default=10, + parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs.') parser.add_argument('--batch_size', type=int, default=128, help='Batch size.') @@ -97,7 +94,7 @@ def main(args): help='Number of workers used to extract scores.') parser.add_argument('--siw_layer_name', type=str, default='fc', help='Name of the last fully connected layer.') - parser.add_argument('--cuda', type=int, default=0, + parser.add_argument('--cuda', type=int, default=1, help='Specify GPU id to use. 
Use CPU if -1.') args = parser.parse_args() diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 9af00309c..08de3813a 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -307,20 +307,33 @@ def test_ar1(self): rm_sz=200) self.run_strategy(my_nc_benchmark, strategy) + def run_siw(self, scenario, cl_strategy): + print('Starting experiment...') + cl_strategy.evaluator.loggers = [TextLogger(sys.stdout)] + results = [] + for i, train_batch_info in enumerate(scenario.train_stream): + print("Start of experience ", train_batch_info.current_experience) + + cl_strategy.train(train_batch_info) + print('Training completed') + + print('Computing accuracy on the current test set') + results.append(cl_strategy.eval(scenario.test_stream[:i+1])) + def test_siw(self): # SIT scenario model, optimizer, criterion, my_nc_scenario = self.init_sit() strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', batch_size=32, num_workers=8, train_mb_size=128, device=self.device, eval_mb_size=32, train_epochs=2) - self.run_strategy(my_nc_scenario, strategy) + self.run_siw(my_nc_scenario, strategy) # MT scenario strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', batch_size=32, num_workers=8, train_mb_size=128, device=self.device, eval_mb_size=32, train_epochs=2) scenario = self.load_scenario(use_task_labels=False) - self.run_strategy(scenario, strategy) + self.run_siw(scenario, strategy) def load_ar1_scenario(self): """ @@ -365,14 +378,14 @@ def run_strategy(self, scenario, cl_strategy): print('Starting experiment...') cl_strategy.evaluator.loggers = [TextLogger(sys.stdout)] results = [] - for i, train_batch_info in enumerate(scenario.train_stream): + for train_batch_info in scenario.train_stream: print("Start of experience ", train_batch_info.current_experience) cl_strategy.train(train_batch_info) print('Training completed') print('Computing accuracy on the current test set') - results.append(cl_strategy.eval(scenario.test_stream[:i+1])) + results.append(cl_strategy.eval(scenario.test_stream[:])) if __name__ == '__main__': From 113e1ab20e45b197b74067f5c465855546b61962 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Thu, 22 Apr 2021 11:31:07 +0200 Subject: [PATCH 6/8] fix unittest --- examples/siw_cifar100_2.py | 135 ------------------------------------- 1 file changed, 135 deletions(-) delete mode 100644 examples/siw_cifar100_2.py diff --git a/examples/siw_cifar100_2.py b/examples/siw_cifar100_2.py deleted file mode 100644 index 36d185014..000000000 --- a/examples/siw_cifar100_2.py +++ /dev/null @@ -1,135 +0,0 @@ -from avalanche.benchmarks.classic import SplitCIFAR100 -from torch.optim import SGD -from torch.nn import CrossEntropyLoss -from avalanche.models import SimpleMLP -from avalanche.training.strategies import Naive -from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ - AGEMPlugin, SIWPlugin, EvaluationPlugin -from avalanche.logging import InteractiveLogger, TextLogger -from avalanche.evaluation.metrics import accuracy_metrics -import torchvision -from avalanche.benchmarks.generators import filelist_scenario, \ - dataset_scenario, tensor_scenario, paths_scenario -from torchvision.transforms import Compose, CenterCrop, Normalize, \ - Scale, Resize, ToTensor, ToPILImage -import torchvision.transforms as transforms -import torch.nn as nn -import torch -from torch.autograd import Variable -import argparse - - -def main(args): - # check if selected GPU is available or use CPU - assert args.cuda == -1 or args.cuda >= 0, "cuda 
must be -1 or >= 0." - device = torch.device(f"cuda:{args.cuda}" - if torch.cuda.is_available() - and args.cuda >= 0 else "cpu") - print(f'Using device: {device}') - ############################################# - model = torchvision.models.resnet18(num_classes=100).to(device) - - siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, - batch_size=args.siw_batch_size, - num_workers=args.siw_num_workers) - - # log to text file - text_logger = TextLogger(open('/scratch_global/eden/avalanche/log.txt', 'a')) - - # print to stdout - interactive_logger = InteractiveLogger() - - eval_plugin = EvaluationPlugin( - accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), - loggers=[interactive_logger, text_logger] - ) - - optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) - criterion = CrossEntropyLoss() - strategy = Naive(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=args.epochs, evaluator=eval_plugin) - - normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], - std=[0.2007, 0.1999, 0.1992]) - - train_transform = transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize]) - - test_transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize]) - - # # scenario - # scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, - # seed=1234, train_transform=train_transform, - # eval_transform=test_transform) - - # scenario - scenario = filelist_scenario( - root="", - train_file_lists=["/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch1", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch2", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch3", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch4", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch5", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch6", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch7", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch8", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch9", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch10"], - - test_file_lists=["/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch1", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch2", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch3", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch4", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch5", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch6", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch7", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch8", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch9", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch10"], - - task_labels=list(range(0, 10)), - complete_test_set_only=False, - train_transform=train_transform, - eval_transform=test_transform, - ) - - # TRAINING LOOP - print('Starting experiment...') - results = [] - for i, experience in enumerate(scenario.train_stream): - print("Start of experience: ", 
experience.current_experience) - strategy.train(experience) - print('Training completed') - print('Computing accuracy on the test set') - res = strategy.eval(scenario.test_stream[i]) - results.append(res) - - print('Results = ' + str(results)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') - parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') - parser.add_argument('--epochs', type=int, default=10, - help='Number of training epochs.') - parser.add_argument('--batch_size', type=int, default=128, - help='Batch size.') - parser.add_argument('--siw_batch_size', type=int, default=128, - help='Batch size used to extract scores.') - parser.add_argument('--siw_num_workers', type=int, default=8, - help='Number of workers used to extract scores.') - parser.add_argument('--siw_layer_name', type=str, default='fc', - help='Name of the last fully connected layer.') - parser.add_argument('--cuda', type=int, default=0, - help='Specify GPU id to use. Use CPU if -1.') - args = parser.parse_args() - - main(args) From 029882f49663b6378e9653f4c0caa55b2a3bc586 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Thu, 22 Apr 2021 11:37:08 +0200 Subject: [PATCH 7/8] fix pep8 --- examples/siw_cifar100.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index edf27f01c..3e398f59a 100644 --- a/examples/siw_cifar100.py +++ b/examples/siw_cifar100.py @@ -37,14 +37,16 @@ def main(args): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), + accuracy_metrics(minibatch=False, epoch=True, experience=True, + stream=True), loggers=[interactive_logger] ) optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) criterion = CrossEntropyLoss() strategy = Naive(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=args.epochs, evaluator=eval_plugin) + device=device, train_epochs=args.epochs, + evaluator=eval_plugin) normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], std=[0.2007, 0.1999, 0.1992]) @@ -63,7 +65,8 @@ def main(args): # scenario scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, - fixed_class_order=range(0, 100), train_transform=train_transform, + fixed_class_order=range(0, 100), + train_transform=train_transform, eval_transform=test_transform) # TRAINING LOOP From 4d1c761feb01cbe32de6c9593401f54cf3d90d4c Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Mon, 26 Apr 2021 15:11:30 +0200 Subject: [PATCH 8/8] fix hyperparameters and unittest bugs --- avalanche/training/plugins/__init__.py | 2 +- .../training/strategies/strategy_wrappers.py | 44 ------- examples/siw_cifar100.py | 117 +++++++++++++----- 3 files changed, 85 insertions(+), 78 deletions(-) diff --git a/avalanche/training/plugins/__init__.py b/avalanche/training/plugins/__init__.py index 6da8cd80f..2dd57ac4f 100644 --- a/avalanche/training/plugins/__init__.py +++ b/avalanche/training/plugins/__init__.py @@ -10,4 +10,4 @@ from .strategy_plugin import StrategyPlugin from .synaptic_intelligence import SynapticIntelligencePlugin from .siw import SIWPlugin -from .cope import CoPEPlugin, PPPloss \ No newline at end of file +from .cope import CoPEPlugin, PPPloss diff --git a/avalanche/training/strategies/strategy_wrappers.py b/avalanche/training/strategies/strategy_wrappers.py index 09d8fe137..873512abc 100644 
--- a/avalanche/training/strategies/strategy_wrappers.py +++ b/avalanche/training/strategies/strategy_wrappers.py @@ -449,50 +449,6 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, eval_every=eval_every ) - def __init__(self, model: Module, optimizer: Optimizer, criterion, - patterns_per_exp: int, memory_strength: float = 0.5, - train_mb_size: int = 1, train_epochs: int = 1, - eval_mb_size: int = None, device=None, - plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): - """ Gradient Episodic Memory (GEM) strategy. - See GEM plugin for details. - This strategy does not use task identities. - - :param model: The model. - :param optimizer: The optimizer to use. - :param criterion: The loss criterion to use. - :param patterns_per_exp: number of patterns per experience in the memory - :param memory_strength: offset to add to the projection direction - in order to favour backward transfer (gamma in original paper). - :param train_mb_size: The train minibatch size. Defaults to 1. - :param train_epochs: The number of training epochs. Defaults to 1. - :param eval_mb_size: The eval minibatch size. Defaults to 1. - :param device: The device to use. Defaults to None (cpu). - :param plugins: Plugins to be added. Defaults to None. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. - if -1: no evaluation during training. - if 0: calls `eval` after the final epoch of each training - experience. - if >0: calls `eval` every `eval_every` epochs and at the end - of all the epochs for a single experience. - """ - - gem = GEMPlugin(patterns_per_exp, memory_strength) - if plugins is None: - plugins = [gem] - else: - plugins.append(gem) - - super().__init__( - model, optimizer, criterion, - train_mb_size=train_mb_size, train_epochs=train_epochs, - eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) - class SIW(BaseStrategy): def __init__(self, model: Module, optimizer: Optimizer, criterion, diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index 3e398f59a..66cf49c1f 100644 --- a/examples/siw_cifar100.py +++ b/examples/siw_cifar100.py @@ -1,38 +1,59 @@ from avalanche.benchmarks.classic import SplitCIFAR100 from torch.optim import SGD from torch.nn import CrossEntropyLoss -from avalanche.models import SimpleMLP from avalanche.training.strategies import Naive -from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ - AGEMPlugin, SIWPlugin, EvaluationPlugin -from avalanche.logging import InteractiveLogger, TextLogger +from avalanche.training.plugins import SIWPlugin,\ + EvaluationPlugin, StrategyPlugin +from avalanche.logging import InteractiveLogger from avalanche.evaluation.metrics import accuracy_metrics import torchvision -from avalanche.benchmarks.generators import filelist_scenario, \ - dataset_scenario, tensor_scenario, paths_scenario -from torchvision.transforms import Compose, CenterCrop, Normalize, \ - Scale, Resize, ToTensor, ToPILImage import torchvision.transforms as transforms import torch.nn as nn import torch -from torch.autograd import Variable import argparse +from torch.optim import lr_scheduler + + +class LRSchedulerPlugin(StrategyPlugin): + def __init__(self, lr_scheduler): + super().__init__() + self.lr_scheduler = lr_scheduler + + def after_training_epoch(self, strategy: 'BaseStrategy', **kwargs): + 
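+ # ReduceLROnPlateau expects the monitored metric at each step: the current
+ # training loss is passed in, and the updated learning rate is printed below.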
self.lr_scheduler.step(strategy.loss.cpu().data.numpy()) + lr = strategy.optimizer.param_groups[0]['lr'] + print(f"\nlr = {lr}") + + +class SetIncrementalHyperParams(StrategyPlugin): + def __init__(self, inc_exp_epochs, inc_exp_patience, first_exp_lr, + lr_decay): + super().__init__() + self.inc_exp_epochs = inc_exp_epochs + self.inc_exp_patience = inc_exp_patience + self.first_exp_lr = first_exp_lr + self.lr_decay = lr_decay + + def before_training_exp(self, strategy: 'BaseStrategy', **kwargs): + if strategy.experience.current_experience > 0: # incremental update + strategy.train_epochs = self.inc_exp_epochs + strategy.optimizer.param_groups[0]['lr'] = \ + self.first_exp_lr / strategy.experience.current_experience + strategy.scheduler = LRSchedulerPlugin( + lr_scheduler.ReduceLROnPlateau(strategy.optimizer, + patience=self.inc_exp_patience, + factor=self.lr_decay)) def main(args): # check if selected GPU is available or use CPU assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0." - device = torch.device(f"cuda:{args.cuda}" - if torch.cuda.is_available() + device = torch.device(f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu") print(f'Using device: {device}') ############################################# model = torchvision.models.resnet18(num_classes=100).to(device) - siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, - batch_size=args.siw_batch_size, - num_workers=args.siw_num_workers) - # print to stdout interactive_logger = InteractiveLogger() @@ -42,14 +63,32 @@ def main(args): loggers=[interactive_logger] ) - optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) + optimizer = SGD(model.parameters(), lr=args.first_exp_lr, + momentum=args.momentum, + weight_decay=args.weight_decay) criterion = CrossEntropyLoss() - strategy = Naive(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=args.epochs, - evaluator=eval_plugin) + scheduler = LRSchedulerPlugin( + lr_scheduler.ReduceLROnPlateau(optimizer, + patience=args.first_exp_patience, + factor=args.lr_decay)) + incremental_params = SetIncrementalHyperParams(args.inc_exp_epochs, + args.inc_exp_patience, + args.first_exp_lr, + args.lr_decay) + + siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, + batch_size=args.eval_batch_size, + num_workers=args.num_workers) + + strategy = Naive(model, optimizer, criterion, + device=device, train_epochs=args.first_exp_epochs, + evaluator=eval_plugin, + plugins=[siw, scheduler, incremental_params], + train_mb_size=args.train_batch_size, + eval_mb_size=args.eval_batch_size) - normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], - std=[0.2007, 0.1999, 0.1992]) + normalize = transforms.Normalize(mean=[0.5071, 0.4866, 0.4409], + std=[0.2673, 0.2564, 0.2762]) train_transform = transforms.Compose([ transforms.RandomResizedCrop(224), @@ -68,16 +107,16 @@ def main(args): fixed_class_order=range(0, 100), train_transform=train_transform, eval_transform=test_transform) - # TRAINING LOOP print('Starting experiment...') results = [] for i, experience in enumerate(scenario.train_stream): print("Start of experience: ", experience.current_experience) - strategy.train(experience) + strategy.train(experience, num_workers=args.num_workers) print('Training completed') print('Computing accuracy on the test set') - res = strategy.eval(scenario.test_stream[:i+1]) + res = strategy.eval(scenario.test_stream[:i + 1], + num_workers=args.num_workers) results.append(res) print('Results = ' + str(results)) @@ -85,15 +124,27 
@@ def main(args): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') - parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') - parser.add_argument('--epochs', type=int, default=100, - help='Number of training epochs.') - parser.add_argument('--batch_size', type=int, default=128, - help='Batch size.') - parser.add_argument('--siw_batch_size', type=int, default=128, - help='Batch size used to extract scores.') - parser.add_argument('--siw_num_workers', type=int, default=8, + parser.add_argument('--first_exp_lr', type=float, default=0.1, + help='Learning rate for the first experience.') + parser.add_argument('--momentum', type=float, default=0.9, + help='Momentum') + parser.add_argument('--weight_decay', type=float, default=0.0005, + help='Weight decay') + parser.add_argument('--lr_decay', type=float, default=0.1, + help='LR decay') + parser.add_argument('--first_exp_patience', type=int, default=60, + help='Patience in the first experience') + parser.add_argument('--inc_exp_patience', type=int, default=15, + help='Patience in the incremental experiences') + parser.add_argument('--first_exp_epochs', type=int, default=300, + help='Number of epochs in the first experience.') + parser.add_argument('--inc_exp_epochs', type=int, default=70, + help='Number of epochs in each incremental experience.') + parser.add_argument('--train_batch_size', type=int, default=128, + help='Training batch size.') + parser.add_argument('--eval_batch_size', type=int, default=32, + help='Evaluation batch size.') + parser.add_argument('--num_workers', type=int, default=8, help='Number of workers used to extract scores.') parser.add_argument('--siw_layer_name', type=str, default='fc', help='Name of the last fully connected layer.')
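
For reference, the two operations that the SIW plugin applies at evaluation time can be reproduced on toy tensors in a few lines. The sketch below is illustrative only (standalone toy tensors and made-up confidence values, not the plugin code itself): each class row of the last fully connected layer is standardized by its own mean and standard deviation, and past-class logits are then rescaled by the ratio between the current state's confidence and the confidence of the state in which those classes were first learned.

import torch

# Toy last fully connected layer: 6 classes seen so far, feature size 4.
fc_weight = torch.randn(6, 4)

# (1) Standardization of initial weights (cf. before_eval_exp): centre each
# class row on its own mean and divide it by its own standard deviation.
mu = fc_weight.mean(dim=1, keepdim=True)
std = fc_weight.std(dim=1, keepdim=True)
standardized_weight = (fc_weight - mu) / std

# (2) State-level calibration (cf. after_eval_forward): rescale the logits of
# each group of classes by current-state confidence / initial-state confidence.
confidences = [0.92, 0.75, 0.61]              # mean top-1 score per state (made-up values)
classes_per_state = [[0, 1], [2, 3], [4, 5]]  # classes introduced in each state
current_state = 2

logits = torch.randn(8, 6)                    # raw scores for a batch of 8 samples
for state, classes in enumerate(classes_per_state):
    logits[:, classes] *= confidences[current_state] / confidences[state]

For the classes of the current state the ratio is 1, so looping over every state, as the plugin does, leaves them untouched.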