From 7549700d458bf970225919f1b2f2f4b1628e6311 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Mon, 12 Apr 2021 22:05:17 +0200 Subject: [PATCH 1/8] siw plugin added --- avalanche/training/plugins/__init__.py | 1 + avalanche/training/plugins/siw.py | 119 +++++++++++++++++++++++++ examples/siw_cifar100.py | 61 +++++++++++++ 3 files changed, 181 insertions(+) create mode 100644 avalanche/training/plugins/siw.py create mode 100644 examples/siw_cifar100.py diff --git a/avalanche/training/plugins/__init__.py b/avalanche/training/plugins/__init__.py index 0b2c96703..7a5326c4a 100644 --- a/avalanche/training/plugins/__init__.py +++ b/avalanche/training/plugins/__init__.py @@ -9,3 +9,4 @@ from .replay import ReplayPlugin from .strategy_plugin import StrategyPlugin from .synaptic_intelligence import SynapticIntelligencePlugin +from .siw import SIWPlugin diff --git a/avalanche/training/plugins/siw.py b/avalanche/training/plugins/siw.py new file mode 100644 index 000000000..26cf560f7 --- /dev/null +++ b/avalanche/training/plugins/siw.py @@ -0,0 +1,119 @@ +from torch.utils.data import random_split, ConcatDataset +from avalanche.benchmarks.utils import AvalancheConcatDataset +from avalanche.training.plugins.strategy_plugin import StrategyPlugin +from avalanche.benchmarks.utils.data_loader import \ + MultiTaskJoinedBatchDataLoader +import torch +import torch.cuda as tc +from torch.autograd import Variable +import torch.nn as nn + + +class SIWPlugin(StrategyPlugin): + """ + Standardization of Initial Weights (SIW) plugin. + From https://arxiv.org/pdf/2008.13710.pdf + + Performs past class initial weights replay and state-level score + calibration. The callbacks `before_training_exp`, `after_backward`, + `after_training_exp`,`before_eval_exp`, and `after_eval_forward` + are implemented. + + The `before_training_exp` callback is implemented in order to keep + track of the classes in each experience + + The `after_backward` callback is implemented in order to freeze past + class weights in the last fully connected layer + + The `after_training_exp` callback is implemented in order to extract + new class images' scores and compute the model confidence at + each incremental state. + + The `before_eval_exp` callback is implemented in order to standardize + initial weights before inference + + The`after_eval_forward` is implemented in order to apply state-level + calibration at the inference time + + The :batch_size: and :num_workers: parameters concern the new class + scores extraction. + """ + + def __init__(self, batch_size, num_workers): + super().__init__() + self.confidences = [] + self.classes_per_experience = [] + self.num_workers = num_workers + self.batch_size = batch_size + + def before_training_exp(self, strategy, **kwargs): + """ + Keep track of the classes encountered in each experience + """ + self.classes_per_experience.append( + strategy.experience.classes_in_this_experience) + + def after_backward(self, strategy, **kwargs): + """ + Before executing the optimization step to perform + back-propagation, we zero the gradients of past class + weights and bias. 
This is equivalent to freeze past + class weights and bias, to let only the feature extractor + and the new class weights and bias evolve + """ + previous_classes = len(strategy.experience.previous_classes) + strategy.model.fc.weight.grad[:previous_classes, :] = 0 + strategy.model.fc.bias.grad[:previous_classes] = 0 + + @torch.no_grad() + def after_training_exp(self, strategy, **kwargs): + """ + Extract new class images' scores and compute the model + confidence at each incremental state + """ + strategy.model.eval() + + dataset = strategy.experience.dataset + loader = torch.utils.data.DataLoader( + dataset, batch_size=self.batch_size, + num_workers=self.num_workers) + + max_top1_scores = [] + for i, data in enumerate(loader): + inputs, targets, task_labels = data + if tc.is_available(): + inputs = inputs.to(strategy.device) + inputs = Variable(inputs) + logits = strategy.model(inputs) + max_score = torch.max(logits, dim=1)[0].tolist() + max_top1_scores.extend(max_score) + self.confidences.append(sum(max_top1_scores) / + len(max_top1_scores)) + + def before_eval_exp(self, strategy, **kwargs): + """ + Before evaluating the performance of our model, we standardize + all class weights (by subtracting their mean and dividing by + their standard deviation) + """ + previous_classes = len(strategy.experience.previous_classes) + classes_seen_so_far = len(strategy.experience.classes_seen_so_far) + + for i in range(previous_classes, classes_seen_so_far): + mu = torch.mean(strategy.model.fc.weight[i]) + std = torch.std(strategy.model.fc.weight[i]) + + strategy.model.fc.weight.data[i] -= mu + strategy.model.fc.weight.data[i] /= std + + def after_eval_forward(self, strategy, **kwargs): + """ + Rectify past class scores by multiplying them by the model's + confidence in the current state and dividing them by the + model's confidence in the initial state in which a past + class was encountered for the first time + """ + for exp in range(len(self.confidences)): + strategy.logits[:, self.classes_per_experience[exp]] *=\ + self.confidences[strategy.experience.current_experience] \ + / self.confidences[exp] diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py new file mode 100644 index 000000000..c83ba6528 --- /dev/null +++ b/examples/siw_cifar100.py @@ -0,0 +1,61 @@ +from avalanche.benchmarks.classic import SplitCIFAR100 +from torch.optim import SGD +from torch.nn import CrossEntropyLoss +from avalanche.models import SimpleMLP +from avalanche.training.strategies import BaseStrategy +from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ + AGEMPlugin, SIWPlugin +import torchvision +from avalanche.benchmarks.generators import filelist_scenario, \ + dataset_scenario, tensor_scenario, paths_scenario +from torchvision.transforms import Compose, CenterCrop, Normalize, \ + Scale, Resize, ToTensor, ToPILImage +import torchvision.transforms as transforms +import torch.nn as nn +import torch as th +import torch.cuda as tc +from torch.autograd import Variable + +################################################ +P = 10 # number of classes in each state +device = 'cuda:0' +############################################# +siw = SIWPlugin(batch_size=32, num_workers=8) +model = torchvision.models.resnet18(num_classes=100).to(device) +optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9) +criterion = CrossEntropyLoss() +strategy = BaseStrategy(model, optimizer, criterion, plugins=[siw], + device=device, train_epochs=10) + +normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], + 
std=[0.2007, 0.1999, 0.1992]) + +train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize]) + +test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize]) + +# scenario +scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, + seed=1234, train_transform=train_transform, + eval_transform=test_transform) + +# TRAINING LOOP +print('Starting experiment...') +results = [] +for i, experience in enumerate(scenario.train_stream): + print("Start of experience: ", experience.current_experience) + strategy.train(experience) + print('Training completed') + print('Computing accuracy on the test set') + res = strategy.eval(scenario.test_stream[i]) + results.append(res) + +print('Results = ' + str(results)) From 089ffe1e58578ed61827bf4652c959c43be4f6e5 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Sat, 17 Apr 2021 16:10:37 +0200 Subject: [PATCH 2/8] fix some code issues --- avalanche/training/plugins/siw.py | 47 +++++-- .../training/strategies/strategy_wrappers.py | 96 +++++++++++++- examples/siw_cifar100.py | 121 +++++++++++------- tests/test_strategies.py | 38 ++++-- 4 files changed, 234 insertions(+), 68 deletions(-) diff --git a/avalanche/training/plugins/siw.py b/avalanche/training/plugins/siw.py index 26cf560f7..9f8555ecf 100644 --- a/avalanche/training/plugins/siw.py +++ b/avalanche/training/plugins/siw.py @@ -7,6 +7,9 @@ import torch.cuda as tc from torch.autograd import Variable import torch.nn as nn +from avalanche.training.utils import get_last_fc_layer, get_layer_by_name +from typing import Optional +from torch.nn import Linear class SIWPlugin(StrategyPlugin): @@ -35,17 +38,33 @@ class weights in the last fully connected layer The`after_eval_forward` is implemented in order to apply state-level calibration at the inference time + The :siw_layer_name: parameter concerns the name of the last fully + connected layer of the network + The :batch_size: and :num_workers: parameters concern the new class scores extraction. 
""" - def __init__(self, batch_size, num_workers): + def __init__(self, model, siw_layer_name='fc', batch_size=32, + num_workers=0): super().__init__() self.confidences = [] self.classes_per_experience = [] + self.model = model + self.siw_layer_name = siw_layer_name self.num_workers = num_workers self.batch_size = batch_size + def get_siw_layer(self) -> Optional[Linear]: + result = None + if self.siw_layer_name is None: + last_fc = get_last_fc_layer(self.model) + if last_fc is not None: + result = last_fc[1] + else: + result = get_layer_by_name(self.model, self.siw_layer_name) + return result + def before_training_exp(self, strategy, **kwargs): """ Keep track of the classes encountered in each experience @@ -62,8 +81,12 @@ class weights and bias, to let only the feature extractor and the new class weights and bias evolve """ previous_classes = len(strategy.experience.previous_classes) - strategy.model.fc.weight.grad[:previous_classes, :] = 0 - strategy.model.fc.bias.grad[:previous_classes] = 0 + last_layer = self.get_siw_layer() + if last_layer is None: + raise RuntimeError('Can\'t find this Linear layer') + + last_layer.weight.grad[:previous_classes, :] = 0 + last_layer.bias.grad[:previous_classes] = 0 @torch.no_grad() def after_training_exp(self, strategy, **kwargs): @@ -83,7 +106,6 @@ def after_training_exp(self, strategy, **kwargs): inputs, targets, task_labels = data if tc.is_available(): inputs = inputs.to(strategy.device) - inputs = Variable(inputs) logits = strategy.model(inputs) max_score = torch.max(logits, dim=1)[0].tolist() max_top1_scores.extend(max_score) @@ -96,15 +118,18 @@ def before_eval_exp(self, strategy, **kwargs): all class weights (by subtracting their mean and dividing by their standard deviation) """ - previous_classes = len(strategy.experience.previous_classes) + last_layer = self.get_siw_layer() + if last_layer is None: + raise RuntimeError('Can\'t find this Linear layer') + classes_seen_so_far = len(strategy.experience.classes_seen_so_far) - for i in range(previous_classes, classes_seen_so_far): - mu = torch.mean(strategy.model.fc.weight[i]) - std = torch.std(strategy.model.fc.weight[i]) + for i in range(classes_seen_so_far): + mu = torch.mean(last_layer.weight[i]) + std = torch.std(last_layer.weight[i]) - strategy.model.fc.weight.data[i] -= mu - strategy.model.fc.weight.data[i] /= std + last_layer.weight.data[i] -= mu + last_layer.weight.data[i] /= std def after_eval_forward(self, strategy, **kwargs): """ @@ -114,6 +139,6 @@ def after_eval_forward(self, strategy, **kwargs): class was encountered for the first time """ for exp in range(len(self.confidences)): - strategy.logits[:, self.classes_per_experience[exp]] *=\ + strategy.logits[:, self.classes_per_experience[exp]] *= \ self.confidences[strategy.experience.current_experience] \ / self.confidences[exp] diff --git a/avalanche/training/strategies/strategy_wrappers.py b/avalanche/training/strategies/strategy_wrappers.py index d2c876baa..6cf59fb9e 100644 --- a/avalanche/training/strategies/strategy_wrappers.py +++ b/avalanche/training/strategies/strategy_wrappers.py @@ -16,7 +16,7 @@ from avalanche.training import default_logger from avalanche.training.plugins import StrategyPlugin, CWRStarPlugin, \ ReplayPlugin, GDumbPlugin, LwFPlugin, AGEMPlugin, GEMPlugin, EWCPlugin, \ - EvaluationPlugin, SynapticIntelligencePlugin + EvaluationPlugin, SynapticIntelligencePlugin, SIWPlugin from avalanche.training.strategies.base_strategy import BaseStrategy @@ -444,6 +444,97 @@ def __init__(self, model: Module, optimizer: 
Optimizer, criterion, eval_every=eval_every ) + def __init__(self, model: Module, optimizer: Optimizer, criterion, + patterns_per_exp: int, memory_strength: float = 0.5, + train_mb_size: int = 1, train_epochs: int = 1, + eval_mb_size: int = None, device=None, + plugins: Optional[List[StrategyPlugin]] = None, + evaluator: EvaluationPlugin = default_logger, eval_every=-1): + """ Gradient Episodic Memory (GEM) strategy. + See GEM plugin for details. + This strategy does not use task identities. + + :param model: The model. + :param optimizer: The optimizer to use. + :param criterion: The loss criterion to use. + :param patterns_per_exp: number of patterns per experience in the memory + :param memory_strength: offset to add to the projection direction + in order to favour backward transfer (gamma in original paper). + :param train_mb_size: The train minibatch size. Defaults to 1. + :param train_epochs: The number of training epochs. Defaults to 1. + :param eval_mb_size: The eval minibatch size. Defaults to 1. + :param device: The device to use. Defaults to None (cpu). + :param plugins: Plugins to be added. Defaults to None. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. + if -1: no evaluation during training. + if 0: calls `eval` after the final epoch of each training + experience. + if >0: calls `eval` every `eval_every` epochs and at the end + of all the epochs for a single experience. + """ + + gem = GEMPlugin(patterns_per_exp, memory_strength) + if plugins is None: + plugins = [gem] + else: + plugins.append(gem) + + super().__init__( + model, optimizer, criterion, + train_mb_size=train_mb_size, train_epochs=train_epochs, + eval_mb_size=eval_mb_size, device=device, plugins=plugins, + evaluator=evaluator, eval_every=eval_every) + + +class SIW(BaseStrategy): + def __init__(self, model: Module, optimizer: Optimizer, criterion, + siw_layer_name: str = 'fc', + batch_size: int = 32, num_workers: int = 0, + train_mb_size: int = 1, train_epochs: int = 1, + eval_mb_size: int = None, device=None, + plugins: Optional[List[StrategyPlugin]] = None, + evaluator: EvaluationPlugin = default_logger, eval_every=-1): + """ Standardization of Initial Weights (SIW) strategy. + See SIW plugin for details. + This strategy does not use task identities. + + :param model: The model. + :param optimizer: The optimizer to use. + :param criterion: The loss criterion to use. + :param siw_layer_name: The name of the last fully connected layer + :param num_workers: The number of workers used to load batches + :param batch_size: The batch size used to extract scores + :param train_mb_size: The train minibatch size. Defaults to 1. + :param train_epochs: The number of training epochs. Defaults to 1. + :param eval_mb_size: The eval minibatch size. Defaults to 1. + :param device: The device to use. Defaults to None (cpu). + :param plugins: Plugins to be added. Defaults to None. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. + if -1: no evaluation during training. + if 0: calls `eval` after the final epoch of each training + experience. + if >0: calls `eval` every `eval_every` epochs and at the end + of all the epochs for a single experience. 
+ """ + + siw = SIWPlugin(model, siw_layer_name, batch_size, num_workers) + if plugins is None: + plugins = [siw] + else: + plugins.append(siw) + + super().__init__( + model, optimizer, criterion, + train_mb_size=train_mb_size, train_epochs=train_epochs, + eval_mb_size=eval_mb_size, device=device, plugins=plugins, + evaluator=evaluator, eval_every=eval_every) + __all__ = [ 'Naive', @@ -454,5 +545,6 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, 'AGEM', 'GEM', 'EWC', - 'SynapticIntelligence' + 'SynapticIntelligence', + 'SIW' ] diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index c83ba6528..c661ee307 100644 --- a/examples/siw_cifar100.py +++ b/examples/siw_cifar100.py @@ -2,7 +2,7 @@ from torch.optim import SGD from torch.nn import CrossEntropyLoss from avalanche.models import SimpleMLP -from avalanche.training.strategies import BaseStrategy +from avalanche.training.strategies import Naive from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ AGEMPlugin, SIWPlugin import torchvision @@ -12,50 +12,79 @@ Scale, Resize, ToTensor, ToPILImage import torchvision.transforms as transforms import torch.nn as nn -import torch as th -import torch.cuda as tc +import torch from torch.autograd import Variable +import argparse -################################################ -P = 10 # number of classes in each state -device = 'cuda:0' -############################################# -siw = SIWPlugin(batch_size=32, num_workers=8) -model = torchvision.models.resnet18(num_classes=100).to(device) -optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9) -criterion = CrossEntropyLoss() -strategy = BaseStrategy(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=10) - -normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], - std=[0.2007, 0.1999, 0.1992]) - -train_transform = transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize]) - -test_transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize]) - -# scenario -scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, - seed=1234, train_transform=train_transform, - eval_transform=test_transform) - -# TRAINING LOOP -print('Starting experiment...') -results = [] -for i, experience in enumerate(scenario.train_stream): - print("Start of experience: ", experience.current_experience) - strategy.train(experience) - print('Training completed') - print('Computing accuracy on the test set') - res = strategy.eval(scenario.test_stream[i]) - results.append(res) - -print('Results = ' + str(results)) + +def main(args): + # check if selected GPU is available or use CPU + assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0." 
+ device = torch.device(f"cuda:{args.cuda}" + if torch.cuda.is_available() + and args.cuda >= 0 else "cpu") + print(f'Using device: {device}') + ############################################# + model = torchvision.models.resnet18(num_classes=100).to(device) + + siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, + batch_size=args.siw_batch_size, + num_workers=args.siw_num_workers) + optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) + criterion = CrossEntropyLoss() + strategy = Naive(model, optimizer, criterion, plugins=[siw], + device=device, train_epochs=args.epochs) + + normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], + std=[0.2007, 0.1999, 0.1992]) + + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize]) + + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize]) + + # scenario + scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, + seed=1234, train_transform=train_transform, + eval_transform=test_transform) + + # TRAINING LOOP + print('Starting experiment...') + results = [] + for i, experience in enumerate(scenario.train_stream): + print("Start of experience: ", experience.current_experience) + strategy.train(experience) + print('Training completed') + print('Computing accuracy on the test set') + res = strategy.eval(scenario.test_stream[:i+1]) + results.append(res) + + print('Results = ' + str(results)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') + parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') + parser.add_argument('--epochs', type=int, default=10, + help='Number of training epochs.') + parser.add_argument('--batch_size', type=int, default=128, + help='Batch size.') + parser.add_argument('--siw_batch_size', type=int, default=128, + help='Batch size used to extract scores.') + parser.add_argument('--siw_num_workers', type=int, default=8, + help='Number of workers used to extract scores.') + parser.add_argument('--siw_layer_name', type=str, default='fc', + help='Name of the last fully connected layer.') + parser.add_argument('--cuda', type=int, default=0, + help='Specify GPU id to use. 
Use CPU if -1.') + args = parser.parse_args() + + main(args) diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 1aeb8584e..ea6394554 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -30,7 +30,7 @@ from avalanche.training.plugins import EvaluationPlugin from avalanche.training.strategies import Naive, Replay, CWRStar, \ GDumb, LwF, AGEM, GEM, EWC, \ - SynapticIntelligence, JointTraining + SynapticIntelligence, JointTraining, SIW from avalanche.training.strategies.ar1 import AR1 from avalanche.training.strategies.cumulative import Cumulative from avalanche.benchmarks import nc_scenario, SplitCIFAR10 @@ -227,17 +227,17 @@ def test_gdumb(self): # SIT scenario my_nc_scenario = self.load_scenario(fast_test=self.fast_test) strategy = GDumb( - model, optimizer, criterion, - mem_size=200, train_mb_size=64, device=self.device, - eval_mb_size=50, train_epochs=2 + model, optimizer, criterion, + mem_size=200, train_mb_size=64, device=self.device, + eval_mb_size=50, train_epochs=2 ) self.run_strategy(my_nc_scenario, strategy) # MT scenario strategy = GDumb( - model, optimizer, criterion, - mem_size=200, train_mb_size=64, device=self.device, - eval_mb_size=50, train_epochs=2 + model, optimizer, criterion, + mem_size=200, train_mb_size=64, device=self.device, + eval_mb_size=50, train_epochs=2 ) scenario = self.load_scenario(fast_test=self.fast_test, use_task_labels=True) @@ -271,7 +271,7 @@ def test_lwf(self): # SIT scenario my_nc_scenario = self.load_scenario(fast_test=self.fast_test) strategy = LwF(model, optimizer, criterion, - alpha=[0, 1/2, 2*(2/3), 3*(3/4), 4*(4/5)], + alpha=[0, 1 / 2, 2 * (2 / 3), 3 * (3 / 4), 4 * (4 / 5)], temperature=2, device=self.device, train_mb_size=10, eval_mb_size=50, train_epochs=2) @@ -279,7 +279,7 @@ def test_lwf(self): # MT scenario strategy = LwF(model, optimizer, criterion, - alpha=[0, 1/2, 2*(2/3), 3*(3/4), 4*(4/5)], + alpha=[0, 1 / 2, 2 * (2 / 3), 3 * (3 / 4), 4 * (4 / 5)], temperature=2, device=self.device, train_mb_size=10, eval_mb_size=50, train_epochs=2) @@ -408,6 +408,26 @@ def test_ar1(self): self.run_strategy(my_nc_scenario, strategy) + def test_siw(self): + model = self.get_model(fast_test=self.fast_test) + optimizer = SGD(model.parameters(), lr=0.1) + criterion = CrossEntropyLoss() + + # SIT scenario + my_nc_scenario = self.load_scenario(fast_test=self.fast_test) + strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', + batch_size=32, num_workers=8, train_mb_size=128, + device=self.device, eval_mb_size=32, train_epochs=2) + self.run_strategy(my_nc_scenario, strategy) + + # MT scenario + strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', + batch_size=32, num_workers=8, train_mb_size=128, + device=self.device, eval_mb_size=32, train_epochs=2) + scenario = self.load_scenario(fast_test=self.fast_test, + use_task_labels=True) + self.run_strategy(scenario, strategy) + def load_ar1_scenario(self, fast_test=False): """ Returns a NC Scenario from a fake dataset of 10 classes, 5 experiences, From 4c29426a0b3f3b8edaa9e515a94a6a08b00b400a Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Tue, 20 Apr 2021 12:28:44 +0200 Subject: [PATCH 3/8] add examples --- examples/siw_cifar100.py | 18 ++++- examples/siw_cifar100_2.py | 135 +++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 examples/siw_cifar100_2.py diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index c661ee307..c62c377cd 100644 --- a/examples/siw_cifar100.py 
+++ b/examples/siw_cifar100.py @@ -4,7 +4,9 @@ from avalanche.models import SimpleMLP from avalanche.training.strategies import Naive from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ - AGEMPlugin, SIWPlugin + AGEMPlugin, SIWPlugin, EvaluationPlugin +from avalanche.logging import InteractiveLogger, TextLogger +from avalanche.evaluation.metrics import accuracy_metrics import torchvision from avalanche.benchmarks.generators import filelist_scenario, \ dataset_scenario, tensor_scenario, paths_scenario @@ -30,10 +32,22 @@ def main(args): siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, batch_size=args.siw_batch_size, num_workers=args.siw_num_workers) + + # log to text file + text_logger = TextLogger(open('/scratch_global/eden/avalanche/log.txt', 'a')) + + # print to stdout + interactive_logger = InteractiveLogger() + + eval_plugin = EvaluationPlugin( + accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), + loggers=[interactive_logger, text_logger] + ) + optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) criterion = CrossEntropyLoss() strategy = Naive(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=args.epochs) + device=device, train_epochs=args.epochs, evaluator=eval_plugin) normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], std=[0.2007, 0.1999, 0.1992]) diff --git a/examples/siw_cifar100_2.py b/examples/siw_cifar100_2.py new file mode 100644 index 000000000..36d185014 --- /dev/null +++ b/examples/siw_cifar100_2.py @@ -0,0 +1,135 @@ +from avalanche.benchmarks.classic import SplitCIFAR100 +from torch.optim import SGD +from torch.nn import CrossEntropyLoss +from avalanche.models import SimpleMLP +from avalanche.training.strategies import Naive +from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ + AGEMPlugin, SIWPlugin, EvaluationPlugin +from avalanche.logging import InteractiveLogger, TextLogger +from avalanche.evaluation.metrics import accuracy_metrics +import torchvision +from avalanche.benchmarks.generators import filelist_scenario, \ + dataset_scenario, tensor_scenario, paths_scenario +from torchvision.transforms import Compose, CenterCrop, Normalize, \ + Scale, Resize, ToTensor, ToPILImage +import torchvision.transforms as transforms +import torch.nn as nn +import torch +from torch.autograd import Variable +import argparse + + +def main(args): + # check if selected GPU is available or use CPU + assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0." 
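+ # fall back to the CPU when --cuda is -1 or no CUDA device is available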
+ device = torch.device(f"cuda:{args.cuda}" + if torch.cuda.is_available() + and args.cuda >= 0 else "cpu") + print(f'Using device: {device}') + ############################################# + model = torchvision.models.resnet18(num_classes=100).to(device) + + siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, + batch_size=args.siw_batch_size, + num_workers=args.siw_num_workers) + + # log to text file + text_logger = TextLogger(open('/scratch_global/eden/avalanche/log.txt', 'a')) + + # print to stdout + interactive_logger = InteractiveLogger() + + eval_plugin = EvaluationPlugin( + accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), + loggers=[interactive_logger, text_logger] + ) + + optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) + criterion = CrossEntropyLoss() + strategy = Naive(model, optimizer, criterion, plugins=[siw], + device=device, train_epochs=args.epochs, evaluator=eval_plugin) + + normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], + std=[0.2007, 0.1999, 0.1992]) + + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize]) + + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize]) + + # # scenario + # scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, + # seed=1234, train_transform=train_transform, + # eval_transform=test_transform) + + # scenario + scenario = filelist_scenario( + root="", + train_file_lists=["/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch1", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch2", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch3", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch4", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch5", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch6", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch7", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch8", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch9", + "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch10"], + + test_file_lists=["/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch1", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch2", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch3", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch4", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch5", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch6", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch7", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch8", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch9", + "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch10"], + + task_labels=list(range(0, 10)), + complete_test_set_only=False, + train_transform=train_transform, + eval_transform=test_transform, + ) + + # TRAINING LOOP + print('Starting experiment...') + results = [] + for i, experience in enumerate(scenario.train_stream): + print("Start of experience: ", 
experience.current_experience) + strategy.train(experience) + print('Training completed') + print('Computing accuracy on the test set') + res = strategy.eval(scenario.test_stream[i]) + results.append(res) + + print('Results = ' + str(results)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') + parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') + parser.add_argument('--epochs', type=int, default=10, + help='Number of training epochs.') + parser.add_argument('--batch_size', type=int, default=128, + help='Batch size.') + parser.add_argument('--siw_batch_size', type=int, default=128, + help='Batch size used to extract scores.') + parser.add_argument('--siw_num_workers', type=int, default=8, + help='Number of workers used to extract scores.') + parser.add_argument('--siw_layer_name', type=str, default='fc', + help='Name of the last fully connected layer.') + parser.add_argument('--cuda', type=int, default=0, + help='Specify GPU id to use. Use CPU if -1.') + args = parser.parse_args() + + main(args) From f86f3a49caf2dfca821366258d52b0ca891fd033 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Thu, 22 Apr 2021 10:36:57 +0200 Subject: [PATCH 4/8] test_strategies modified --- tests/test_strategies.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 7c1fecbbd..3bb8c0c4b 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -308,12 +308,8 @@ def test_ar1(self): self.run_strategy(my_nc_scenario, strategy) def test_siw(self): - model = self.get_model(fast_test=self.fast_test) - optimizer = SGD(model.parameters(), lr=0.1) - criterion = CrossEntropyLoss() - # SIT scenario - my_nc_scenario = self.load_scenario(fast_test=self.fast_test) + model, optimizer, criterion, my_nc_scenario = self.init_sit() strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', batch_size=32, num_workers=8, train_mb_size=128, device=self.device, eval_mb_size=32, train_epochs=2) @@ -323,8 +319,7 @@ def test_siw(self): strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', batch_size=32, num_workers=8, train_mb_size=128, device=self.device, eval_mb_size=32, train_epochs=2) - scenario = self.load_scenario(fast_test=self.fast_test, - use_task_labels=True) + scenario = self.load_scenario(use_task_labels=False) self.run_strategy(scenario, strategy) def load_ar1_scenario(self): @@ -370,14 +365,14 @@ def run_strategy(self, scenario, cl_strategy): print('Starting experiment...') cl_strategy.evaluator.loggers = [TextLogger(sys.stdout)] results = [] - for train_batch_info in scenario.train_stream: + for i, train_batch_info in enumerate(scenario.train_stream): print("Start of experience ", train_batch_info.current_experience) cl_strategy.train(train_batch_info) print('Training completed') print('Computing accuracy on the current test set') - results.append(cl_strategy.eval(scenario.test_stream[:])) + results.append(cl_strategy.eval(scenario.test_stream[:i+1])) if __name__ == '__main__': From ac3e171c502845a0f85ce9d76a343ef9496fd071 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Thu, 22 Apr 2021 11:04:58 +0200 Subject: [PATCH 5/8] fix unittest --- avalanche/training/plugins/siw.py | 15 ++++++++++----- examples/siw_cifar100.py | 11 ++++------- tests/test_strategies.py | 21 +++++++++++++++++---- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git 
a/avalanche/training/plugins/siw.py b/avalanche/training/plugins/siw.py index 9f8555ecf..1d71a471a 100644 --- a/avalanche/training/plugins/siw.py +++ b/avalanche/training/plugins/siw.py @@ -91,9 +91,11 @@ class weights and bias, to let only the feature extractor @torch.no_grad() def after_training_exp(self, strategy, **kwargs): """ - Extract new class images' scores and compute the model - confidence at each incremental state + Before evaluating the performance of our model, + we extract new class images' scores and compute the + model's confidence at each incremental state """ + # extract training scores strategy.model.eval() dataset = strategy.experience.dataset @@ -101,6 +103,7 @@ def after_training_exp(self, strategy, **kwargs): dataset, batch_size=self.batch_size, num_workers=self.num_workers) + # compute model's confidence max_top1_scores = [] for i, data in enumerate(loader): inputs, targets, task_labels = data @@ -112,12 +115,14 @@ def after_training_exp(self, strategy, **kwargs): self.confidences.append(sum(max_top1_scores) / len(max_top1_scores)) + @torch.no_grad() def before_eval_exp(self, strategy, **kwargs): """ - Before evaluating the performance of our model, we standardize - all class weights (by subtracting their mean and dividing by - their standard deviation) + Standardize all class weights (by subtracting their mean + and dividing by their standard deviation) """ + + # standardize last layer weights last_layer = self.get_siw_layer() if last_layer is None: raise RuntimeError('Can\'t find this Linear layer') diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index c62c377cd..edf27f01c 100644 --- a/examples/siw_cifar100.py +++ b/examples/siw_cifar100.py @@ -33,15 +33,12 @@ def main(args): batch_size=args.siw_batch_size, num_workers=args.siw_num_workers) - # log to text file - text_logger = TextLogger(open('/scratch_global/eden/avalanche/log.txt', 'a')) - # print to stdout interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), - loggers=[interactive_logger, text_logger] + loggers=[interactive_logger] ) optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) @@ -66,7 +63,7 @@ def main(args): # scenario scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, - seed=1234, train_transform=train_transform, + fixed_class_order=range(0, 100), train_transform=train_transform, eval_transform=test_transform) # TRAINING LOOP @@ -87,7 +84,7 @@ def main(args): parser = argparse.ArgumentParser() parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') - parser.add_argument('--epochs', type=int, default=10, + parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs.') parser.add_argument('--batch_size', type=int, default=128, help='Batch size.') @@ -97,7 +94,7 @@ def main(args): help='Number of workers used to extract scores.') parser.add_argument('--siw_layer_name', type=str, default='fc', help='Name of the last fully connected layer.') - parser.add_argument('--cuda', type=int, default=0, + parser.add_argument('--cuda', type=int, default=1, help='Specify GPU id to use. 
Use CPU if -1.') args = parser.parse_args() diff --git a/tests/test_strategies.py b/tests/test_strategies.py index 9af00309c..08de3813a 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -307,20 +307,33 @@ def test_ar1(self): rm_sz=200) self.run_strategy(my_nc_benchmark, strategy) + def run_siw(self, scenario, cl_strategy): + print('Starting experiment...') + cl_strategy.evaluator.loggers = [TextLogger(sys.stdout)] + results = [] + for i, train_batch_info in enumerate(scenario.train_stream): + print("Start of experience ", train_batch_info.current_experience) + + cl_strategy.train(train_batch_info) + print('Training completed') + + print('Computing accuracy on the current test set') + results.append(cl_strategy.eval(scenario.test_stream[:i+1])) + def test_siw(self): # SIT scenario model, optimizer, criterion, my_nc_scenario = self.init_sit() strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', batch_size=32, num_workers=8, train_mb_size=128, device=self.device, eval_mb_size=32, train_epochs=2) - self.run_strategy(my_nc_scenario, strategy) + self.run_siw(my_nc_scenario, strategy) # MT scenario strategy = SIW(model, optimizer, criterion, siw_layer_name='classifier', batch_size=32, num_workers=8, train_mb_size=128, device=self.device, eval_mb_size=32, train_epochs=2) scenario = self.load_scenario(use_task_labels=False) - self.run_strategy(scenario, strategy) + self.run_siw(scenario, strategy) def load_ar1_scenario(self): """ @@ -365,14 +378,14 @@ def run_strategy(self, scenario, cl_strategy): print('Starting experiment...') cl_strategy.evaluator.loggers = [TextLogger(sys.stdout)] results = [] - for i, train_batch_info in enumerate(scenario.train_stream): + for train_batch_info in scenario.train_stream: print("Start of experience ", train_batch_info.current_experience) cl_strategy.train(train_batch_info) print('Training completed') print('Computing accuracy on the current test set') - results.append(cl_strategy.eval(scenario.test_stream[:i+1])) + results.append(cl_strategy.eval(scenario.test_stream[:])) if __name__ == '__main__': From 113e1ab20e45b197b74067f5c465855546b61962 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Thu, 22 Apr 2021 11:31:07 +0200 Subject: [PATCH 6/8] fix unittest --- examples/siw_cifar100_2.py | 135 ------------------------------------- 1 file changed, 135 deletions(-) delete mode 100644 examples/siw_cifar100_2.py diff --git a/examples/siw_cifar100_2.py b/examples/siw_cifar100_2.py deleted file mode 100644 index 36d185014..000000000 --- a/examples/siw_cifar100_2.py +++ /dev/null @@ -1,135 +0,0 @@ -from avalanche.benchmarks.classic import SplitCIFAR100 -from torch.optim import SGD -from torch.nn import CrossEntropyLoss -from avalanche.models import SimpleMLP -from avalanche.training.strategies import Naive -from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ - AGEMPlugin, SIWPlugin, EvaluationPlugin -from avalanche.logging import InteractiveLogger, TextLogger -from avalanche.evaluation.metrics import accuracy_metrics -import torchvision -from avalanche.benchmarks.generators import filelist_scenario, \ - dataset_scenario, tensor_scenario, paths_scenario -from torchvision.transforms import Compose, CenterCrop, Normalize, \ - Scale, Resize, ToTensor, ToPILImage -import torchvision.transforms as transforms -import torch.nn as nn -import torch -from torch.autograd import Variable -import argparse - - -def main(args): - # check if selected GPU is available or use CPU - assert args.cuda == -1 or args.cuda >= 0, "cuda 
must be -1 or >= 0." - device = torch.device(f"cuda:{args.cuda}" - if torch.cuda.is_available() - and args.cuda >= 0 else "cpu") - print(f'Using device: {device}') - ############################################# - model = torchvision.models.resnet18(num_classes=100).to(device) - - siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, - batch_size=args.siw_batch_size, - num_workers=args.siw_num_workers) - - # log to text file - text_logger = TextLogger(open('/scratch_global/eden/avalanche/log.txt', 'a')) - - # print to stdout - interactive_logger = InteractiveLogger() - - eval_plugin = EvaluationPlugin( - accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), - loggers=[interactive_logger, text_logger] - ) - - optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) - criterion = CrossEntropyLoss() - strategy = Naive(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=args.epochs, evaluator=eval_plugin) - - normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], - std=[0.2007, 0.1999, 0.1992]) - - train_transform = transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize]) - - test_transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize]) - - # # scenario - # scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, - # seed=1234, train_transform=train_transform, - # eval_transform=test_transform) - - # scenario - scenario = filelist_scenario( - root="", - train_file_lists=["/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch1", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch2", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch3", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch4", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch5", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch6", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch7", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch8", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch9", - "/scratch_global/eden/images_list_files/cifar100/s10/separated/train/batch10"], - - test_file_lists=["/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch1", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch2", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch3", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch4", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch5", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch6", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch7", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch8", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch9", - "/scratch_global/eden/images_list_files/cifar100/s10/accumulated/test/batch10"], - - task_labels=list(range(0, 10)), - complete_test_set_only=False, - train_transform=train_transform, - eval_transform=test_transform, - ) - - # TRAINING LOOP - print('Starting experiment...') - results = [] - for i, experience in enumerate(scenario.train_stream): - print("Start of experience: ", 
experience.current_experience) - strategy.train(experience) - print('Training completed') - print('Computing accuracy on the test set') - res = strategy.eval(scenario.test_stream[i]) - results.append(res) - - print('Results = ' + str(results)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') - parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') - parser.add_argument('--epochs', type=int, default=10, - help='Number of training epochs.') - parser.add_argument('--batch_size', type=int, default=128, - help='Batch size.') - parser.add_argument('--siw_batch_size', type=int, default=128, - help='Batch size used to extract scores.') - parser.add_argument('--siw_num_workers', type=int, default=8, - help='Number of workers used to extract scores.') - parser.add_argument('--siw_layer_name', type=str, default='fc', - help='Name of the last fully connected layer.') - parser.add_argument('--cuda', type=int, default=0, - help='Specify GPU id to use. Use CPU if -1.') - args = parser.parse_args() - - main(args) From 029882f49663b6378e9653f4c0caa55b2a3bc586 Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Thu, 22 Apr 2021 11:37:08 +0200 Subject: [PATCH 7/8] fix pep8 --- examples/siw_cifar100.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index edf27f01c..3e398f59a 100644 --- a/examples/siw_cifar100.py +++ b/examples/siw_cifar100.py @@ -37,14 +37,16 @@ def main(args): interactive_logger = InteractiveLogger() eval_plugin = EvaluationPlugin( - accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True), + accuracy_metrics(minibatch=False, epoch=True, experience=True, + stream=True), loggers=[interactive_logger] ) optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) criterion = CrossEntropyLoss() strategy = Naive(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=args.epochs, evaluator=eval_plugin) + device=device, train_epochs=args.epochs, + evaluator=eval_plugin) normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], std=[0.2007, 0.1999, 0.1992]) @@ -63,7 +65,8 @@ def main(args): # scenario scenario = SplitCIFAR100(n_experiences=10, return_task_id=False, - fixed_class_order=range(0, 100), train_transform=train_transform, + fixed_class_order=range(0, 100), + train_transform=train_transform, eval_transform=test_transform) # TRAINING LOOP From 4d1c761feb01cbe32de6c9593401f54cf3d90d4c Mon Sep 17 00:00:00 2001 From: BELOUADAH Eden Date: Mon, 26 Apr 2021 15:11:30 +0200 Subject: [PATCH 8/8] fix hyperparameters and unittest bugs --- avalanche/training/plugins/__init__.py | 2 +- .../training/strategies/strategy_wrappers.py | 44 ------- examples/siw_cifar100.py | 117 +++++++++++++----- 3 files changed, 85 insertions(+), 78 deletions(-) diff --git a/avalanche/training/plugins/__init__.py b/avalanche/training/plugins/__init__.py index 6da8cd80f..2dd57ac4f 100644 --- a/avalanche/training/plugins/__init__.py +++ b/avalanche/training/plugins/__init__.py @@ -10,4 +10,4 @@ from .strategy_plugin import StrategyPlugin from .synaptic_intelligence import SynapticIntelligencePlugin from .siw import SIWPlugin -from .cope import CoPEPlugin, PPPloss \ No newline at end of file +from .cope import CoPEPlugin, PPPloss diff --git a/avalanche/training/strategies/strategy_wrappers.py b/avalanche/training/strategies/strategy_wrappers.py index 09d8fe137..873512abc 100644 
--- a/avalanche/training/strategies/strategy_wrappers.py +++ b/avalanche/training/strategies/strategy_wrappers.py @@ -449,50 +449,6 @@ def __init__(self, model: Module, optimizer: Optimizer, criterion, eval_every=eval_every ) - def __init__(self, model: Module, optimizer: Optimizer, criterion, - patterns_per_exp: int, memory_strength: float = 0.5, - train_mb_size: int = 1, train_epochs: int = 1, - eval_mb_size: int = None, device=None, - plugins: Optional[List[StrategyPlugin]] = None, - evaluator: EvaluationPlugin = default_logger, eval_every=-1): - """ Gradient Episodic Memory (GEM) strategy. - See GEM plugin for details. - This strategy does not use task identities. - - :param model: The model. - :param optimizer: The optimizer to use. - :param criterion: The loss criterion to use. - :param patterns_per_exp: number of patterns per experience in the memory - :param memory_strength: offset to add to the projection direction - in order to favour backward transfer (gamma in original paper). - :param train_mb_size: The train minibatch size. Defaults to 1. - :param train_epochs: The number of training epochs. Defaults to 1. - :param eval_mb_size: The eval minibatch size. Defaults to 1. - :param device: The device to use. Defaults to None (cpu). - :param plugins: Plugins to be added. Defaults to None. - :param evaluator: (optional) instance of EvaluationPlugin for logging - and metric computations. - :param eval_every: the frequency of the calls to `eval` inside the - training loop. - if -1: no evaluation during training. - if 0: calls `eval` after the final epoch of each training - experience. - if >0: calls `eval` every `eval_every` epochs and at the end - of all the epochs for a single experience. - """ - - gem = GEMPlugin(patterns_per_exp, memory_strength) - if plugins is None: - plugins = [gem] - else: - plugins.append(gem) - - super().__init__( - model, optimizer, criterion, - train_mb_size=train_mb_size, train_epochs=train_epochs, - eval_mb_size=eval_mb_size, device=device, plugins=plugins, - evaluator=evaluator, eval_every=eval_every) - class SIW(BaseStrategy): def __init__(self, model: Module, optimizer: Optimizer, criterion, diff --git a/examples/siw_cifar100.py b/examples/siw_cifar100.py index 3e398f59a..66cf49c1f 100644 --- a/examples/siw_cifar100.py +++ b/examples/siw_cifar100.py @@ -1,38 +1,59 @@ from avalanche.benchmarks.classic import SplitCIFAR100 from torch.optim import SGD from torch.nn import CrossEntropyLoss -from avalanche.models import SimpleMLP from avalanche.training.strategies import Naive -from avalanche.training.plugins import ReplayPlugin, EWCPlugin, \ - AGEMPlugin, SIWPlugin, EvaluationPlugin -from avalanche.logging import InteractiveLogger, TextLogger +from avalanche.training.plugins import SIWPlugin,\ + EvaluationPlugin, StrategyPlugin +from avalanche.logging import InteractiveLogger from avalanche.evaluation.metrics import accuracy_metrics import torchvision -from avalanche.benchmarks.generators import filelist_scenario, \ - dataset_scenario, tensor_scenario, paths_scenario -from torchvision.transforms import Compose, CenterCrop, Normalize, \ - Scale, Resize, ToTensor, ToPILImage import torchvision.transforms as transforms import torch.nn as nn import torch -from torch.autograd import Variable import argparse +from torch.optim import lr_scheduler + + +class LRSchedulerPlugin(StrategyPlugin): + def __init__(self, lr_scheduler): + super().__init__() + self.lr_scheduler = lr_scheduler + + def after_training_epoch(self, strategy: 'BaseStrategy', **kwargs): + 
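+ # ReduceLROnPlateau expects the monitored metric at each step: the current
+ # training loss is passed in, and the updated learning rate is printed below.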
self.lr_scheduler.step(strategy.loss.cpu().data.numpy()) + lr = strategy.optimizer.param_groups[0]['lr'] + print(f"\nlr = {lr}") + + +class SetIncrementalHyperParams(StrategyPlugin): + def __init__(self, inc_exp_epochs, inc_exp_patience, first_exp_lr, + lr_decay): + super().__init__() + self.inc_exp_epochs = inc_exp_epochs + self.inc_exp_patience = inc_exp_patience + self.first_exp_lr = first_exp_lr + self.lr_decay = lr_decay + + def before_training_exp(self, strategy: 'BaseStrategy', **kwargs): + if strategy.experience.current_experience > 0: # incremental update + strategy.train_epochs = self.inc_exp_epochs + strategy.optimizer.param_groups[0]['lr'] = \ + self.first_exp_lr / strategy.experience.current_experience + strategy.scheduler = LRSchedulerPlugin( + lr_scheduler.ReduceLROnPlateau(strategy.optimizer, + patience=self.inc_exp_patience, + factor=self.lr_decay)) def main(args): # check if selected GPU is available or use CPU assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0." - device = torch.device(f"cuda:{args.cuda}" - if torch.cuda.is_available() + device = torch.device(f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu") print(f'Using device: {device}') ############################################# model = torchvision.models.resnet18(num_classes=100).to(device) - siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, - batch_size=args.siw_batch_size, - num_workers=args.siw_num_workers) - # print to stdout interactive_logger = InteractiveLogger() @@ -42,14 +63,32 @@ def main(args): loggers=[interactive_logger] ) - optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum) + optimizer = SGD(model.parameters(), lr=args.first_exp_lr, + momentum=args.momentum, + weight_decay=args.weight_decay) criterion = CrossEntropyLoss() - strategy = Naive(model, optimizer, criterion, plugins=[siw], - device=device, train_epochs=args.epochs, - evaluator=eval_plugin) + scheduler = LRSchedulerPlugin( + lr_scheduler.ReduceLROnPlateau(optimizer, + patience=args.first_exp_patience, + factor=args.lr_decay)) + incremental_params = SetIncrementalHyperParams(args.inc_exp_epochs, + args.inc_exp_patience, + args.first_exp_lr, + args.lr_decay) + + siw = SIWPlugin(model, siw_layer_name=args.siw_layer_name, + batch_size=args.eval_batch_size, + num_workers=args.num_workers) + + strategy = Naive(model, optimizer, criterion, + device=device, train_epochs=args.first_exp_epochs, + evaluator=eval_plugin, + plugins=[siw, scheduler, incremental_params], + train_mb_size=args.train_batch_size, + eval_mb_size=args.eval_batch_size) - normalize = transforms.Normalize(mean=[0.5356, 0.4898, 0.4255], - std=[0.2007, 0.1999, 0.1992]) + normalize = transforms.Normalize(mean=[0.5071, 0.4866, 0.4409], + std=[0.2673, 0.2564, 0.2762]) train_transform = transforms.Compose([ transforms.RandomResizedCrop(224), @@ -68,16 +107,16 @@ def main(args): fixed_class_order=range(0, 100), train_transform=train_transform, eval_transform=test_transform) - # TRAINING LOOP print('Starting experiment...') results = [] for i, experience in enumerate(scenario.train_stream): print("Start of experience: ", experience.current_experience) - strategy.train(experience) + strategy.train(experience, num_workers=args.num_workers) print('Training completed') print('Computing accuracy on the test set') - res = strategy.eval(scenario.test_stream[:i+1]) + res = strategy.eval(scenario.test_stream[:i + 1], + num_workers=args.num_workers) results.append(res) print('Results = ' + str(results)) @@ -85,15 +124,27 
@@ def main(args): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--lr', type=float, default=0.1, help='Learning rate.') - parser.add_argument('--momentum', type=float, default=0.9, help='Momentum') - parser.add_argument('--epochs', type=int, default=100, - help='Number of training epochs.') - parser.add_argument('--batch_size', type=int, default=128, - help='Batch size.') - parser.add_argument('--siw_batch_size', type=int, default=128, - help='Batch size used to extract scores.') - parser.add_argument('--siw_num_workers', type=int, default=8, + parser.add_argument('--first_exp_lr', type=float, default=0.1, + help='Learning rate for the first experience.') + parser.add_argument('--momentum', type=float, default=0.9, + help='Momentum') + parser.add_argument('--weight_decay', type=float, default=0.0005, + help='Weight decay') + parser.add_argument('--lr_decay', type=float, default=0.1, + help='LR decay') + parser.add_argument('--first_exp_patience', type=int, default=60, + help='Patience in the first experience') + parser.add_argument('--inc_exp_patience', type=int, default=15, + help='Patience in the incremental experiences') + parser.add_argument('--first_exp_epochs', type=int, default=300, + help='Number of epochs in the first experience.') + parser.add_argument('--inc_exp_epochs', type=int, default=70, + help='Number of epochs in each incremental experience.') + parser.add_argument('--train_batch_size', type=int, default=128, + help='Training batch size.') + parser.add_argument('--eval_batch_size', type=int, default=32, + help='Evaluation batch size.') + parser.add_argument('--num_workers', type=int, default=8, help='Number of workers used to extract scores.') parser.add_argument('--siw_layer_name', type=str, default='fc', help='Name of the last fully connected layer.')
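
For reference, the two operations that the SIW plugin applies at evaluation time can be reproduced on toy tensors in a few lines. The sketch below is illustrative only (standalone toy tensors and made-up confidence values, not the plugin code itself): each class row of the last fully connected layer is standardized by its own mean and standard deviation, and past-class logits are then rescaled by the ratio between the current state's confidence and the confidence of the state in which those classes were first learned.

import torch

# Toy last fully connected layer: 6 classes seen so far, feature size 4.
fc_weight = torch.randn(6, 4)

# (1) Standardization of initial weights (cf. before_eval_exp): centre each
# class row on its own mean and divide it by its own standard deviation.
mu = fc_weight.mean(dim=1, keepdim=True)
std = fc_weight.std(dim=1, keepdim=True)
standardized_weight = (fc_weight - mu) / std

# (2) State-level calibration (cf. after_eval_forward): rescale the logits of
# each group of classes by current-state confidence / initial-state confidence.
confidences = [0.92, 0.75, 0.61]              # mean top-1 score per state (made-up values)
classes_per_state = [[0, 1], [2, 3], [4, 5]]  # classes introduced in each state
current_state = 2

logits = torch.randn(8, 6)                    # raw scores for a batch of 8 samples
for state, classes in enumerate(classes_per_state):
    logits[:, classes] *= confidences[current_state] / confidences[state]

For the classes of the current state the ratio is 1, so looping over every state, as the plugin does, leaves them untouched.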