From 18b75f3b66d437a886ac57e7fc533e388759a03f Mon Sep 17 00:00:00 2001 From: zhaoqi10 Date: Tue, 31 Jan 2023 19:50:41 +0800 Subject: [PATCH 1/5] feat: add multi-task classification task. --- .gitignore | 8 +- plsc/data/dataset/mtl_dataset.py | 163 ++++++ plsc/data/sampler/mtl_sampler.py | 89 +++ .../multi_task_classfication/__init__.py | 13 + .../multi_task_classfication/trainer.py | 512 ++++++++++++++++++ plsc/loss/MTLoss.py | 76 +++ plsc/loss/distill_loss.py | 34 ++ plsc/models/multi_task/MTLModel.py | 131 +++++ plsc/models/multi_task/ResNet_backbone.py | 339 ++++++++++++ plsc/models/multi_task/__init__.py | 13 + plsc/models/multi_task/head.py | 213 ++++++++ .../multi_task_resnet18_dp_fp16o2_demo.yaml | 217 ++++++++ tools/mtl_train.py | 36 ++ 13 files changed, 1839 insertions(+), 5 deletions(-) create mode 100644 plsc/data/dataset/mtl_dataset.py create mode 100644 plsc/data/sampler/mtl_sampler.py create mode 100644 plsc/engine/multi_task_classfication/__init__.py create mode 100644 plsc/engine/multi_task_classfication/trainer.py create mode 100644 plsc/loss/MTLoss.py create mode 100644 plsc/loss/distill_loss.py create mode 100644 plsc/models/multi_task/MTLModel.py create mode 100644 plsc/models/multi_task/ResNet_backbone.py create mode 100644 plsc/models/multi_task/__init__.py create mode 100644 plsc/models/multi_task/head.py create mode 100644 task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml create mode 100644 tools/mtl_train.py diff --git a/.gitignore b/.gitignore index 4d685bf..a0d5335 100644 --- a/.gitignore +++ b/.gitignore @@ -103,8 +103,6 @@ ENV/ .DS_Store # logs and output -**/output/ -**/log/ - -# dataset -**/dataset/ \ No newline at end of file +output/ +log/ +dataset/ diff --git a/plsc/data/dataset/mtl_dataset.py b/plsc/data/dataset/mtl_dataset.py new file mode 100644 index 0000000..cfb6dec --- /dev/null +++ b/plsc/data/dataset/mtl_dataset.py @@ -0,0 +1,163 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Single-task Dataset and ConcatDataset are realized. +Multi-task dataset(ConcatDataset) can be composed by multiple single-task datasets. +""" + +import warnings +import bisect +import cv2 +from os.path import join +import numpy as np +import random + +import paddle +from paddle.io import Dataset +from plsc.data.utils import create_preprocess_operators + + +class SingleTaskDataset(Dataset): + """ + Single-task Dataset. + The input file includes single task dataset. 
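+    Each line of the label file is "<image_path> <label>"; image paths are
+    resolved under "<data_root>/images" and a label of -1 is mapped to 0.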
+ """ + + def __init__(self, task_id, data_root, label_path, transform_ops): + self.task_id = task_id + self.data_root = data_root + self.transform_ops = None + if transform_ops is not None: + self.transform_ops = create_preprocess_operators(transform_ops) + self.data_list = [] + with open(join(data_root, label_path), "r") as f: + for line in f: + img_path, label = line.strip().split(" ") + self.data_list.append( + (join(data_root, "images", img_path), int(label))) + + def __getitem__(self, idx): + img_path, label = self.data_list[idx] + with open(img_path, 'rb') as f: + img = f.read() + if self.transform_ops: + img = self.transform_ops(img) + if label == -1: + label = 0 + label = paddle.to_tensor(label, dtype=paddle.int32) + return img, label, self.task_id + + def __len__(self): + return len(self.data_list) + + +class ConcatDataset(Dataset): + """ + + Dataset that are composed by multiple datasets. + Multi-task Dataset can be the concatenation of single-task datasets. + """ + + @staticmethod + def cumsum(sequence, ratio_list): + r, s = [], 0 + for i, e in enumerate(sequence): + l = int(len(e) * ratio_list[i]) + r.append(l + s) + s += l + return r + + def __init__(self, datasets, dataset_ratio=None): + super(ConcatDataset, self).__init__() + assert len(datasets) > 0, 'datasets should not be an empty iterable' + self.datasets = list(datasets) + + if dataset_ratio is not None: + assert len(dataset_ratio) == len(datasets) + self.dataset_ratio = { + i: dataset_ratio[i] + for i in range(len(dataset_ratio)) + } + else: + self.dataset_ratio = {i: 1. for i in range(len(datasets))} + + self.cumulative_sizes = self.cumsum(self.datasets, self.dataset_ratio) + self.idx_ds_map = { + idx: bisect.bisect_right(self.cumulative_sizes, idx) + for idx in range(self.__len__()) + } + + def __len__(self): + return self.cumulative_sizes[-1] + + def __getitem__(self, idx): + dataset_idx = self.idx_ds_map[idx] + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + if sample_idx >= len(self.datasets[dataset_idx]): + sample_idx = random.choice(range(len(self.datasets[dataset_idx]))) + return self.datasets[dataset_idx][sample_idx] + + @property + def cummulative_sizes(self): + warnings.warn( + "cummulative_sizes attribute is renamed to " + "cumulative_sizes", + DeprecationWarning, + stacklevel=2) + return self.cumulative_sizes + + +class MultiTaskDataset(Dataset): + """ + Multi-Task Dataset. + The input file includes multi-task datasets. 
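+    Each line of the label file is "<image_path> <label_0> <label_1> ...",
+    with one label per task id; labels of -1 are mapped to 0.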
+    """
+
+    def __init__(self, task_ids, data_root, label_path, transform_ops):
+        """
+
+        Args:
+            task_ids: list of task ids covered by this dataset
+            data_root: root directory containing the "images" folder and
+                the label file
+            label_path: label file path relative to data_root
+            transform_ops: config list of preprocessing operators
+        """
+        self.task_ids = task_ids
+        self.data_root = data_root
+        self.transform_ops = None
+        if transform_ops is not None:
+            self.transform_ops = create_preprocess_operators(transform_ops)
+        self.data_list = []
+        with open(join(data_root, label_path), "r") as f:
+            for line in f:
+                img_path, labels = line.strip().split(" ", 1)
+                labels = [int(label) for label in labels.strip().split(" ")]
+                self.data_list.append(
+                    (join(data_root, "images", img_path), labels))
+
+    def __getitem__(self, idx):
+        img_path, labels = self.data_list[idx]
+        with open(img_path, 'rb') as f:
+            img = f.read()
+        if self.transform_ops:
+            img = self.transform_ops(img)
+        labels = [0 if label == -1 else label for label in labels]
+        labels = paddle.to_tensor(np.array(labels), dtype=paddle.int32)
+        return img, labels, np.array(self.task_ids)
+
+    def __len__(self):
+        return len(self.data_list)
diff --git a/plsc/data/sampler/mtl_sampler.py b/plsc/data/sampler/mtl_sampler.py
new file mode 100644
index 0000000..703487f
--- /dev/null
+++ b/plsc/data/sampler/mtl_sampler.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
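+
+# MTLSampler extends DistributedBatchSampler: when `idx_sample_p` is given,
+# each batch is drawn by sampling indices with the provided per-sample
+# probabilities (with replacement) instead of iterating them in order.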
+ +import random +import numpy as np + +from paddle.io import DistributedBatchSampler + + +class MTLSampler(DistributedBatchSampler): + def __init__(self, + dataset, + batch_size, + num_replicas=None, + rank=None, + shuffle=False, + drop_last=False, + idx_sample_p: dict=None): + super(MTLSampler, self).__init__( + dataset, + batch_size, + num_replicas=num_replicas, + rank=rank, + shuffle=shuffle, + drop_last=drop_last) + self.idx_sample_p = idx_sample_p + + def resample(self): + num_samples = len(self.dataset) + indices = np.arange(num_samples).tolist() + + return indices + + def __iter__(self): + num_samples = len(self.dataset) + indices = np.arange(num_samples).tolist() + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + if self.shuffle: + np.random.RandomState(self.epoch).shuffle(indices) + self.epoch += 1 + + # subsample + def _get_indices_by_batch_size(indices): + subsampled_indices = [] + last_batch_size = self.total_size % (self.batch_size * self.nranks) + assert last_batch_size % self.nranks == 0 + last_local_batch_size = last_batch_size // self.nranks + + for i in range(self.local_rank * self.batch_size, + len(indices) - last_batch_size, + self.batch_size * self.nranks): + subsampled_indices.extend(indices[i:i + self.batch_size]) + + indices = indices[len(indices) - last_batch_size:] + subsampled_indices.extend(indices[ + self.local_rank * last_local_batch_size:( + self.local_rank + 1) * last_local_batch_size]) + return np.array(subsampled_indices) + + if self.nranks > 1: + indices = _get_indices_by_batch_size(indices) + + assert len(indices) == self.num_samples + _sample_iter = iter(indices) + if self.idx_sample_p is not None: + assert len(self.idx_sample_p) == len(self.dataset), \ + "length of idx_sample_p must be equal to dataset" + batch_indices = [] + sample_p = [self.idx_sample_p[ind] for ind in indices] + for _ in range(len(indices)): + idx = np.random.choice(indices, replace=True, p=sample_p) + batch_indices.append(idx) + if len(batch_indices) == self.batch_size: + yield batch_indices + batch_indices = [] + if not self.drop_last and len(batch_indices) > 0: + yield batch_indices diff --git a/plsc/engine/multi_task_classfication/__init__.py b/plsc/engine/multi_task_classfication/__init__.py new file mode 100644 index 0000000..4d4247d --- /dev/null +++ b/plsc/engine/multi_task_classfication/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plsc/engine/multi_task_classfication/trainer.py b/plsc/engine/multi_task_classfication/trainer.py new file mode 100644 index 0000000..9f85e8f --- /dev/null +++ b/plsc/engine/multi_task_classfication/trainer.py @@ -0,0 +1,512 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import math +import numpy as np +import os +import random +import time +from typing import Dict +from collections import defaultdict + +import paddle +import paddle.distributed as dist +from paddle.io import DataLoader, DistributedBatchSampler +# from paddle.distributed.fleet.utils.hybrid_parallel_util +# import fused_allreduce_gradients + +from plsc.engine.engine import Engine +from plsc.utils import logger +from plsc.utils import io +from plsc.models.multi_task.MTLModel import MTLModel +from plsc.loss.MTLoss import MTLoss +from plsc.core.param_fuse import get_fused_params +from plsc.core import recompute_warp, GradScaler, param_sync, grad_sync +from plsc.models.multi_task.ResNet_backbone import IResNet18, IResNet50 +from plsc.models.multi_task.head import TaskBlock +from plsc.scheduler import ViTLRScheduler +from plsc.optimizer import AdamW, ClipGradByGlobalNorm +from plsc.data.sampler.mtl_sampler import MTLSampler +from plsc.metric.metrics import TopkAcc +from plsc.data.dataset.mtl_dataset import SingleTaskDataset, \ + MultiTaskDataset, ConcatDataset + + +class MTLEngine(object): + def __init__(self, config, mode="Train"): + self.mode = mode + self.finetune = False + self.rank = dist.get_rank() + self.world_size = dist.get_world_size() + self.config = config + self.parse_config() + self.build_modules() + + @staticmethod + def params_counts(model): + n_parameters = sum(p.numel() for p in model.parameters() + if not p.stop_gradient).item() + i = int(math.log(n_parameters, 10) // 3) + size_unit = ['', 'K', 'M', 'B', 'T', 'Q'] + param_size = n_parameters / math.pow(1000, i) + return param_size, size_unit[i] + + def _init_worker(self, worker_id): + """ set seed in subproces for dataloader when num_workers > 0""" + if self.seed: + np.random.seed(self.seed + worker_id) + random.seed(self.seed + worker_id) + + def parse_config(self): + + # parse global params + for key in self.config["Global"]: + setattr(self, key, self.config["Global"][key]) + + # distillation and get model name + if self.config.get("Distillation", None): + student_model = self.config["Model"].get("Student", None) + teacher_model = self.config["Model"].get("Teacher", None) + assert student_model and teacher_model, "Teacher and Student model must be defined!" + self.model_name = student_model.get("name", None) + + self.distillation = self.config["Distillation"]["Enabled"] + self.soft_weight = self.config["Distillation"]["soft_weight"] + else: + self.distillation = False + if self.config["Model"].get("Teacher", None): + self.model_name = self.config["Model"]["Teacher"].get("name", + None) + else: + self.model_name = self.config["Model"].get("name", None) + assert self.model_name, "model must be defined!" 
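+        # model_name is reused below for the log path and checkpoint prefix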
+ + # init logger + self.output_dir = self.config['Global']['output_dir'] + log_file = os.path.join(self.output_dir, self.model_name, + f"{self.mode}.log") + logger.init_logger(log_file=log_file) + + # set device + assert self.config["Global"]["device"] in ["cpu", "gpu", "xpu", "npu"] + self.device = paddle.set_device(self.config["Global"]["device"]) + logger.info('train with paddle {}, commit id {} and device {}'.format( + paddle.__version__, paddle.__git_commit__[:8], self.device)) + + # set seed + self.seed = self.config["Global"].get("seed", False) + if self.seed: + assert isinstance(self.seed, int), "The 'seed' must be a integer!" + self.seed += self.rank + paddle.seed(self.seed) + np.random.seed(self.seed) + random.seed(self.seed) + self.worker_init_fn = self._init_worker if self.seed else None + + # distributed strategy + cfg_dist = self.config.get("DistributedStrategy", None) + if cfg_dist.get("data_parallel", None): + self.dp = True + dist.init_parallel_env() + + self.recompute = False + self.recompute_params = {} + if cfg_dist.get("recompute", None): + self.recompute = True + self.recompute_params = cfg_dist["recompute"] + + # amp + cfg_fp16 = self.config.get("FP16", False) + self.fp16_params = {"enable": False} + if cfg_fp16: + self.fp16_params["level"] = cfg_fp16.get("level", "O1") + if self.fp16_params["level"] != 'O0': + self.fp16_params["enable"] = True + cfg_scaler = cfg_fp16.get("GradScaler", {}) + self.scaler = GradScaler(self.fp16_params["enable"], **cfg_scaler) + self.fp16_params["custom_white_list"] = cfg_fp16.get( + "fp16_custom_white_list", None) + self.fp16_params["custom_black_list"] = cfg_fp16.get( + "fp16_custom_black_list", None) + # record + self.print_config() + + def print_config(self): + def print_params_dic(params_dic): + for key in params_dic: + logger.info(f"{key}: {params_dic[key]}") + + logger.info("=" * 16 + " params " + "=" * 16) + print_params_dic(self.config) + logger.info("=" * 40) + + def build_modules(self): + # dataset + for mode in ["Train", "Eval", "Test"]: + self.build_mtl_dataset(mode) + self.build_mtl_sampler(mode) + self.build_mtl_loader(mode) + self.build_metrics(mode) + + # build model + if self.distillation: + self.build_model(opt="Teacher") + self.build_model(opt="Student") + self.model = self.student_model + else: + self.build_model(opt="Teacher") + self.model = self.teacher_model + if self.dp: + param_sync(self.model) + # model = paddle.DataParallel(model, find_unused_parameters=True) + logger.info("DDP model: sync parameters finished") + + # build lr, opt, loss + if self.mode == 'Train': + self.build_lr_scheduler() + self.build_optimizer() + self.build_loss() + if self.distillation: + self.build_distill_loss() + + def build_mtl_dataset(self, mode): + # multi-task dataset + cfg_ds_list = self.config["DataLoader"][mode][ + "dataset"] # dataset list + datasets = [] + all_sample_ratio = [] + for cfg_ds_item in cfg_ds_list: + dataset_name = list(cfg_ds_item.keys())[0] + cfg_ds = cfg_ds_item[dataset_name] + label_path_list = cfg_ds["cls_label_path"] + if not isinstance(cfg_ds["cls_label_path"], list): + label_path_list = [cfg_ds["cls_label_path"]] + assert len(label_path_list) == len(cfg_ds["task_ids"]), \ + "lenght of label_path_list must be equal to task_names" + + sample_ratio = cfg_ds.get("sample_ratio", 1.) 
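+            # `sample_ratio` may be a single number applied to every label
+            # file or a per-file list; it is forwarded to ConcatDataset as
+            # `dataset_ratio` to scale each sub-dataset's contribution.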
+ if not isinstance(sample_ratio, list): + sample_ratio = [sample_ratio] * len(label_path_list) + all_sample_ratio += sample_ratio + + for i in range(len(label_path_list)): + st_dataset = eval(cfg_ds["name"])( + cfg_ds["task_ids"][i], cfg_ds["data_root"], + label_path_list[i], cfg_ds["transform_ops"]) + datasets.append(st_dataset) + if len(datasets) >= 1: + dataset = ConcatDataset(datasets, dataset_ratio=all_sample_ratio) + else: + dataset = datasets[0] + setattr(self, f"{mode.lower()}_dataset", dataset) + logger.debug(f"Build {mode} dataset succeed.") + + def build_mtl_sampler(self, mode): + # multi-task sampler + cfg_sampler = self.config["DataLoader"][mode]["sampler"] + sampler_name = cfg_sampler.pop("name") + dataset = getattr(self, f"{mode.lower()}_dataset") + batch_sampler = eval(sampler_name)(dataset, **cfg_sampler) + logger.debug("build batch_sampler({}) success...".format(sampler_name)) + setattr(self, f"{mode.lower()}_sampler", batch_sampler) + logger.debug(f"Build {mode} sampler succeed.") + + def build_mtl_loader(self, mode): + # multi-task data loader + config_loader = self.config["DataLoader"][mode]["loader"] + dataset = getattr(self, f"{mode.lower()}_dataset") + sampler = getattr(self, f"{mode.lower()}_sampler") + data_loader = DataLoader( + dataset=dataset, + places=self.device, + num_workers=config_loader.num_workers, + return_list=True, + use_shared_memory=config_loader.use_shared_memory, + batch_sampler=sampler, + worker_init_fn=self.worker_init_fn) + setattr(self, f"{mode.lower()}_dataloader", data_loader) + logger.debug(f"Build {mode} dataloader succeed.") + + def build_model(self, opt=None): + model_config = copy.deepcopy(self.config["Model"]) + if model_config.get(opt, None): + model_config = model_config[opt] + # structure + model_name = model_config["name"] + # backbone + config_backbone = model_config["backbone"] + backbone_name = config_backbone.pop("name") + backbone = eval(backbone_name)(**config_backbone) + # head + config_heads = model_config["heads"] + head_dic = {} + for head_item in config_heads: + cfg_head = copy.deepcopy(head_item[list(head_item.keys())[0]]) + task_ids = cfg_head.pop("task_ids") + class_nums = cfg_head.pop("class_nums") + head_class = cfg_head.pop("name") + if not isinstance(head_class, list): + head_class = [head_class] * len(task_ids) + if not isinstance(class_nums, list): + class_nums = [class_nums] * len(task_ids) + for i, task_id in enumerate(task_ids): + head_dic[self.task_names[task_id]] = (eval(head_class[i])( + class_num=class_nums[i], **cfg_head)) + # merge + model = eval(model_name)(backbone, head_dic, self.recompute, + self.recompute_params) + setattr(self, f"{opt.lower()}_model", model) + param_size, size_unit = self.params_counts(model) + logger.info( + f"Build {opt} model succeed, the number of parameters is: {param_size:.3f}{size_unit}." 
+ ) + + def build_loss(self): + cfg_loss = self.config["Loss"][self.mode] + self.loss_func = MTLoss(self.task_names, cfg_loss) + logger.debug(f"build {self.mode} loss {self.loss_func} success.") + + def build_lr_scheduler(self): + lr_config = copy.deepcopy(self.config.get("LRScheduler", None)) + self.lr_decay_unit = lr_config.get("decay_unit", "step") + lr_config.update({ + "epochs": self.epochs, + "step_each_epoch": len(self.train_dataloader) + }) + if "name" in lr_config: + lr_name = lr_config.pop("name") + lr = eval(lr_name)(**lr_config) + if isinstance(lr, paddle.optimizer.lr.LRScheduler): + self.lr_scheduler = lr + else: + self.lr_scheduler = lr() + else: + self.lr_scheduler = lr_config["learning_rate"] + logger.debug("build lr ({}) success..".format(self.lr_scheduler)) + + def build_optimizer(self): + opt_config = copy.deepcopy(self.config["Optimizer"]) + grad_clip = None + grad_clip_config = opt_config.pop('grad_clip', None) + if grad_clip_config is not None: + grad_clip_name = grad_clip_config.pop('name', + 'ClipGradByGlobalNorm') + grad_clip = eval(grad_clip_name)(**grad_clip_config) + no_weight_decay_name = opt_config.pop('no_weight_decay_name', []) + + param_group = defaultdict(list) + for n, p in self.model.named_parameters(): + state = copy.deepcopy(p.__dict__) + if any(nd in n for nd in no_weight_decay_name): + state['no_weight_decay'] = True + param_group[str(state)].append(p) + + # fuse params + for key in param_group: + if 'gpu' not in paddle.get_device(): + continue + if "'is_distributed': True" in key: + continue + if "'has_sparse_grad': True" in key: + continue + param_group[key] = get_fused_params(param_group[key]) + + # bulid optimizer params + params = [] + for key in param_group: + group = {'params': param_group[key]} + + if "'is_distributed': True" in key: + group['is_distributed'] = True + + if 'no_weight_decay' in key: + group['weight_decay'] = 0.0 + + params.append(group) + + optim_name = opt_config.pop('name') + self.optimizer = eval(optim_name)(params, + lr=self.lr_scheduler, + grad_clip=grad_clip, + **opt_config) + + logger.debug("build optimizer ({}) success..".format(self.optimizer)) + + def build_metrics(self, mode): + cfg_metric = self.config.get("Metric", None) + metrics = [] + if cfg_metric is not None: + metric_func_list = cfg_metric[mode] + for item in metric_func_list: + func_name = list(item.keys())[0] + metrics.append(eval(func_name)(**item[func_name])) + setattr(self, f"{mode.lower()}_metrics", metrics) + logger.debug(f"Build {mode} metrics succeed.") + + def build_distill_loss(self): + cfg_loss = self.config["Distillation"].get("soft_loss", None) + assert cfg_loss is not None, "distillation loss should not be None" + self.distill_loss = MTLoss(self.task_names, cfg_loss) + logger.debug("build distill loss success.") + + def load_model(self): + if self.checkpoint: + io.load_checkpoint(self.checkpoint, self.model, self.optimizer, + self.scaler) + elif self.pretrained_model: + self.model.load_pretrained(self.pretrained_model, self.rank) + if self.distillation: + teacher_model_path = self.config["Global"].get( + "teacher_checkpoint", None) + assert teacher_model_path is not None, "Lack of teacher checkpoint, " \ + "which must be loaded first in distillation mode " + self.teacher_model.load_pretrained(teacher_model_path, self.rank) + self.teacher_model.eval() + logger.info(f"Teacher model initialized.") + + def train(self): + self.load_model() + # train loop + for epoch in range(self.epochs): + self.train_one_epoch(epoch) + # eval + metric_results = {} 
+ if self.eval_during_train and self.eval_unit == "epoch" \ + and (epoch + 1) % self.eval_interval == 0: + metric_results = self.test() + # save model + if (epoch + 1) % self.save_interval == 0 or (epoch + 1 + ) == self.epochs: + model_prefix = "final" if ( + epoch + 1) == self.epochs else f"model_epoch{epoch}" + self.save_model(model_prefix, metric_results) + # update lr + if self.lr_decay_unit == "epoch": + self.optimizer.lr_step() + + def train_one_epoch(self, epoch): + step = 0 + avg_loss = 0 # average loss in the lasted `self.print_batch_step` steps + for images, labels, tasks in self.train_dataloader: + start = time.time() + step += 1 + # compute loss + with paddle.amp.auto_cast(self.fp16_params): + logits = self.model(images) + _, total_loss = self.loss_func(logits, labels, tasks) + if self.distillation: + with paddle.no_grad(): + teacher_logits = self.teacher_model(images) + _, total_soft_loss = self.distill_loss( + logits, teacher_logits, tasks) + total_loss = self.soft_weight * total_soft_loss + ( + 1 - self.soft_weight) * total_loss + scaled = self.scaler.scale(total_loss) + scaled.backward() + grad_sync(self.optimizer.param_groups) + # update params + if (step + 1) % self.accum_steps == 0: + self.scaler.step(self.optimizer) + self.scaler.update() + self.optimizer.clear_grad() + if self.lr_decay_unit == "step": + self.optimizer.lr_step() + # show loss + avg_loss += total_loss.cpu().numpy()[0] + if (step + 1) % self.print_batch_step == 0: + logger.info(f"epoch: {epoch}, step: {step}, " + f"total loss: {avg_loss / self.print_batch_step}") + avg_loss = 0 + end = time.time() + logger.debug(f"one step time = {(end - start): .3f}s") + + def save_model(self, model_prefix, metric_results=None): + io.save_checkpoint( + self.model, + self.optimizer, + self.scaler, + metric_results, + self.output_dir, + model_name=self.model_name, + prefix=model_prefix, + max_num_checkpoint=self.max_num_latest_checkpoint) + + @paddle.no_grad() + def eval(self): + step = 0 + results = {} + bs = {} + self.model.eval() + for images, labels, tasks in self.eval_dataloader: + step += 1 + logits = self.model(images) + for idx, task_name in enumerate(self.task_names): + cond = tasks == idx + if not paddle.any(cond): + continue + preds = logits[task_name][cond] + labels = labels[cond] + for eval_metric in self.eval_metrics: + task_metric = eval_metric(preds, labels) + metric_name = str(eval_metric).replace("()", "") + results[idx] = results.get(idx, {metric_name: {}}) + for key in task_metric: + results[idx][metric_name][key] = \ + results[idx][metric_name].get(key, 0) + task_metric[key] + bs[idx] = bs.get(idx, 0) + 1 + self.model.train() + for idx in results: + for metric in results[idx]: + for key in results[idx][metric]: + results[idx][metric][key] /= bs[idx] + return results + + @paddle.no_grad() + def test(self): + step = 0 + results = {} + bs = {} + self.model.eval() + for images, targets, tasks in self.test_dataloader: + step += 1 + logits = self.model(images) + for idx, task_id in enumerate(tasks[0]): + preds = logits[self.task_names[task_id]] + labels = targets[:, idx] + for metric in self.test_metrics: + task_metric = metric(preds, labels) + metric_name = str(metric).replace("()", "") + results[idx] = results.get(idx, {metric_name: {}}) + for key in task_metric: + results[idx][metric_name][key] = \ + results[idx][metric_name].get(key, 0) + task_metric[key] + bs[idx] = bs.get(idx, 0) + 1 + self.model.train() + for idx in results: + for metric in results[idx]: + for key in results[idx][metric]: + 
results[idx][metric][key] /= bs[idx] + for task_id in results: + logger.info(f"metrics - task{task_id}: {results[task_id]}") + return results + + @paddle.no_grad() + def export(self): + assert self.mode in ["Export", "export"] + assert self.config.get("Export", None) is not None + assert self.pretrained_model is not None + self.model.eval() + path = os.path.join(self.output_dir, self.model_name) + io.export(self.config["Export"], self.model, path) diff --git a/plsc/loss/MTLoss.py b/plsc/loss/MTLoss.py new file mode 100644 index 0000000..929dc75 --- /dev/null +++ b/plsc/loss/MTLoss.py @@ -0,0 +1,76 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +import paddle +import paddle.nn as nn + +from plsc.loss.celoss import ViTCELoss, CELoss +from plsc.loss.distill_loss import MSELoss + + +class MTLoss(nn.Layer): + """ + multi-task loss framework + """ + + def __init__(self, task_names, cfg): + super().__init__() + self.loss_func = {} + self.loss_weight = {} + self.task_names = task_names + self.loss_names = {} + assert isinstance(cfg, list), "operator config should be a list" + for loss_item in cfg: + # more than 1 loss func, 1 loss func has more than 1 tasks + assert isinstance(loss_item, dict), "yaml format error" + loss_name = list(loss_item.keys())[0] + param = loss_item[loss_name] + task_id_list = param.pop("task_ids", [0]) # default task 0 + assert "weight" in param, \ + "weight must be in param, but param just contains {}".format( + param.keys()) + loss_weight = param.pop("weight", 1.0) + if isinstance(loss_weight, float): + loss_weight = len(task_id_list) * [loss_weight] + assert len(loss_weight) == len(task_id_list), \ + "task weights length must be equal to task number" + weight_sum = sum(loss_weight) + for task_id in task_id_list: + self.loss_names[task_id] = loss_name + self.loss_weight[task_id] = loss_weight[task_id] / weight_sum + self.loss_func[task_id] = eval(loss_name)(**param) + + @staticmethod + def cast_fp32(input): + if input.dtype != paddle.float32: + input = paddle.cast(input, 'float32') + return input + + def __call__(self, input, target, task): + loss_dict = {} + total_loss = 0.0 + for idx in self.loss_func: + cond = task == idx + logits = input[self.task_names[idx]][cond] + if isinstance(target, dict): + labels = target[self.task_names[idx]][cond] + else: + labels = target[cond] + loss = self.loss_func[idx](logits, labels) + weight = self.loss_weight[idx] + loss_dict[idx] = loss[self.loss_names[idx]] + total_loss += loss_dict[idx] * weight + return loss_dict, total_loss diff --git a/plsc/loss/distill_loss.py b/plsc/loss/distill_loss.py new file mode 100644 index 0000000..56b77b9 --- /dev/null +++ b/plsc/loss/distill_loss.py @@ -0,0 +1,34 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# multiple distillation loss functions +# + +import paddle.nn as nn +import paddle.nn.functional as F + + +class MSELoss(nn.Layer): + def __init__(self, temperature): + super().__init__() + self.temperature = temperature + + def forward(self, student_logits, teacher_logits): + + student_sfm = F.log_softmax(student_logits / self.temperature) + teacher_sfm = F.log_softmax(teacher_logits / self.temperature) + loss = nn.functional.mse_loss( + student_sfm, teacher_sfm, reduction="mean") + return {"MSELoss": loss} diff --git a/plsc/models/multi_task/MTLModel.py b/plsc/models/multi_task/MTLModel.py new file mode 100644 index 0000000..0ef144c --- /dev/null +++ b/plsc/models/multi_task/MTLModel.py @@ -0,0 +1,131 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Multi-task Model Framework is realized. +Combine backbone and multiple encoder layers in this framework. +""" + +import os +from typing import List, Dict +from collections import OrderedDict + +import paddle +from paddle.nn import Layer, LayerDict, LayerList +from plsc.models.layers.base_model import Model +from plsc.core.recompute import wrap_forward, recompute_forward + + +class MTLModel(Model): + """ + Multi-task Model Framework. + Recomputing can be turned on. + """ + + def __init__(self, + backbone: Layer, + encoder_heads: Dict, + recompute_on=False, + recompute_params=None): + """ + + Args: + backbone: backbone for feature extraction + encoder_heads: Dict + recompute_on: if recompute is used + recompute_params: recompute layers + """ + super(MTLModel, self).__init__() + if recompute_params is None: + recompute_params = {} + self.backbone = backbone + # {task_names: Layer} + self.encoder_heads = LayerDict(sublayers=encoder_heads) + self.recompute_on = recompute_on + if self.recompute_on: + self.recompute_warp(self.backbone, **recompute_params) + for task_name in self.encoder_heads: + self.recompute_warp(self.encoder_heads[task_name], + **recompute_params) + + def recompute_warp(self, + model, + layer_interval=1, + names=[], + exclude_names=None): + # recompute layers in names list or use layer_interval setting, + # layers in excluded names are excluded. 
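+        # Wrapped layers have their forward replaced by a recompute version,
+        # so their activations are recomputed in the backward pass instead of
+        # being kept in memory (less memory at the cost of extra compute).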
+ if exclude_names is None: + exclude_names = ["Dropout", "dropout", "pool"] + for idx, (name, layer) in enumerate(model._sub_layers.items()): + if name in exclude_names: + print(f"continue: {name}") + continue + if isinstance(layer, paddle.nn.LayerList): + for i, (name, sub_layer) in enumerate(layer.named_sublayers()): + if name in exclude_names: + print(f"continue: {name}") + continue + if layer_interval >= 1 and idx % layer_interval == 0: + print('recompute: ', name) + sub_layer.forward = wrap_forward(sub_layer.forward, + recompute_forward) + else: + if layer_interval >= 1 and idx % layer_interval == 0: + print('recompute: ', name) + layer.forward = wrap_forward(layer.forward, + recompute_forward) + + def forward(self, inputs, output_task_names=None): + output = {} + features = self.backbone(inputs) + if output_task_names is not None: + for task_name in output_task_names: + output[task_name] = self.encoder_heads[task_name](features) + else: + for task_name in self.encoder_heads: + output[task_name] = self.encoder_heads[task_name](features) + return output + + def save(self, path, local_rank=0, rank=0): + # save model + dist_state_dict = OrderedDict() + state_dict = self.state_dict() + for name, param in list(state_dict.items()): + if param.is_distributed: + dist_state_dict[name] = state_dict.pop(name) + + if local_rank == 0: + paddle.save(state_dict, path + ".pdparams") + + if len(dist_state_dict) > 0: + paddle.save(dist_state_dict, + path + "_rank{}.pdparams".format(rank)) + + def load_pretrained(self, path, rank=0, finetune=False): + # load pretrained model + if not os.path.exists(path + '.pdparams'): + raise ValueError("Model pretrain path {} does not " + "exists.".format(path)) + + state_dict = paddle.load(path + ".pdparams") + + dist_param_path = path + "_rank{}.pdparams".format(rank) + if os.path.exists(dist_param_path): + dist_state_dict = paddle.load(dist_param_path) + state_dict.update(dist_state_dict) + # clear + dist_state_dict.clear() + + if not finetune: + self.set_dict(state_dict) diff --git a/plsc/models/multi_task/ResNet_backbone.py b/plsc/models/multi_task/ResNet_backbone.py new file mode 100644 index 0000000..c9e721d --- /dev/null +++ b/plsc/models/multi_task/ResNet_backbone.py @@ -0,0 +1,339 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
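+
+# IResNet ("improved ResNet") backbones: IResNet18/34/50/100/200.
+# In this multi-task setup the forward pass returns the final 512-channel
+# feature map (the fc/embedding layers are defined but not applied), so
+# per-task heads can be attached on top of the shared features.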
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import collections +import numpy as np +import paddle +import paddle.nn as nn + +from plsc.nn import init +from plsc.models.layers.base_model import Model + +import math + +__all__ = ["IResNet18", "IResNet34", "IResNet50", "IResNet100", "IResNet200"] + + +def conv3x3(in_planes, + out_planes, + stride=1, + groups=1, + dilation=1, + data_format="NCHW"): + """3x3 convolution with padding""" + return nn.Conv2D( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=dilation, + groups=groups, + dilation=dilation, + bias_attr=False, + data_format=data_format) + + +def conv1x1(in_planes, out_planes, stride=1, data_format="NCHW"): + """1x1 convolution""" + return nn.Conv2D( + in_planes, + out_planes, + kernel_size=1, + stride=stride, + bias_attr=False, + data_format=data_format) + + +class IBasicBlock(nn.Layer): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=\ + 1, base_width=64, dilation=1, data_format="NCHW"): + super(IBasicBlock, self).__init__() + if groups != 1 or base_width != 64: + raise ValueError( + 'BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError( + 'Dilation > 1 not supported in BasicBlock') + self.bn1 = nn.BatchNorm2D( + inplanes, epsilon=1e-05, data_format=data_format) + self.conv1 = conv3x3(inplanes, planes, data_format=data_format) + self.bn2 = nn.BatchNorm2D( + planes, epsilon=1e-05, data_format=data_format) + self.prelu = nn.PReLU(planes, data_format=data_format) + self.conv2 = conv3x3(planes, planes, stride, data_format=data_format) + self.bn3 = nn.BatchNorm2D( + planes, epsilon=1e-05, data_format=data_format) + self.downsample = downsample + self.stride = stride + + def forward_impl(self, x): + identity = x + out = self.bn1(x) + out = self.conv1(out) + out = self.bn2(out) + out = self.prelu(out) + out = self.conv2(out) + out = self.bn3(out) + if self.downsample is not None: + identity = self.downsample(x) + out += identity + return out + + def forward(self, x): + return self.forward_impl(x) + + +class IResNet(Model): + def __init__(self, + block, + layers, + dropout=0, + num_features=512, + zero_init_residual=False, + groups=1, + width_per_group=64, + replace_stride_with_dilation=None, + class_num=93431, + pfc_config={"model_parallel": True, + "sample_ratio": 1.0}, + input_image_channel=3, + input_image_width=112, + input_image_height=112, + data_format="NCHW"): + super(IResNet, self).__init__() + + self.layers = layers + self.data_format = data_format + self.input_image_channel = input_image_channel + + assert input_image_width % 16 == 0 + assert input_image_height % 16 == 0 + feat_w = input_image_width // 16 + feat_h = input_image_height // 16 + self.fc_scale = feat_w * feat_h + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError( + 'replace_stride_with_dilation should be None or a 3-element tuple, got {}' + .format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2D( + self.input_image_channel, + self.inplanes, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + data_format=data_format) + self.bn1 = nn.BatchNorm2D( + self.inplanes, epsilon=1e-05, data_format=data_format) + self.prelu = nn.PReLU(self.inplanes, 
data_format=data_format) + self.layer1 = self._make_layer( + block, 64, layers[0], stride=2, data_format=data_format) + self.layer2 = self._make_layer( + block, + 128, + layers[1], + stride=2, + dilate=replace_stride_with_dilation[0], + data_format=data_format) + self.layer3 = self._make_layer( + block, + 256, + layers[2], + stride=2, + dilate=replace_stride_with_dilation[1], + data_format=data_format) + self.layer4 = self._make_layer( + block, + 512, + layers[3], + stride=2, + dilate=replace_stride_with_dilation[2], + data_format=data_format) + self.bn2 = nn.BatchNorm2D( + 512 * block.expansion, epsilon=1e-05, data_format=data_format) + self.dropout = nn.Dropout(p=dropout) + + self.fc = nn.Linear(512 * block.expansion * self.fc_scale, + num_features) + self.features = nn.BatchNorm1D(num_features, epsilon=1e-05) + # self.features = nn.BatchNorm1D(num_features, epsilon=1e-05, weight_attr=False) + + for m in self.sublayers(): + if isinstance(m, paddle.nn.Conv2D): + init.normal_(m.weight, 0, 0.1) + elif isinstance(m, (paddle.nn.BatchNorm2D, paddle.nn.GroupNorm)): + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + if zero_init_residual: + for m in self.sublayers(): + if isinstance(m, IBasicBlock): + init.constant_(m.bn2.weight, 0) + + def _make_layer(self, + block, + planes, + blocks, + stride=1, + dilate=False, + data_format="NCHW"): + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1( + self.inplanes, + planes * block.expansion, + stride, + data_format=data_format), + nn.BatchNorm2D( + planes * block.expansion, + epsilon=1e-05, + data_format=data_format)) + layers = [] + layers.append( + block( + self.inplanes, + planes, + stride, + downsample, + self.groups, + self.base_width, + previous_dilation, + data_format=data_format)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + groups=self.groups, + base_width=self.base_width, + dilation=self.dilation, + data_format=data_format)) + return nn.Sequential(*layers) + + def forward(self, inputs): + + if self.training: + with paddle.no_grad(): + # Note(GuoxiaWang) + # self.features = nn.BatchNorm1D(num_features, epsilon=1e-05, weight_attr=False) + self.features.weight.fill_(1.0) + + if isinstance(inputs, dict): + x = inputs['data'] + else: + x = inputs + + x.stop_gradient = True + if self.data_format == "NHWC": + x = paddle.tensor.transpose(x, [0, 2, 3, 1]) + x = self.conv1(x) + x = self.bn1(x) + x = self.prelu(x) + x = self.layer1(x) + x = self.layer2(x) + + x = self.layer3(x) + x = self.layer4(x) + x = self.bn2(x) + + if self.data_format == "NHWC": + x = paddle.tensor.transpose(x, [0, 3, 1, 2]) + + # return embedding feature + if isinstance(inputs, dict): + res = {'logits': x} + if 'targets' in inputs: + res['targets'] = inputs['targets'] + else: + res = x + return res + + def load_pretrained(self, path, rank=0, finetune=False): + if not os.path.exists(path + '.pdparams'): + raise ValueError("Model pretrain path {} does not " + "exists.".format(path)) + + state_dict = paddle.load(path + ".pdparams") + + dist_param_path = path + "_rank{}.pdparams".format(rank) + if os.path.exists(dist_param_path): + dist_state_dict = paddle.load(dist_param_path) + state_dict.update(dist_state_dict) + + # clear + dist_state_dict.clear() + + if not finetune: + self.set_dict(state_dict) + return + + return + + def save(self, 
path, local_rank=0, rank=0): + dist_state_dict = collections.OrderedDict() + state_dict = self.state_dict() + for name, param in list(state_dict.items()): + if param.is_distributed: + dist_state_dict[name] = state_dict.pop(name) + + if local_rank == 0: + paddle.save(state_dict, path + ".pdparams") + + if len(dist_state_dict) > 0: + paddle.save(dist_state_dict, + path + "_rank{}.pdparams".format(rank)) + + +def IResNet18(**kwargs): + model = IResNet(IBasicBlock, [2, 2, 2, 2], **kwargs) + return model + + +def IResNet34(**kwargs): + model = IResNet(IBasicBlock, [3, 4, 6, 3], **kwargs) + return model + + +def IResNet50(**kwargs): + model = IResNet(IBasicBlock, [3, 4, 14, 3], **kwargs) + return model + + +def IResNet100(**kwargs): + model = IResNet(IBasicBlock, [3, 13, 30, 3], **kwargs) + return model + + +def IResNet200(**kwargs): + model = IResNet(IBasicBlock, [6, 26, 60, 6], **kwargs) + return model diff --git a/plsc/models/multi_task/__init__.py b/plsc/models/multi_task/__init__.py new file mode 100644 index 0000000..4d4247d --- /dev/null +++ b/plsc/models/multi_task/__init__.py @@ -0,0 +1,13 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/plsc/models/multi_task/head.py b/plsc/models/multi_task/head.py new file mode 100644 index 0000000..d436bad --- /dev/null +++ b/plsc/models/multi_task/head.py @@ -0,0 +1,213 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Multi-task heads. +Only defined convolution blocks. 
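+TaskBlock is the per-task head: a small conv + BN + PReLU stack followed by
+max pooling and two FC layers that map the shared backbone features to the
+task's class logits.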
+""" +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr +from paddle.nn import Conv2D, BatchNorm, Linear, \ + MaxPool2D, Dropout, PReLU + + +class ConvBNLayer(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None, + data_format="NCHW"): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + data_format=data_format) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + epsilon=1e-05, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(bn_name + "_offset"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + data_layout=data_format) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class ConvBNLayerAttr(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + padding=0, + act=None, + name=None): + super(ConvBNLayerAttr, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=ParamAttr(), + bias_attr=False) + self._batch_norm = BatchNorm(num_filters, act=act) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + name=None, + data_format="NCHW"): + super(BasicBlock, self).__init__() + self.stride = stride + bn_name = "bn_" + name[3:] + "_before" + self._batch_norm = BatchNorm( + num_channels, + act=None, + epsilon=1e-05, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(bn_name + "_offset"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + data_layout=data_format) + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=1, + act=None, + name=name + "_branch2a", + data_format=data_format) + self.prelu = PReLU( + num_parameters=num_filters, + data_format=data_format, + name=name + "_branch2a_prelu") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act=None, + name=name + "_branch2b", + data_format=data_format) + + if shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + act=None, + name=name + "_branch1", + data_format=data_format) + + self.shortcut = shortcut + + def forward(self, inputs): + y = self._batch_norm(inputs) + y = self.conv0(y) + y = self.prelu(y) + conv1 = self.conv1(y) + + if self.shortcut: + short = self.short(inputs) + else: + short = inputs + y = paddle.add(x=short, y=conv1) + return y + + +class TaskBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride=1, + shortcut=True, + padding=0, + name=None, + class_num=10, + task_occlu=False, + data_format="NCHW"): + super(TaskBlock, self).__init__() + self.conv_for_fc = ConvBNLayerAttr( + num_channels=num_channels, + num_filters=64, + filter_size=3, + stride=1, + padding=1, + act=None, + 
name="conv_for_fc") + self.prelu_bottom = PReLU( + num_parameters=64, data_format=data_format, name="prelu_bottom") + + self.conv0 = ConvBNLayerAttr( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=1, + padding=padding, + act=None) + self.pool = MaxPool2D(kernel_size=2, stride=2, padding=0) + self.fc0 = Linear(256, 128) + self.prelu0 = PReLU(num_parameters=64) + self.prelu1 = PReLU(num_parameters=128) + self.task_occlu = task_occlu + self.fc1 = Linear(128, class_num) + + def forward(self, inputs): + y = self.conv_for_fc(inputs) + # y = self.prelu_bottom(y) + y = self.conv0(y) + y = self.prelu0(y) + N = y.shape[0] + y = self.pool(y) + y = paddle.reshape(y, [N, -1]) # 128, 64 * 3 * 3 + y = self.fc0(y) + y = self.prelu1(y) + out = self.fc1(y) + return out diff --git a/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml b/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml new file mode 100644 index 0000000..eeaaaef --- /dev/null +++ b/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml @@ -0,0 +1,217 @@ +# global configs +Global: + checkpoint: null # current main model + teacher_checkpoint: output/teacher/MTLModel/model_epoch0 + pretrained_model: null + output_dir: ./output/student + device: gpu + save_interval: 1 + max_num_latest_checkpoint: 3 + eval_during_train: True + eval_interval: 1 + eval_unit: "epoch" + accum_steps: 1 + epochs: 300 + print_batch_step: 10 + use_visualdl: False + seed: 2023 + task_names: [Arched_Eyebrows, Attractive, Bags_Under_Eyes, Bald, Clock_Shadow] + +# FP16 setting +FP16: + level: O0 + GradScaler: + init_loss_scaling: 65536.0 + + +DistributedStrategy: + data_parallel: True +# recompute: +# layer_interval: 4 +# names: [] +# exclude_names: ["dropout", "pool", "downsample"] + + +# model architecture +Model: + Teacher: + name: MTLModel + backbone: + name: IResNet50 + num_features: 512 + data_format: "NHWC" + heads: + - head0: # head 类型,一个类型的head可以支持多个任务,但是每个任务有一个head实例 + name: TaskBlock + task_ids: [0, 1, 2, 3, 4] + num_channels: 512 + num_filters: 64 + class_nums: [2, 10, 6, 5, 3] + data_format : "NHWC" + Student: + name: MTLModel + backbone: + name: IResNet18 + num_features: 512 + data_format: "NHWC" + heads: + - head0: + name: TaskBlock + task_ids: [ 0, 1, 2, 3, 4 ] + class_nums: [2, 10, 6, 5, 3] + num_channels: 512 + num_filters: 64 + data_format: "NHWC" + + +Distillation: + Enabled: True + soft_weight: 0.9 + soft_loss: + - MSELoss: + task_ids: [ 0, 1, 2, 3, 4 ] + weight: [ 2, 2, 2, 2, 1 ] + temperature: 2 + + +# loss function config for traing/eval process +Loss: + Train: + - ViTCELoss: + task_ids: [0, 1, 2, 3, 4] + weight: [2, 2, 2, 2, 1] + epsilon: 0.0001 + Eval: + - CELoss: + weight: 1.0 + +LRScheduler: + name: ViTLRScheduler + learning_rate: 3e-3 + decay_type: cosine + warmup_steps: 10000 + +Optimizer: + name: AdamW + betas: (0.9, 0.999) + epsilon: 1e-8 + weight_decay: 0.3 + grad_clip: + name: ClipGradByGlobalNorm + clip_norm: 1.0 + +# data loader for train and eval +DataLoader: + Train: + dataset: + - dataset0: # dataset类型,多个任务可以共用一种类型的dataset,但每个任务有自己的dataset实例,最终会concat成为一个整体的dataset + name: SingleTaskDataset + data_root: ./datasets/ + task_ids: [0, 1, 2, 3, 4] + cls_label_path: [Arched_Eyebrows_label.txt, Attractive_label.txt, Bags_Under_Eyes_label.txt, Bald_label.txt, Clock_Shadow_label.txt] + sample_ratio: [2, 1, 2, 20, 4] + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 112 + scale: [0.05, 
1.0] + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + Eval: + dataset: + - dataset0: + name: SingleTaskDataset + data_root: ./datasets/ + task_ids: [0] + cls_label_path: [Arched_Eyebrows_label.txt] + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 112 + scale: [0.05, 1.0] + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + Test: + dataset: + - dataset0: + name: MultiTaskDataset + data_root: ./datasets/ + cls_label_path: [test.txt] + task_ids: [[0, 1, 2, 3, 4]] + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 112 + scale: [ 0.05, 1.0 ] + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [ 0.5, 0.5, 0.5 ] + std: [ 0.5, 0.5, 0.5 ] + order: '' + - ToCHWImage: + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Train: + - TopkAcc: + topk: [1, 5] + Eval: + - TopkAcc: + topk: [1, 5] + Test: + - TopkAcc: + topk: [ 1, 5 ] + +Export: + export_type: paddle + input_shape: [None, 3, 112, 112] diff --git a/tools/mtl_train.py b/tools/mtl_train.py new file mode 100644 index 0000000..285ea5b --- /dev/null +++ b/tools/mtl_train.py @@ -0,0 +1,36 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +paddle.disable_static() + +from plsc.utils import config as cfg_util +from plsc.engine.multi_task_classfication.trainer import MTLEngine + + +def main(): + args = cfg_util.parse_args() + config = cfg_util.get_config( + args.config, overrides=args.override, show=False) + config.profiler_options = args.profiler_options + engine = MTLEngine(config, mode="Train") + engine.train() + + +if __name__ == "__main__": + main() From a237212c38ed5f34364250273ebbf2f25d7fd9a0 Mon Sep 17 00:00:00 2001 From: zhaoqi10 Date: Thu, 2 Feb 2023 18:32:57 +0800 Subject: [PATCH 2/5] fix: update build modules by using original build modules. 
--- plsc/data/dataset/__init__.py | 1 + plsc/data/dataset/mtl_dataset.py | 51 ++- .../multi_task_classfication/trainer.py | 362 ++++-------------- plsc/loss/MTLoss.py | 70 ++-- plsc/loss/__init__.py | 8 + plsc/metric/__init__.py | 4 +- plsc/models/__init__.py | 20 +- plsc/models/multi_task/MTLModel.py | 63 ++- .../multi_task_resnet18_dp_fp16o2_demo.yaml | 169 ++++---- 9 files changed, 318 insertions(+), 430 deletions(-) diff --git a/plsc/data/dataset/__init__.py b/plsc/data/dataset/__init__.py index 3238751..8b96482 100644 --- a/plsc/data/dataset/__init__.py +++ b/plsc/data/dataset/__init__.py @@ -64,3 +64,4 @@ def default_loader(path: str): from .imagenet_dataset import ImageNetDataset from .face_recognition_dataset import FaceIdentificationDataset, FaceVerificationDataset, FaceRandomDataset from .imagefolder_dataset import ImageFolder +from .mtl_dataset import SingleTaskDataset, MultiTaskDataset, ConcatDataset diff --git a/plsc/data/dataset/mtl_dataset.py b/plsc/data/dataset/mtl_dataset.py index cfb6dec..7c23c7e 100644 --- a/plsc/data/dataset/mtl_dataset.py +++ b/plsc/data/dataset/mtl_dataset.py @@ -15,7 +15,7 @@ Single-task Dataset and ConcatDataset are realized. Multi-task dataset(ConcatDataset) can be composed by multiple single-task datasets. """ - +from collections import Iterable import warnings import bisect import cv2 @@ -55,8 +55,9 @@ def __getitem__(self, idx): img = self.transform_ops(img) if label == -1: label = 0 - label = paddle.to_tensor(label, dtype=paddle.int32) - return img, label, self.task_id + label = paddle.to_tensor(np.array([label]), dtype=paddle.int32) + target = {"label": label, "task": self.task_id} + return img, target def __len__(self): return len(self.data_list) @@ -80,17 +81,19 @@ def cumsum(sequence, ratio_list): def __init__(self, datasets, dataset_ratio=None): super(ConcatDataset, self).__init__() - assert len(datasets) > 0, 'datasets should not be an empty iterable' - self.datasets = list(datasets) + assert isinstance(datasets, + Iterable), "datasets should not be iterable." + assert len(datasets) > 0, " datasets length should be greater than 0." + self.instance_datasets(datasets) if dataset_ratio is not None: - assert len(dataset_ratio) == len(datasets) + assert len(dataset_ratio) == len(self.datasets) self.dataset_ratio = { i: dataset_ratio[i] for i in range(len(dataset_ratio)) } else: - self.dataset_ratio = {i: 1. for i in range(len(datasets))} + self.dataset_ratio = {i: 1. for i in range(len(self.datasets))} self.cumulative_sizes = self.cumsum(self.datasets, self.dataset_ratio) self.idx_ds_map = { @@ -98,6 +101,33 @@ def __init__(self, datasets, dataset_ratio=None): for idx in range(self.__len__()) } + def instance_datasets(self, datasets): + # get class instance from config dict + dataset_list = [] + for ds in datasets: + if isinstance(ds, SingleTaskDataset): + continue + if isinstance(ds, dict): + name = list(ds.keys())[0] + params = ds[name] + task_ids = params.pop("task_ids", [0]) + if not isinstance(task_ids, list): + task_ids = [task_ids] + label_path = params.pop("label_path") + if not isinstance(label_path, list): + label_path = [label_path] + assert len(label_path) == len( + task_ids), "Length of label_path should equal to task_ids." 
+ for task_id, label_path in zip(task_ids, label_path): + dataset = eval(name)(task_id=task_id, + label_path=label_path, + **params) + dataset_list.append(dataset) + if len(dataset_list) > 0: + self.datasets = dataset_list + else: + self.datasets = list(datasets) + def __len__(self): return self.cumulative_sizes[-1] @@ -127,7 +157,7 @@ class MultiTaskDataset(Dataset): The input file includes multi-task datasets. """ - def __init__(self, task_ids, data_root, label_path, transform_ops): + def __init__(self, task_id, data_root, label_path, transform_ops): """ Args: @@ -136,7 +166,7 @@ def __init__(self, task_ids, data_root, label_path, transform_ops): label_path: transform_ops: """ - self.task_ids = task_ids + self.task_id = task_id self.data_root = data_root self.transform_ops = None if transform_ops is not None: @@ -157,7 +187,8 @@ def __getitem__(self, idx): img = self.transform_ops(img) labels = [0 if label == -1 else label for label in labels] labels = paddle.to_tensor(np.array(labels), dtype=paddle.int32) - return img, labels, np.array(self.task_ids) + target = {"label": labels, "task": self.task_id} + return img, target def __len__(self): return len(self.data_list) diff --git a/plsc/engine/multi_task_classfication/trainer.py b/plsc/engine/multi_task_classfication/trainer.py index 9f85e8f..e99ce6c 100644 --- a/plsc/engine/multi_task_classfication/trainer.py +++ b/plsc/engine/multi_task_classfication/trainer.py @@ -18,30 +18,19 @@ import os import random import time -from typing import Dict -from collections import defaultdict import paddle import paddle.distributed as dist -from paddle.io import DataLoader, DistributedBatchSampler -# from paddle.distributed.fleet.utils.hybrid_parallel_util -# import fused_allreduce_gradients -from plsc.engine.engine import Engine -from plsc.utils import logger -from plsc.utils import io -from plsc.models.multi_task.MTLModel import MTLModel -from plsc.loss.MTLoss import MTLoss -from plsc.core.param_fuse import get_fused_params -from plsc.core import recompute_warp, GradScaler, param_sync, grad_sync -from plsc.models.multi_task.ResNet_backbone import IResNet18, IResNet50 -from plsc.models.multi_task.head import TaskBlock -from plsc.scheduler import ViTLRScheduler -from plsc.optimizer import AdamW, ClipGradByGlobalNorm -from plsc.data.sampler.mtl_sampler import MTLSampler -from plsc.metric.metrics import TopkAcc -from plsc.data.dataset.mtl_dataset import SingleTaskDataset, \ - MultiTaskDataset, ConcatDataset +from plsc.utils import logger, io +from plsc.utils.config import print_config +from plsc.models import build_model +from plsc.loss import build_mtl_loss +from plsc.core import GradScaler, param_sync, grad_sync +from plsc.optimizer import build_optimizer +from plsc.metric import build_metrics +from plsc.data import build_dataloader +from plsc.scheduler import build_lr_scheduler class MTLEngine(object): @@ -75,22 +64,7 @@ def parse_config(self): for key in self.config["Global"]: setattr(self, key, self.config["Global"][key]) - # distillation and get model name - if self.config.get("Distillation", None): - student_model = self.config["Model"].get("Student", None) - teacher_model = self.config["Model"].get("Teacher", None) - assert student_model and teacher_model, "Teacher and Student model must be defined!" 
- self.model_name = student_model.get("name", None) - - self.distillation = self.config["Distillation"]["Enabled"] - self.soft_weight = self.config["Distillation"]["soft_weight"] - else: - self.distillation = False - if self.config["Model"].get("Teacher", None): - self.model_name = self.config["Model"]["Teacher"].get("name", - None) - else: - self.model_name = self.config["Model"].get("name", None) + self.model_name = self.config["Model"].get("name", None) assert self.model_name, "model must be defined!" # init logger @@ -99,6 +73,9 @@ def parse_config(self): f"{self.mode}.log") logger.init_logger(log_file=log_file) + # record + print_config(self.config) + # set device assert self.config["Global"]["device"] in ["cpu", "gpu", "xpu", "npu"] self.device = paddle.set_device(self.config["Global"]["device"]) @@ -140,226 +117,57 @@ def parse_config(self): "fp16_custom_white_list", None) self.fp16_params["custom_black_list"] = cfg_fp16.get( "fp16_custom_black_list", None) - # record - self.print_config() - - def print_config(self): - def print_params_dic(params_dic): - for key in params_dic: - logger.info(f"{key}: {params_dic[key]}") - - logger.info("=" * 16 + " params " + "=" * 16) - print_params_dic(self.config) - logger.info("=" * 40) def build_modules(self): # dataset - for mode in ["Train", "Eval", "Test"]: - self.build_mtl_dataset(mode) - self.build_mtl_sampler(mode) - self.build_mtl_loader(mode) - self.build_metrics(mode) + if self.mode == "Train": + for mode in ["Train", "Eval"]: + data_loader = build_dataloader( + self.config["DataLoader"], + mode, + self.device, + worker_init_fn=self.worker_init_fn) + setattr(self, f"{mode.lower()}_dataloader", data_loader) + self.eval_metrics = build_metrics(self.config["Metric"]["Eval"]) + else: + data_loader = build_dataloader( + self.config["DataLoader"], + self.mode, + self.device, + worker_init_fn=self.worker_init_fn) + setattr(self, f"{self.mode.lower()}_dataloader", data_loader) + + metrics = build_metrics(self.config["Metric"][self.mode]) + setattr(self, f"{self.mode.lower()}_metrics", metrics) # build model - if self.distillation: - self.build_model(opt="Teacher") - self.build_model(opt="Student") - self.model = self.student_model - else: - self.build_model(opt="Teacher") - self.model = self.teacher_model + self.model = build_model( + self.config["Model"], + task_names=self.task_names, + recompute_on=self.recompute, + recompute_params=self.recompute_params) + param_size, size_unit = self.params_counts(self.model) + logger.info( + f"The number of parameters is: {param_size:.3f}{size_unit}.") if self.dp: param_sync(self.model) - # model = paddle.DataParallel(model, find_unused_parameters=True) logger.info("DDP model: sync parameters finished") # build lr, opt, loss if self.mode == 'Train': - self.build_lr_scheduler() - self.build_optimizer() - self.build_loss() - if self.distillation: - self.build_distill_loss() - - def build_mtl_dataset(self, mode): - # multi-task dataset - cfg_ds_list = self.config["DataLoader"][mode][ - "dataset"] # dataset list - datasets = [] - all_sample_ratio = [] - for cfg_ds_item in cfg_ds_list: - dataset_name = list(cfg_ds_item.keys())[0] - cfg_ds = cfg_ds_item[dataset_name] - label_path_list = cfg_ds["cls_label_path"] - if not isinstance(cfg_ds["cls_label_path"], list): - label_path_list = [cfg_ds["cls_label_path"]] - assert len(label_path_list) == len(cfg_ds["task_ids"]), \ - "lenght of label_path_list must be equal to task_names" - - sample_ratio = cfg_ds.get("sample_ratio", 1.) 
- if not isinstance(sample_ratio, list): - sample_ratio = [sample_ratio] * len(label_path_list) - all_sample_ratio += sample_ratio - - for i in range(len(label_path_list)): - st_dataset = eval(cfg_ds["name"])( - cfg_ds["task_ids"][i], cfg_ds["data_root"], - label_path_list[i], cfg_ds["transform_ops"]) - datasets.append(st_dataset) - if len(datasets) >= 1: - dataset = ConcatDataset(datasets, dataset_ratio=all_sample_ratio) - else: - dataset = datasets[0] - setattr(self, f"{mode.lower()}_dataset", dataset) - logger.debug(f"Build {mode} dataset succeed.") - - def build_mtl_sampler(self, mode): - # multi-task sampler - cfg_sampler = self.config["DataLoader"][mode]["sampler"] - sampler_name = cfg_sampler.pop("name") - dataset = getattr(self, f"{mode.lower()}_dataset") - batch_sampler = eval(sampler_name)(dataset, **cfg_sampler) - logger.debug("build batch_sampler({}) success...".format(sampler_name)) - setattr(self, f"{mode.lower()}_sampler", batch_sampler) - logger.debug(f"Build {mode} sampler succeed.") - - def build_mtl_loader(self, mode): - # multi-task data loader - config_loader = self.config["DataLoader"][mode]["loader"] - dataset = getattr(self, f"{mode.lower()}_dataset") - sampler = getattr(self, f"{mode.lower()}_sampler") - data_loader = DataLoader( - dataset=dataset, - places=self.device, - num_workers=config_loader.num_workers, - return_list=True, - use_shared_memory=config_loader.use_shared_memory, - batch_sampler=sampler, - worker_init_fn=self.worker_init_fn) - setattr(self, f"{mode.lower()}_dataloader", data_loader) - logger.debug(f"Build {mode} dataloader succeed.") - - def build_model(self, opt=None): - model_config = copy.deepcopy(self.config["Model"]) - if model_config.get(opt, None): - model_config = model_config[opt] - # structure - model_name = model_config["name"] - # backbone - config_backbone = model_config["backbone"] - backbone_name = config_backbone.pop("name") - backbone = eval(backbone_name)(**config_backbone) - # head - config_heads = model_config["heads"] - head_dic = {} - for head_item in config_heads: - cfg_head = copy.deepcopy(head_item[list(head_item.keys())[0]]) - task_ids = cfg_head.pop("task_ids") - class_nums = cfg_head.pop("class_nums") - head_class = cfg_head.pop("name") - if not isinstance(head_class, list): - head_class = [head_class] * len(task_ids) - if not isinstance(class_nums, list): - class_nums = [class_nums] * len(task_ids) - for i, task_id in enumerate(task_ids): - head_dic[self.task_names[task_id]] = (eval(head_class[i])( - class_num=class_nums[i], **cfg_head)) - # merge - model = eval(model_name)(backbone, head_dic, self.recompute, - self.recompute_params) - setattr(self, f"{opt.lower()}_model", model) - param_size, size_unit = self.params_counts(model) - logger.info( - f"Build {opt} model succeed, the number of parameters is: {param_size:.3f}{size_unit}." 
- ) - - def build_loss(self): - cfg_loss = self.config["Loss"][self.mode] - self.loss_func = MTLoss(self.task_names, cfg_loss) - logger.debug(f"build {self.mode} loss {self.loss_func} success.") - - def build_lr_scheduler(self): - lr_config = copy.deepcopy(self.config.get("LRScheduler", None)) - self.lr_decay_unit = lr_config.get("decay_unit", "step") - lr_config.update({ - "epochs": self.epochs, - "step_each_epoch": len(self.train_dataloader) - }) - if "name" in lr_config: - lr_name = lr_config.pop("name") - lr = eval(lr_name)(**lr_config) - if isinstance(lr, paddle.optimizer.lr.LRScheduler): - self.lr_scheduler = lr - else: - self.lr_scheduler = lr() - else: - self.lr_scheduler = lr_config["learning_rate"] - logger.debug("build lr ({}) success..".format(self.lr_scheduler)) - - def build_optimizer(self): - opt_config = copy.deepcopy(self.config["Optimizer"]) - grad_clip = None - grad_clip_config = opt_config.pop('grad_clip', None) - if grad_clip_config is not None: - grad_clip_name = grad_clip_config.pop('name', - 'ClipGradByGlobalNorm') - grad_clip = eval(grad_clip_name)(**grad_clip_config) - no_weight_decay_name = opt_config.pop('no_weight_decay_name', []) - - param_group = defaultdict(list) - for n, p in self.model.named_parameters(): - state = copy.deepcopy(p.__dict__) - if any(nd in n for nd in no_weight_decay_name): - state['no_weight_decay'] = True - param_group[str(state)].append(p) - - # fuse params - for key in param_group: - if 'gpu' not in paddle.get_device(): - continue - if "'is_distributed': True" in key: - continue - if "'has_sparse_grad': True" in key: - continue - param_group[key] = get_fused_params(param_group[key]) - - # bulid optimizer params - params = [] - for key in param_group: - group = {'params': param_group[key]} - - if "'is_distributed': True" in key: - group['is_distributed'] = True - - if 'no_weight_decay' in key: - group['weight_decay'] = 0.0 - - params.append(group) - - optim_name = opt_config.pop('name') - self.optimizer = eval(optim_name)(params, - lr=self.lr_scheduler, - grad_clip=grad_clip, - **opt_config) - - logger.debug("build optimizer ({}) success..".format(self.optimizer)) - - def build_metrics(self, mode): - cfg_metric = self.config.get("Metric", None) - metrics = [] - if cfg_metric is not None: - metric_func_list = cfg_metric[mode] - for item in metric_func_list: - func_name = list(item.keys())[0] - metrics.append(eval(func_name)(**item[func_name])) - setattr(self, f"{mode.lower()}_metrics", metrics) - logger.debug(f"Build {mode} metrics succeed.") - - def build_distill_loss(self): - cfg_loss = self.config["Distillation"].get("soft_loss", None) - assert cfg_loss is not None, "distillation loss should not be None" - self.distill_loss = MTLoss(self.task_names, cfg_loss) - logger.debug("build distill loss success.") + # lr scheduler + lr_config = copy.deepcopy(self.config.get("LRScheduler", None)) + self.lr_decay_unit = lr_config.get("decay_unit", "step") + self.lr_scheduler = None + if lr_config is not None: + self.lr_scheduler = build_lr_scheduler( + lr_config, self.epochs, len(self.train_dataloader)) + # optimizer + self.optimizer = build_optimizer(self.config["Optimizer"], + self.lr_scheduler, self.model) + + self.loss_func = build_mtl_loss(self.task_names, + self.config["Loss"][self.mode]) def load_model(self): if self.checkpoint: @@ -367,14 +175,6 @@ def load_model(self): self.scaler) elif self.pretrained_model: self.model.load_pretrained(self.pretrained_model, self.rank) - if self.distillation: - teacher_model_path = 
self.config["Global"].get( - "teacher_checkpoint", None) - assert teacher_model_path is not None, "Lack of teacher checkpoint, " \ - "which must be loaded first in distillation mode " - self.teacher_model.load_pretrained(teacher_model_path, self.rank) - self.teacher_model.eval() - logger.info(f"Teacher model initialized.") def train(self): self.load_model() @@ -385,7 +185,7 @@ def train(self): metric_results = {} if self.eval_during_train and self.eval_unit == "epoch" \ and (epoch + 1) % self.eval_interval == 0: - metric_results = self.test() + metric_results = self.eval() # save model if (epoch + 1) % self.save_interval == 0 or (epoch + 1 ) == self.epochs: @@ -399,20 +199,13 @@ def train(self): def train_one_epoch(self, epoch): step = 0 avg_loss = 0 # average loss in the lasted `self.print_batch_step` steps - for images, labels, tasks in self.train_dataloader: + for images, targets in self.train_dataloader: start = time.time() step += 1 # compute loss with paddle.amp.auto_cast(self.fp16_params): logits = self.model(images) - _, total_loss = self.loss_func(logits, labels, tasks) - if self.distillation: - with paddle.no_grad(): - teacher_logits = self.teacher_model(images) - _, total_soft_loss = self.distill_loss( - logits, teacher_logits, tasks) - total_loss = self.soft_weight * total_soft_loss + ( - 1 - self.soft_weight) * total_loss + _, total_loss = self.loss_func(logits, targets) scaled = self.scaler.scale(total_loss) scaled.backward() grad_sync(self.optimizer.param_groups) @@ -449,8 +242,10 @@ def eval(self): results = {} bs = {} self.model.eval() - for images, labels, tasks in self.eval_dataloader: + for images, targets in self.eval_dataloader: step += 1 + labels = targets["label"] + tasks = targets["task"] logits = self.model(images) for idx, task_name in enumerate(self.task_names): cond = tasks == idx @@ -458,19 +253,22 @@ def eval(self): continue preds = logits[task_name][cond] labels = labels[cond] - for eval_metric in self.eval_metrics: - task_metric = eval_metric(preds, labels) - metric_name = str(eval_metric).replace("()", "") - results[idx] = results.get(idx, {metric_name: {}}) - for key in task_metric: + results[idx] = results.get(idx, {}) + task_metric = self.eval_metrics(preds, labels) + for metric_name in task_metric: + results[idx][metric_name] = results[idx].get(metric_name, + {}) + for key in task_metric[metric_name]: results[idx][metric_name][key] = \ - results[idx][metric_name].get(key, 0) + task_metric[key] + results[idx][metric_name].get(key, 0) + task_metric[metric_name][key] bs[idx] = bs.get(idx, 0) + 1 self.model.train() for idx in results: for metric in results[idx]: for key in results[idx][metric]: results[idx][metric][key] /= bs[idx] + for task_id in results: + logger.info(f"metrics - task{task_id}: {results[task_id]}") return results @paddle.no_grad() @@ -479,19 +277,23 @@ def test(self): results = {} bs = {} self.model.eval() - for images, targets, tasks in self.test_dataloader: + for images, targets in self.test_dataloader: step += 1 + labels = targets["label"] + tasks = targets["task"] logits = self.model(images) - for idx, task_id in enumerate(tasks[0]): - preds = logits[self.task_names[task_id]] - labels = targets[:, idx] - for metric in self.test_metrics: - task_metric = metric(preds, labels) - metric_name = str(metric).replace("()", "") - results[idx] = results.get(idx, {metric_name: {}}) - for key in task_metric: + for idx in range(len(tasks)): + task_id = tasks[idx][0] + preds_i = logits[self.task_names[task_id]] + labels_i = labels[:, idx] + 
results[idx] = results.get(idx, {}) + task_metric = self.test_metrics(preds_i, labels_i) + for metric_name in task_metric: + results[idx][metric_name] = results[idx].get(metric_name, + {}) + for key in task_metric[metric_name]: results[idx][metric_name][key] = \ - results[idx][metric_name].get(key, 0) + task_metric[key] + results[idx][metric_name].get(key, 0) + task_metric[metric_name][key] bs[idx] = bs.get(idx, 0) + 1 self.model.train() for idx in results: diff --git a/plsc/loss/MTLoss.py b/plsc/loss/MTLoss.py index 929dc75..73c4056 100644 --- a/plsc/loss/MTLoss.py +++ b/plsc/loss/MTLoss.py @@ -13,7 +13,7 @@ # limitations under the License. import copy - +from collections import Iterable import paddle import paddle.nn as nn @@ -26,32 +26,37 @@ class MTLoss(nn.Layer): multi-task loss framework """ - def __init__(self, task_names, cfg): + def __init__(self, task_names, losses, weights=1.0): super().__init__() self.loss_func = {} - self.loss_weight = {} self.task_names = task_names + self.instance_losses(losses) + self.loss_weight = {} + if isinstance(weights, float): + weights = len(self.loss_func) * [weights] + assert len(self.loss_func) == len( + weights), "Length of loss_func should be equal to weights" + weight_sum = sum(weights) + for task_id in self.loss_func: + self.loss_weight[task_id] = weights[task_id] / weight_sum + + def instance_losses(self, losses): + assert isinstance(losses, Iterable) and len(losses) > 0, \ + "losses should be iterable and length greater than 0" + self.loss_func = {} self.loss_names = {} - assert isinstance(cfg, list), "operator config should be a list" - for loss_item in cfg: - # more than 1 loss func, 1 loss func has more than 1 tasks - assert isinstance(loss_item, dict), "yaml format error" - loss_name = list(loss_item.keys())[0] - param = loss_item[loss_name] - task_id_list = param.pop("task_ids", [0]) # default task 0 - assert "weight" in param, \ - "weight must be in param, but param just contains {}".format( - param.keys()) - loss_weight = param.pop("weight", 1.0) - if isinstance(loss_weight, float): - loss_weight = len(task_id_list) * [loss_weight] - assert len(loss_weight) == len(task_id_list), \ - "task weights length must be equal to task number" - weight_sum = sum(loss_weight) - for task_id in task_id_list: - self.loss_names[task_id] = loss_name - self.loss_weight[task_id] = loss_weight[task_id] / weight_sum - self.loss_func[task_id] = eval(loss_name)(**param) + + for loss_item in losses: + assert isinstance(loss_item, dict) and len(loss_item.keys()) == 1, \ + "item in losses should be config dict whose length is one(loss class config)" + name = list(loss_item.keys())[0] + params = loss_item[name] + task_ids = params.pop("task_ids", [0]) + if not isinstance(task_ids, list): + task_ids = [task_ids] + for task_id in task_ids: + self.loss_func[task_id] = eval(name)(**params) + self.loss_names[task_id] = name @staticmethod def cast_fp32(input): @@ -59,18 +64,25 @@ def cast_fp32(input): input = paddle.cast(input, 'float32') return input - def __call__(self, input, target, task): + def __call__(self, input, target): + # target: [label, task] loss_dict = {} total_loss = 0.0 + assert isinstance( + target, dict), "target shold be a dict including keys(label, task)" + label = target["label"] + task = target["task"] for idx in self.loss_func: cond = task == idx logits = input[self.task_names[idx]][cond] - if isinstance(target, dict): - labels = target[self.task_names[idx]][cond] + if isinstance(label, dict): + if self.task_names[idx] in label: + labels = 
label[self.task_names[idx]][cond] + else: + print("label should be a tensor, not dict") else: - labels = target[cond] + labels = label[cond] loss = self.loss_func[idx](logits, labels) - weight = self.loss_weight[idx] loss_dict[idx] = loss[self.loss_names[idx]] - total_loss += loss_dict[idx] * weight + total_loss += loss_dict[idx] * self.loss_weight[idx] return loss_dict, total_loss diff --git a/plsc/loss/__init__.py b/plsc/loss/__init__.py index 9b50253..2844e35 100644 --- a/plsc/loss/__init__.py +++ b/plsc/loss/__init__.py @@ -19,6 +19,7 @@ from plsc.utils import logger from .celoss import CELoss, ViTCELoss +from .MTLoss import MTLoss from .marginloss import MarginLoss @@ -59,3 +60,10 @@ def build_loss(config): module_class = CombinedLoss(copy.deepcopy(config)) logger.debug("build loss {} success.".format(module_class)) return module_class + + +def build_mtl_loss(task_names, cfg_loss): + loss_name = cfg_loss.pop("name") + module_class = eval(loss_name)(task_names, **cfg_loss) + logger.debug("build loss {} success.".format(module_class)) + return module_class diff --git a/plsc/metric/__init__.py b/plsc/metric/__init__.py index a7d8e77..f739c22 100644 --- a/plsc/metric/__init__.py +++ b/plsc/metric/__init__.py @@ -39,7 +39,9 @@ def __init__(self, config_list): def __call__(self, *args, **kwargs): metric_dict = OrderedDict() for idx, metric_func in enumerate(self.metric_func_list): - metric_dict.update(metric_func(*args, **kwargs)) + metric_dict[str(metric_func).replace("()", "")] = metric_func( + *args, **kwargs) + # metric_dict.update(metric_func(*args, **kwargs)) return metric_dict diff --git a/plsc/models/__init__.py b/plsc/models/__init__.py index fc7ba50..4259b92 100644 --- a/plsc/models/__init__.py +++ b/plsc/models/__init__.py @@ -22,14 +22,32 @@ from .face_vit import * from .mobilefacenet import * from .cait import * +from .multi_task.MTLModel import * __all__ = ["build_model"] -def build_model(config): +def build_model(config, **kwargs): config = copy.deepcopy(config) model_type = config.pop("name") mod = importlib.import_module(__name__) + config.update(kwargs) + model = getattr(mod, model_type)(**config) + assert isinstance( + model, Model), 'model must inherit from plsc.models.layers.Model' + return model + + +def build_mtl_model(task_names, recompute_on, recompute_params, config): + + config = copy.deepcopy(config) + model_type = config.pop("name") + mod = importlib.import_module(__name__) + config.update({ + "task_names": task_names, + "recompute_on": recompute_on, + "recompute_params": recompute_params + }) model = getattr(mod, model_type)(**config) assert isinstance( model, Model), 'model must inherit from plsc.models.layers.Model' diff --git a/plsc/models/multi_task/MTLModel.py b/plsc/models/multi_task/MTLModel.py index 0ef144c..8312e2b 100644 --- a/plsc/models/multi_task/MTLModel.py +++ b/plsc/models/multi_task/MTLModel.py @@ -24,6 +24,8 @@ from paddle.nn import Layer, LayerDict, LayerList from plsc.models.layers.base_model import Model from plsc.core.recompute import wrap_forward, recompute_forward +from plsc.models.multi_task.ResNet_backbone import * +from plsc.models.multi_task.head import * class MTLModel(Model): @@ -33,30 +35,62 @@ class MTLModel(Model): """ def __init__(self, - backbone: Layer, - encoder_heads: Dict, + task_names, + backbone, + heads, recompute_on=False, recompute_params=None): """ Args: - backbone: backbone for feature extraction - encoder_heads: Dict + task_names: task name list + backbone: backbone for feature extraction (or config dict) + 
encoder_heads: Dict (or config list) recompute_on: if recompute is used recompute_params: recompute layers """ super(MTLModel, self).__init__() + self.task_names = task_names if recompute_params is None: recompute_params = {} - self.backbone = backbone + if isinstance(backbone, Model): + self.backbone = backbone + else: + self.backbone = self.instances_from_cfg(backbone) # {task_names: Layer} - self.encoder_heads = LayerDict(sublayers=encoder_heads) + heads = self.instances_from_cfg(heads) + self.heads = LayerDict(sublayers=heads) self.recompute_on = recompute_on if self.recompute_on: self.recompute_warp(self.backbone, **recompute_params) - for task_name in self.encoder_heads: - self.recompute_warp(self.encoder_heads[task_name], - **recompute_params) + for task_name in self.heads: + self.recompute_warp(self.heads[task_name], **recompute_params) + + def instances_from_cfg(self, cfg): + if isinstance(cfg, dict): + name = cfg.pop("name", None) + if name is not None: + try: + module = eval(name)(**cfg) + except Exception as e: + print("instance cfg error: ", e) + else: + return module + if isinstance(cfg, list): + module_dic = {} + for item in cfg: + if isinstance(item, dict) and len(item) == 1: + name = list(item.keys())[0] + params = item[name] + task_ids = params.pop("task_ids", None) + class_nums = params.pop("class_nums", None) + if task_ids and class_nums: + for task_id, class_num in zip(task_ids, class_nums): + module = eval(name)(class_num=class_num, **params) + module_dic[self.task_names[task_id]] = module + if len(module_dic) > 0: + return module_dic + return None def recompute_warp(self, model, @@ -69,20 +103,17 @@ def recompute_warp(self, exclude_names = ["Dropout", "dropout", "pool"] for idx, (name, layer) in enumerate(model._sub_layers.items()): if name in exclude_names: - print(f"continue: {name}") + # print(f"continue: {name}") continue if isinstance(layer, paddle.nn.LayerList): for i, (name, sub_layer) in enumerate(layer.named_sublayers()): if name in exclude_names: - print(f"continue: {name}") continue if layer_interval >= 1 and idx % layer_interval == 0: - print('recompute: ', name) sub_layer.forward = wrap_forward(sub_layer.forward, recompute_forward) else: if layer_interval >= 1 and idx % layer_interval == 0: - print('recompute: ', name) layer.forward = wrap_forward(layer.forward, recompute_forward) @@ -91,10 +122,10 @@ def forward(self, inputs, output_task_names=None): features = self.backbone(inputs) if output_task_names is not None: for task_name in output_task_names: - output[task_name] = self.encoder_heads[task_name](features) + output[task_name] = self.heads[task_name](features) else: - for task_name in self.encoder_heads: - output[task_name] = self.encoder_heads[task_name](features) + for task_name in self.heads: + output[task_name] = self.heads[task_name](features) return output def save(self, path, local_rank=0, rank=0): diff --git a/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml b/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml index eeaaaef..1d078e8 100644 --- a/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml +++ b/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml @@ -34,56 +34,36 @@ DistributedStrategy: # model architecture Model: - Teacher: - name: MTLModel - backbone: - name: IResNet50 - num_features: 512 - data_format: "NHWC" - heads: - - head0: # head 类型,一个类型的head可以支持多个任务,但是每个任务有一个head实例 - name: TaskBlock - task_ids: [0, 1, 2, 3, 4] - 
num_channels: 512 - num_filters: 64 - class_nums: [2, 10, 6, 5, 3] - data_format : "NHWC" - Student: - name: MTLModel - backbone: - name: IResNet18 - num_features: 512 - data_format: "NHWC" - heads: - - head0: - name: TaskBlock - task_ids: [ 0, 1, 2, 3, 4 ] - class_nums: [2, 10, 6, 5, 3] - num_channels: 512 - num_filters: 64 - data_format: "NHWC" - - -Distillation: - Enabled: True - soft_weight: 0.9 - soft_loss: - - MSELoss: - task_ids: [ 0, 1, 2, 3, 4 ] - weight: [ 2, 2, 2, 2, 1 ] - temperature: 2 + name: MTLModel + backbone: + name: IResNet50 + num_features: 512 + data_format: "NHWC" + heads: + - TaskBlock: # head 类型,一个类型的head可以支持多个任务,但是每个任务有一个head实例 + task_ids: [0, 1, 2, 3, 4] + class_nums: [2, 10, 6, 5, 3] + num_channels: 512 + num_filters: 64 + data_format : "NHWC" # loss function config for traing/eval process Loss: Train: - - ViTCELoss: - task_ids: [0, 1, 2, 3, 4] - weight: [2, 2, 2, 2, 1] - epsilon: 0.0001 + name: MTLoss + weights: [2, 2, 2, 2, 1] + losses: + - ViTCELoss: + task_ids: [0, 1, 2, 3, 4] + epsilon: 0.0001 Eval: - - CELoss: - weight: 1.0 + name: MTLoss + weights: [2, 2, 2, 2, 1] + losses: + - CELoss: + task_ids: [0, 1, 2, 3, 4] + epsilon: 0.0001 LRScheduler: name: ViTLRScheduler @@ -104,29 +84,30 @@ Optimizer: DataLoader: Train: dataset: - - dataset0: # dataset类型,多个任务可以共用一种类型的dataset,但每个任务有自己的dataset实例,最终会concat成为一个整体的dataset - name: SingleTaskDataset - data_root: ./datasets/ - task_ids: [0, 1, 2, 3, 4] - cls_label_path: [Arched_Eyebrows_label.txt, Attractive_label.txt, Bags_Under_Eyes_label.txt, Bald_label.txt, Clock_Shadow_label.txt] - sample_ratio: [2, 1, 2, 20, 4] - transform_ops: - - DecodeImage: - to_rgb: True - channel_first: False - - RandCropImage: - size: 112 - scale: [0.05, 1.0] - interpolation: bicubic - backend: pil - - RandFlipImage: - flip_code: 1 - - NormalizeImage: - scale: 1.0/255.0 - mean: [0.5, 0.5, 0.5] - std: [0.5, 0.5, 0.5] - order: '' - - ToCHWImage: + name: ConcatDataset + dataset_ratio: [2, 1, 2, 20, 4] + datasets: + - SingleTaskDataset: # dataset类型,多个任务可以共用一种类型的dataset,但每个任务有自己的dataset实例,最终会concat成为一个整体的dataset + data_root: ./datasets/ + task_ids: [0, 1, 2, 3, 4] + label_path: [Arched_Eyebrows_label.txt, Attractive_label.txt, Bags_Under_Eyes_label.txt, Bald_label.txt, Clock_Shadow_label.txt] + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 112 + scale: [0.05, 1.0] + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + order: '' + - ToCHWImage: sampler: name: DistributedBatchSampler @@ -138,28 +119,29 @@ DataLoader: use_shared_memory: True Eval: dataset: - - dataset0: - name: SingleTaskDataset - data_root: ./datasets/ - task_ids: [0] - cls_label_path: [Arched_Eyebrows_label.txt] - transform_ops: - - DecodeImage: - to_rgb: True - channel_first: False - - RandCropImage: - size: 112 - scale: [0.05, 1.0] - interpolation: bicubic - backend: pil - - RandFlipImage: - flip_code: 1 - - NormalizeImage: - scale: 1.0/255.0 - mean: [0.5, 0.5, 0.5] - std: [0.5, 0.5, 0.5] - order: '' - - ToCHWImage: + name: ConcatDataset + datasets: + - SingleTaskDataset: + data_root: ./datasets/ + task_ids: [0] + label_path: [Arched_Eyebrows_label.txt] + transform_ops: + - DecodeImage: + to_rgb: True + channel_first: False + - RandCropImage: + size: 112 + scale: [0.05, 1.0] + interpolation: bicubic + backend: pil + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.5, 0.5, 0.5] + std: [0.5, 
0.5, 0.5] + order: '' + - ToCHWImage: sampler: name: DistributedBatchSampler batch_size: 128 @@ -170,10 +152,11 @@ DataLoader: use_shared_memory: True Test: dataset: - - dataset0: - name: MultiTaskDataset + name: ConcatDataset + datasets: + - MultiTaskDataset: data_root: ./datasets/ - cls_label_path: [test.txt] + label_path: [test.txt] task_ids: [[0, 1, 2, 3, 4]] transform_ops: - DecodeImage: @@ -210,7 +193,7 @@ Metric: topk: [1, 5] Test: - TopkAcc: - topk: [ 1, 5 ] + topk: [1, 5] Export: export_type: paddle From b2de7a20762d5742b2860ea3db65cc15c4392a46 Mon Sep 17 00:00:00 2001 From: zhaoqi10 Date: Thu, 2 Feb 2023 19:04:50 +0800 Subject: [PATCH 3/5] fix: update import methods to adapt paddle and plsc updating and merge mtl_train to train. --- plsc/engine/__init__.py | 2 ++ .../multi_task_classfication/__init__.py | 2 ++ plsc/models/multi_task/MTLModel.py | 2 +- plsc/models/multi_task/ResNet_backbone.py | 2 +- plsc/utils/config.py | 7 +++- tools/mtl_train.py | 36 ------------------- tools/train.py | 7 ++-- 7 files changed, 17 insertions(+), 41 deletions(-) delete mode 100644 tools/mtl_train.py diff --git a/plsc/engine/__init__.py b/plsc/engine/__init__.py index 97043fd..93b77c3 100644 --- a/plsc/engine/__init__.py +++ b/plsc/engine/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from plsc.engine.engine import Engine +from plsc.engine.multi_task_classfication import MTLEngine diff --git a/plsc/engine/multi_task_classfication/__init__.py b/plsc/engine/multi_task_classfication/__init__.py index 4d4247d..81f3da2 100644 --- a/plsc/engine/multi_task_classfication/__init__.py +++ b/plsc/engine/multi_task_classfication/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +from plsc.engine.multi_task_classfication.trainer import MTLEngine diff --git a/plsc/models/multi_task/MTLModel.py b/plsc/models/multi_task/MTLModel.py index 8312e2b..9e99994 100644 --- a/plsc/models/multi_task/MTLModel.py +++ b/plsc/models/multi_task/MTLModel.py @@ -22,7 +22,7 @@ import paddle from paddle.nn import Layer, LayerDict, LayerList -from plsc.models.layers.base_model import Model +from plsc.models.base_model import Model from plsc.core.recompute import wrap_forward, recompute_forward from plsc.models.multi_task.ResNet_backbone import * from plsc.models.multi_task.head import * diff --git a/plsc/models/multi_task/ResNet_backbone.py b/plsc/models/multi_task/ResNet_backbone.py index c9e721d..da3686d 100644 --- a/plsc/models/multi_task/ResNet_backbone.py +++ b/plsc/models/multi_task/ResNet_backbone.py @@ -23,7 +23,7 @@ import paddle.nn as nn from plsc.nn import init -from plsc.models.layers.base_model import Model +from plsc.models.base_model import Model import math diff --git a/plsc/utils/config.py b/plsc/utils/config.py index 09d8529..397ee56 100644 --- a/plsc/utils/config.py +++ b/plsc/utils/config.py @@ -188,7 +188,12 @@ def parse_args(): '--config', type=str, default='configs/config.yaml', - help='config file path') + help='config file path.') + parser.add_argument( + '-t', + '--mtl', + action='store_true', + help='The option of multi-task learning.') parser.add_argument( '-o', '--override', diff --git a/tools/mtl_train.py b/tools/mtl_train.py deleted file mode 100644 index 285ea5b..0000000 --- a/tools/mtl_train.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -paddle.disable_static() - -from plsc.utils import config as cfg_util -from plsc.engine.multi_task_classfication.trainer import MTLEngine - - -def main(): - args = cfg_util.parse_args() - config = cfg_util.get_config( - args.config, overrides=args.override, show=False) - config.profiler_options = args.profiler_options - engine = MTLEngine(config, mode="Train") - engine.train() - - -if __name__ == "__main__": - main() diff --git a/tools/train.py b/tools/train.py index c7fcb43..486084c 100644 --- a/tools/train.py +++ b/tools/train.py @@ -20,7 +20,7 @@ paddle.disable_static() from plsc.utils import config as cfg_util -from plsc.engine.engine import Engine +from plsc.engine import Engine, MTLEngine def main(): @@ -28,7 +28,10 @@ def main(): config = cfg_util.get_config( args.config, overrides=args.override, show=False) config.profiler_options = args.profiler_options - engine = Engine(config, mode="train") + if args.mtl: + engine = MTLEngine(config, mode="Train") + else: + engine = Engine(config, mode="train") engine.train() From 98ea524b704dd2c255c1f38cf46859c9d8fe35b6 Mon Sep 17 00:00:00 2001 From: zhaoqi10 Date: Fri, 3 Feb 2023 16:34:14 +0800 Subject: [PATCH 4/5] fix: fix recompute to make it adapt multi-hierarchy network structure. --- plsc/core/recompute.py | 2 +- plsc/data/dataset/mtl_dataset.py | 5 --- .../multi_task_classfication/trainer.py | 16 +++---- plsc/models/__init__.py | 16 ------- plsc/models/multi_task/MTLModel.py | 42 ++----------------- .../multi_task_resnet18_dp_fp16o2_demo.yaml | 7 ++-- 6 files changed, 16 insertions(+), 72 deletions(-) diff --git a/plsc/core/recompute.py b/plsc/core/recompute.py index 8b7bb73..4f08a67 100644 --- a/plsc/core/recompute.py +++ b/plsc/core/recompute.py @@ -33,7 +33,7 @@ def recompute_forward(func, *args, **kwargs): def recompute_warp(model, layerlist_interval=1, names=[]): - for name, layer in model._sub_layers.items(): + for name, layer in model.named_sublayers(): if isinstance(layer, nn.LayerList): for idx, sub_layer in enumerate(layer): if layerlist_interval >= 1 and idx % layerlist_interval == 0: diff --git a/plsc/data/dataset/mtl_dataset.py b/plsc/data/dataset/mtl_dataset.py index 7c23c7e..18f0c84 100644 --- a/plsc/data/dataset/mtl_dataset.py +++ b/plsc/data/dataset/mtl_dataset.py @@ -143,11 +143,6 @@ def __getitem__(self, idx): @property def cummulative_sizes(self): - warnings.warn( - "cummulative_sizes attribute is renamed to " - "cumulative_sizes", - DeprecationWarning, - stacklevel=2) return self.cumulative_sizes diff --git a/plsc/engine/multi_task_classfication/trainer.py b/plsc/engine/multi_task_classfication/trainer.py index e99ce6c..20420d8 100644 --- a/plsc/engine/multi_task_classfication/trainer.py +++ b/plsc/engine/multi_task_classfication/trainer.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +""" +Multi-task learning trainer +""" import copy import math import numpy as np @@ -26,7 +28,8 @@ from plsc.utils.config import print_config from plsc.models import build_model from plsc.loss import build_mtl_loss -from plsc.core import GradScaler, param_sync, grad_sync +from plsc.core import GradScaler, param_sync +from plsc.core import grad_sync, recompute_warp from plsc.optimizer import build_optimizer from plsc.metric import build_metrics from plsc.data import build_dataloader @@ -142,16 +145,15 @@ def build_modules(self): # build model self.model = build_model( - self.config["Model"], - task_names=self.task_names, - recompute_on=self.recompute, - recompute_params=self.recompute_params) + self.config["Model"], task_names=self.task_names) + if self.recompute: + recompute_warp(self.model, **self.recompute_params) param_size, size_unit = self.params_counts(self.model) logger.info( f"The number of parameters is: {param_size:.3f}{size_unit}.") if self.dp: param_sync(self.model) - logger.info("DDP model: sync parameters finished") + logger.info("DDP model: sync parameters finished.") # build lr, opt, loss if self.mode == 'Train': diff --git a/plsc/models/__init__.py b/plsc/models/__init__.py index 4259b92..27a5a2a 100644 --- a/plsc/models/__init__.py +++ b/plsc/models/__init__.py @@ -36,19 +36,3 @@ def build_model(config, **kwargs): assert isinstance( model, Model), 'model must inherit from plsc.models.layers.Model' return model - - -def build_mtl_model(task_names, recompute_on, recompute_params, config): - - config = copy.deepcopy(config) - model_type = config.pop("name") - mod = importlib.import_module(__name__) - config.update({ - "task_names": task_names, - "recompute_on": recompute_on, - "recompute_params": recompute_params - }) - model = getattr(mod, model_type)(**config) - assert isinstance( - model, Model), 'model must inherit from plsc.models.layers.Model' - return model diff --git a/plsc/models/multi_task/MTLModel.py b/plsc/models/multi_task/MTLModel.py index 9e99994..91ae82d 100644 --- a/plsc/models/multi_task/MTLModel.py +++ b/plsc/models/multi_task/MTLModel.py @@ -23,7 +23,6 @@ import paddle from paddle.nn import Layer, LayerDict, LayerList from plsc.models.base_model import Model -from plsc.core.recompute import wrap_forward, recompute_forward from plsc.models.multi_task.ResNet_backbone import * from plsc.models.multi_task.head import * @@ -34,12 +33,7 @@ class MTLModel(Model): Recomputing can be turned on. 
""" - def __init__(self, - task_names, - backbone, - heads, - recompute_on=False, - recompute_params=None): + def __init__(self, task_names, backbone, heads): """ Args: @@ -51,8 +45,7 @@ def __init__(self, """ super(MTLModel, self).__init__() self.task_names = task_names - if recompute_params is None: - recompute_params = {} + if isinstance(backbone, Model): self.backbone = backbone else: @@ -60,13 +53,9 @@ def __init__(self, # {task_names: Layer} heads = self.instances_from_cfg(heads) self.heads = LayerDict(sublayers=heads) - self.recompute_on = recompute_on - if self.recompute_on: - self.recompute_warp(self.backbone, **recompute_params) - for task_name in self.heads: - self.recompute_warp(self.heads[task_name], **recompute_params) def instances_from_cfg(self, cfg): + # instantiate layer from config dict if isinstance(cfg, dict): name = cfg.pop("name", None) if name is not None: @@ -92,31 +81,6 @@ def instances_from_cfg(self, cfg): return module_dic return None - def recompute_warp(self, - model, - layer_interval=1, - names=[], - exclude_names=None): - # recompute layers in names list or use layer_interval setting, - # layers in excluded names are excluded. - if exclude_names is None: - exclude_names = ["Dropout", "dropout", "pool"] - for idx, (name, layer) in enumerate(model._sub_layers.items()): - if name in exclude_names: - # print(f"continue: {name}") - continue - if isinstance(layer, paddle.nn.LayerList): - for i, (name, sub_layer) in enumerate(layer.named_sublayers()): - if name in exclude_names: - continue - if layer_interval >= 1 and idx % layer_interval == 0: - sub_layer.forward = wrap_forward(sub_layer.forward, - recompute_forward) - else: - if layer_interval >= 1 and idx % layer_interval == 0: - layer.forward = wrap_forward(layer.forward, - recompute_forward) - def forward(self, inputs, output_task_names=None): output = {} features = self.backbone(inputs) diff --git a/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml b/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml index 1d078e8..4f0f8f3 100644 --- a/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml +++ b/task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml @@ -26,10 +26,9 @@ FP16: DistributedStrategy: data_parallel: True -# recompute: -# layer_interval: 4 -# names: [] -# exclude_names: ["dropout", "pool", "downsample"] + recompute: + layerlist_interval: 4 + names: [] # model architecture From be8c3249ddbd2ecfd1a879951890771fe56667d8 Mon Sep 17 00:00:00 2001 From: zhaoqi10 Date: Fri, 3 Feb 2023 16:38:48 +0800 Subject: [PATCH 5/5] fix: update time in copyright --- plsc/data/dataset/mtl_dataset.py | 2 +- plsc/engine/multi_task_classfication/__init__.py | 2 +- plsc/engine/multi_task_classfication/trainer.py | 2 +- plsc/loss/MTLoss.py | 2 +- plsc/loss/distill_loss.py | 2 +- plsc/models/multi_task/MTLModel.py | 2 +- plsc/models/multi_task/ResNet_backbone.py | 2 +- plsc/models/multi_task/head.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/plsc/data/dataset/mtl_dataset.py b/plsc/data/dataset/mtl_dataset.py index 18f0c84..72c49df 100644 --- a/plsc/data/dataset/mtl_dataset.py +++ b/plsc/data/dataset/mtl_dataset.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/plsc/engine/multi_task_classfication/__init__.py b/plsc/engine/multi_task_classfication/__init__.py index 81f3da2..41a2aa4 100644 --- a/plsc/engine/multi_task_classfication/__init__.py +++ b/plsc/engine/multi_task_classfication/__init__.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/plsc/engine/multi_task_classfication/trainer.py b/plsc/engine/multi_task_classfication/trainer.py index 20420d8..0dd763d 100644 --- a/plsc/engine/multi_task_classfication/trainer.py +++ b/plsc/engine/multi_task_classfication/trainer.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/plsc/loss/MTLoss.py b/plsc/loss/MTLoss.py index 73c4056..41657fd 100644 --- a/plsc/loss/MTLoss.py +++ b/plsc/loss/MTLoss.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/plsc/loss/distill_loss.py b/plsc/loss/distill_loss.py index 56b77b9..18874ef 100644 --- a/plsc/loss/distill_loss.py +++ b/plsc/loss/distill_loss.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/plsc/models/multi_task/MTLModel.py b/plsc/models/multi_task/MTLModel.py index 91ae82d..22d2321 100644 --- a/plsc/models/multi_task/MTLModel.py +++ b/plsc/models/multi_task/MTLModel.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/plsc/models/multi_task/ResNet_backbone.py b/plsc/models/multi_task/ResNet_backbone.py index da3686d..183f043 100644 --- a/plsc/models/multi_task/ResNet_backbone.py +++ b/plsc/models/multi_task/ResNet_backbone.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/plsc/models/multi_task/head.py b/plsc/models/multi_task/head.py index d436bad..dc7b436 100644 --- a/plsc/models/multi_task/head.py +++ b/plsc/models/multi_task/head.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
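
After this series, multi-task training is driven through tools/train.py with the new --mtl switch (PATCH 3/5) and the demo config added in PATCH 1/5. The following is a minimal sketch of that launch path, not part of the patches themselves: it assumes a single-process, single-GPU run (no paddle.distributed.launch) and that the demo YAML and datasets exist at the paths shown; it simply mirrors the deleted tools/mtl_train.py with the import updated to the new plsc.engine export.

    # Equivalent CLI after applying the series (assumed invocation):
    #   python tools/train.py --config task/multi_task_classification/configs/multi_task_resnet18_dp_fp16o2_demo.yaml --mtl
    import paddle
    paddle.disable_static()

    from plsc.utils import config as cfg_util
    from plsc.engine import MTLEngine  # exported by plsc/engine/__init__.py as of PATCH 3/5


    def main():
        # parse --config <yaml> [--mtl] [-o overrides] from the command line
        args = cfg_util.parse_args()
        config = cfg_util.get_config(
            args.config, overrides=args.override, show=False)
        config.profiler_options = args.profiler_options
        # MTLEngine builds the dataloaders, MTLModel, MTLoss, metrics and optimizer
        # from the config, then runs the multi-task training loop
        engine = MTLEngine(config, mode="Train")
        engine.train()


    if __name__ == "__main__":
        main()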