# swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o1.yaml
# Global run configuration: checkpoint/pretrained inputs, output location,
# device, save/eval cadence, training length, logging and seed.
Global:
  # Both are null: no checkpoint and no pretrained weights are configured.
  checkpoint: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 1
  # NOTE(review): 0 presumably means "keep no rolling latest checkpoints" -
  # confirm against the trainer that reads this key.
  max_num_latest_checkpoint: 0
  eval_during_train: true
  # NOTE(review): presumably "evaluate every eval_interval eval_unit(s)" -
  # confirm against the trainer.
  eval_interval: 1
  eval_unit: 'epoch'
  accum_steps: 1
  epochs: 300
  print_batch_step: 10
  use_visualdl: false
  seed: 2023
# FP16 (mixed precision) settings.
FP16:
  # The level is the letter "O" followed by the digit 1 (a plain string).
  level: O1
  GradScaler:
    # 65536.0 == 2**16
    init_loss_scaling: 65536.0
# Distributed strategy: data parallelism is the only option set here.
DistributedStrategy:
  data_parallel: true
# Model architecture: which network to build and its constructor arguments.
Model:
  name: swin_base_patch4_window7_224
  # Same value as `class_num` used by the Mixup/Cutmix batch transforms.
  num_classes: 1000
  drop_path_rate: 0.5
# Loss function config for the training/eval process: one CELoss entry with
# weight 1.0 for Train and one for Eval.
# NOTE(review): the lines of this section carry no indentation and the `Loss:`
# key appears on two consecutive lines. From this text alone it cannot be
# determined whether the second `Loss:` is a nested key or an accidental
# duplicate, so the nesting is deliberately NOT reconstructed here - confirm
# against the code that reads this section before re-indenting.
Loss:
Loss:
Train:
- CELoss:
weight: 1.0
Eval:
- CELoss:
weight: 1.0
# Learning-rate schedule: scheduler name plus its peak/minimum rates and
# warmup parameters.
LRScheduler:
  name: TimmCosine
  # NOTE(review): exponent literals without a decimal point (1e-3, 1e-5, 1e-6)
  # are loaded as *strings* by YAML 1.1 parsers such as PyYAML. They are kept
  # exactly as written because the consumer may depend on that - confirm that
  # it converts them to float.
  learning_rate: 1e-3
  eta_min: 1e-5
  warmup_epoch: 20
  warmup_start_lr: 1e-6
  decay_unit: 'step'
  warmup_prefix: true
# Optimizer: algorithm, hyper-parameters, parameters excluded from weight
# decay, and gradient clipping.
Optimizer:
  name: AdamW
  # NOTE(review): a parenthesised value is a plain YAML scalar, so this loads
  # as the string "(0.9, 0.999)", not a sequence. Kept as written - presumably
  # the consumer parses it; confirm before changing to a list.
  betas: (0.9, 0.999)
  # NOTE(review): dot-less exponent literal; a string under YAML 1.1 parsers.
  epsilon: 1e-8
  weight_decay: 0.05
  # Name patterns listed for exclusion from weight decay.
  no_weight_decay_name:
    - absolute_pos_embed
    - relative_position_bias_table
    - norm
    - bias
  grad_clip:
    name: ClipGradByGlobalNorm
    clip_norm: 5.0
# Data loaders for training and evaluation. Each split declares a dataset
# (with its per-sample transform pipeline), a batch sampler and loader options.
DataLoader:
  Train:
    dataset:
      name: ImageFolder
      root: ./dataset/ILSVRC2012/train
      # Per-sample transforms, listed in the order they appear in the file.
      transform:
        - RandomResizedCrop:
            size: 224
            interpolation: bicubic
        # No arguments given (value is null).
        - RandomHorizontalFlip:
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
            mean: [0.485, 0.456, 0.406]
        - NormalizeImage:
            # NOTE(review): "1.0/255.0" loads as a string, not a number -
            # presumably evaluated by the transform; confirm.
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            # NOTE(review): "1.0/3.0" loads as a string - presumably evaluated
            # by the transform; confirm.
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: true
            mode: pixel
        # No arguments given (value is null).
        - ToCHWImage:
      # NOTE(review): placed under `dataset` next to `transform`; the original
      # nesting level was lost with the indentation - confirm against the
      # dataset builder.
      batch_transform:
        - TransformOpSampler:
            Mixup:
              alpha: 0.8
              prob: 0.5
              epsilon: 0.1
              class_num: 1000
            Cutmix:
              alpha: 1.0
              prob: 0.5
              epsilon: 0.1
              class_num: 1000
    sampler:
      name: DistributedBatchSampler
      batch_size: 128  # accum_steps: 1, total batchsize: 1024
      drop_last: false
      shuffle: true
    loader:
      num_workers: 8
      use_shared_memory: true
  Eval:
    dataset:
      name: ImageFolder
      root: ./dataset/ILSVRC2012/val
      transform:
        - Resize:
            size: 256
            interpolation: bicubic
            backend: pil
        - CenterCrop:
            size: 224
        - NormalizeImage:
            # NOTE(review): loads as a string - presumably evaluated by the
            # transform; confirm.
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        # No arguments given (value is null).
        - ToCHWImage:
    sampler:
      name: DistributedBatchSampler
      batch_size: 256
      drop_last: false
      shuffle: false
    loader:
      num_workers: 8
      use_shared_memory: true
# Evaluation metrics: top-k accuracy for k = 1 and k = 5.
Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]
# Model export settings: target format and input tensor shape.
Export:
  export_type: paddle
  # NOTE(review): `None` is not YAML null - it loads as the string "None".
  # Kept as written; presumably the exporter maps it to a dynamic dimension -
  # confirm before replacing it with `null`.
  input_shape: [None, 3, 224, 224]