-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathrun_asr_fine_tuning.sh
executable file
·147 lines (130 loc) · 4.64 KB
/
run_asr_fine_tuning.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/bin/bash
#
# SPDX-FileCopyrightText: Copyright © <2022> Idiap Research Institute <[email protected]>
#
# SPDX-FileContributor: Juan Zuluaga-Gomez <[email protected]>
#
# SPDX-License-Identifier: MIT-License
# Base script to fine-tune a wav2vec 2.0 model on a given dataset with HuggingFace
#######################################
# COMMAND LINE OPTIONS,
# high-level variables for training the model. TrainingArguments (HuggingFace)
# You can pass a qsub command (Sun Grid Engine),
# for example: --cmd "src/sge/queue.pl h='*'-V"; adapt it to your own configuration
# Strict mode: abort on errors, on unset variables and on failures inside pipelines.
set -euo pipefail

# static vars
# Prefix command used to launch the training (e.g. a queue.pl/qsub wrapper);
# the literal value 'none' makes the script call python3 directly.
cmd=""
_VOCABULARY_PATH="config/vocab.json"

# model and dataset
model_name_or_path="facebook/wav2vec2-base"
dataset_name="experiments/data/uwb_atcc/train"
eval_dataset_name="experiments/data/uwb_atcc/test"
train_split_name="train"

# training hyper-params
# Use every core but one for preprocessing. Never go below one worker: a
# single-core host would otherwise yield 0, and a missing `nproc` would yield -1.
preprocessing_num_workers=$(( $(nproc 2>/dev/null || echo 2) - 1 ))
if (( preprocessing_num_workers < 1 )); then
  preprocessing_num_workers=1
fi
num_epochs=50
max_steps=3000
per_device_train_batch_size=12
per_device_eval_batch_size=12
gradient_acc=3
learning_rate="1e-4"
save_steps=1000
eval_steps=500
warmup_steps=1000
logging_steps=1000
# empty means "use the whole training set"; a number is forwarded as --max_train_samples
max_train_samples=
exp='experiments/results'
# both switches below take the strings 'true' or 'false'
overwrite_dir=true
freeze_feature_encoder=true

# Data augmentation details
layerdrop="0.0"
activation_dropout="0.0"
attention_dropout="0.0"
feat_proj_dropout="0.0"
mask_time_prob="0.0"
mask_time_length="12"
mask_feature_prob="0.0"
mask_feature_length="12"
# Source the option-parsing helper (path is relative to the current directory).
# NOTE(review): presumably this lets command-line flags such as --cmd override
# the defaults defined above — confirm against data/utils/parse_options.sh.
. data/utils/parse_options.sh
# Extra flags forwarded to the training script; they start with the step budget.
optional_args="--max_steps $max_steps"

# Pass the vocabulary file along when it exists; otherwise just print a notice
# (no --vocab_path flag is added in that case).
if [ -f "$_VOCABULARY_PATH" ]; then
  optional_args="${optional_args} --vocab_path=${_VOCABULARY_PATH}"
else
  echo "Vocabulary is empty, you passed $_VOCABULARY_PATH"
  echo "We will create a new vocabulary from the training+test file"
fi
#######################################
# Derive the output directory from the experiment root, the model name and the
# dataset folder; optionally restrict training to a portion of the data.
# Globals:
#   dataset_name, model_name_or_path, exp, max_train_samples (read)
#   dataset_dir_name, output_dir (written)
#   optional_args (read and appended to)
#######################################
_set_output_dir() {
  # the dataset folder is the parent directory of the split path
  dataset_dir_name=$(basename "$(dirname "$dataset_name")")
  output_dir=$exp/$(basename "$model_name_or_path")/$dataset_dir_name
  if [[ -n "$max_train_samples" ]]; then
    # tag the folder with the sample budget so runs on subsets do not collide
    output_dir="${output_dir}_${max_train_samples}s"
    # append: a plain assignment would discard the flags collected so far
    optional_args="$optional_args --max_train_samples=$max_train_samples"
  fi
}
_set_output_dir
# Translate overwrite_dir into the matching flag; only 'true' and 'false' are valid.
case "$overwrite_dir" in
  true)
    optional_args="$optional_args --overwrite_output_dir"
    ;;
  false)
    : # nothing to add
    ;;
  *)
    echo "Please give 'true' or 'false' to overwrite_dir, you passed: $overwrite_dir" >&2
    # an invalid setting is an error: exit non-zero so callers can detect it
    exit 1
    ;;
esac
# Add the freeze flag only when the option is exactly the string 'true'.
case "$freeze_feature_encoder" in
  true)
    optional_args="${optional_args} --freeze_feature_encoder"
    ;;
esac
# Encode the dropout / masking settings and the gradient-accumulation factor in
# a sub-folder name, then nest that folder under the current output_dir.
printf -v basename_dir '%sld_%sad_%sattd_%sfpd_%smtp_%smtl_%smfp_%smfl_%sacc' \
  "$layerdrop" "$activation_dropout" "$attention_dropout" "$feat_proj_dropout" \
  "$mask_time_prob" "$mask_time_length" "$mask_feature_prob" \
  "$mask_feature_length" "$gradient_acc"
output_dir="${output_dir}/${basename_dir}/"
# Configure the job-submission prefix, if one was given.
# An empty cmd and the literal 'none' both mean "run python3 directly".
# Previously only 'none' was recognised, so the empty default produced a
# prefix starting with "-N", which the shell then tried to run as a command.
if [[ -n "$cmd" && "$cmd" != 'none' ]]; then
  # job name passed via -N, followed by the log file path
  job_name=train_$(basename "$model_name_or_path")_${dataset_dir_name}_${basename_dir}
  # strip a trailing slash from output_dir to avoid '//' in the log path
  cmd="$cmd -N ${job_name} ${output_dir%/}/log/train_log"
else
  cmd=''
fi
echo "*** About to start the training ***"
echo "*** output folder: $output_dir ***"

# Call the training script, prefixed by "$cmd" when one is defined.
# $cmd and $optional_args are left unquoted on purpose: each holds several
# space-separated words that must be split into separate arguments.
# --freeze_feature_encoder is not hard-coded here: it arrives through
# $optional_args when freeze_feature_encoder=true, so the option has an effect.
# shellcheck disable=SC2086
$cmd python3 src/run_speech_recognition_ctc.py \
  --report_to="none" \
  --run_name="$output_dir" \
  --preprocessing_num_workers="$preprocessing_num_workers" \
  --model_name_or_path="$model_name_or_path" \
  --dataset_name="$dataset_name" \
  --min_duration_in_seconds=0.2 \
  --max_duration_in_seconds=20 \
  --eval_dataset_name="$eval_dataset_name" \
  --train_split_name="$train_split_name" \
  --output_dir="$output_dir" \
  --num_train_epochs="$num_epochs" \
  --per_device_train_batch_size="$per_device_train_batch_size" \
  --per_device_eval_batch_size="$per_device_eval_batch_size" \
  --gradient_accumulation_steps="$gradient_acc" \
  --learning_rate="$learning_rate" \
  --weight_decay=0.001 \
  --warmup_steps="$warmup_steps" \
  --evaluation_strategy="steps" \
  --text_column_name="text" \
  --audio_column_name="audio" \
  --length_column_name="input_length" \
  --chars_to_ignore=", ? . ! \; \: \" “ % ‘ ” � — ’ … –" \
  --save_steps="$save_steps" \
  --eval_steps="$eval_steps" \
  --logging_steps="$logging_steps" \
  --layerdrop="$layerdrop" \
  --activation_dropout="$activation_dropout" \
  --attention_dropout="$attention_dropout" \
  --save_total_limit="1" \
  --feat_proj_dropout="$feat_proj_dropout" \
  --mask_time_prob="$mask_time_prob" \
  --mask_time_length="$mask_time_length" \
  --mask_feature_prob="$mask_feature_prob" \
  --mask_feature_length="$mask_feature_length" \
  --gradient_checkpointing \
  --fp16 \
  --group_by_length \
  --do_train --do_eval \
  $optional_args

echo "Done training of $model_name_or_path in $output_dir"
exit 0