
Commit 77872d7

fix compatibility with higher versions of transformers

1 parent c66c961

3 files changed: +11 −10 lines

delft/sequenceLabelling/preprocess.py

Lines changed: 8 additions & 2 deletions
@@ -321,8 +321,14 @@ def convert_single_text(self, text_tokens, chars_tokens, features_tokens, label_
             chars_tokens.append(self.empty_char_vector)
 
         # sub-tokenization
-        encoded_result = self.tokenizer(text_tokens, add_special_tokens=True, is_split_into_words=True,
-                                        max_length=max_seq_length, truncation=True, return_offsets_mapping=True)
+        encoded_result = self.tokenizer(
+            text_tokens,
+            add_special_tokens=True,
+            is_split_into_words=True,
+            max_length=max_seq_length,
+            truncation=True,
+            return_offsets_mapping=True
+        )
 
         input_ids = encoded_result.input_ids
         offsets = encoded_result.offset_mapping
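
Note: the call above only changes in layout. For reference, a minimal standalone sketch of what this sub-tokenization step does; the model name, sentence, and 512-token limit are illustrative, not from the commit, and return_offsets_mapping requires a fast (Rust-backed) tokenizer:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")  # illustrative model

# the input is already split into words, as in convert_single_text
encoded = tokenizer(
    ["John", "lives", "in", "Singapore"],
    add_special_tokens=True,       # adds [CLS] / [SEP]
    is_split_into_words=True,      # treat the list as pre-tokenized words
    max_length=512,                # illustrative limit
    truncation=True,
    return_offsets_mapping=True    # fast tokenizers only
)
print(encoded.input_ids)        # sub-token ids, special tokens included
print(encoded.offset_mapping)   # (start, end) character offsets per sub-token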

delft/sequenceLabelling/trainer.py

Lines changed: 2 additions & 2 deletions
@@ -168,8 +168,8 @@ def train_model(self, local_model, x_train, y_train, f_train=None,
 
         # multiple workers should work with transformer layers, but not with ELMo due to GPU memory limit (with GTX 1080Ti 11GB)
         if self.model_config.transformer_name is not None or (self.embeddings and self.embeddings.use_ELMo):
-            # worker at 0 means the training will be executed in the main thread
-            nb_workers = 0
+            # worker at 1 means the training will be executed in the main thread
+            nb_workers = 1
             multiprocessing = False
 
         local_model.fit(training_generator,
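
Note: for context, a minimal sketch of how nb_workers and multiprocessing are typically handed to tf.keras Model.fit for generator input; the argument names are from tf.keras 2.x, and the epoch variable is hypothetical since the rest of the fit() call is truncated in the hunk:

# minimal sketch, assuming tf.keras 2.x generator-based training
local_model.fit(
    training_generator,                   # a keras.utils.Sequence or generator
    epochs=max_epoch,                     # hypothetical variable for the epoch count
    workers=nb_workers,                   # with use_multiprocessing=False, 1 = one loader thread
    use_multiprocessing=multiprocessing   # False = thread-based loading
)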

delft/utilities/Transformer.py

Lines changed: 1 addition & 6 deletions
@@ -128,35 +128,30 @@ def init_preprocessor(self, max_sequence_length: int,
                do_lower_case = False
 
        if do_lower_case is not None:
-           if self.auth_token != None:
+           if self.auth_token is not None:
                self.tokenizer = AutoTokenizer.from_pretrained(self.name,
-                                                              add_special_tokens=add_special_tokens,
                                                               max_length=max_sequence_length,
                                                               add_prefix_space=add_prefix_space,
                                                               do_lower_case=do_lower_case,
                                                               use_auth_token=self.auth_token)
            else:
                self.tokenizer = AutoTokenizer.from_pretrained(self.name,
-                                                              add_special_tokens=add_special_tokens,
                                                               max_length=max_sequence_length,
                                                               add_prefix_space=add_prefix_space,
                                                               do_lower_case=do_lower_case)
        else:
            if self.auth_token != None:
                self.tokenizer = AutoTokenizer.from_pretrained(self.name,
-                                                              add_special_tokens=add_special_tokens,
                                                               max_length=max_sequence_length,
                                                               add_prefix_space=add_prefix_space,
                                                               use_auth_token=self.auth_token)
            else:
                self.tokenizer = AutoTokenizer.from_pretrained(self.name,
-                                                              add_special_tokens=add_special_tokens,
                                                               max_length=max_sequence_length,
                                                               add_prefix_space=add_prefix_space)
 
    elif self.loading_method == LOADING_METHOD_LOCAL_MODEL_DIR:
        self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir_path,
-                                                      add_special_tokens=add_special_tokens,
                                                       max_length=max_sequence_length,
                                                       add_prefix_space=add_prefix_space)
    elif self.loading_method == LOADING_METHOD_PLAIN_MODEL:
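
Note: dropping the add_special_tokens keyword from every from_pretrained() call is the compatibility fix the commit message refers to: in transformers, add_special_tokens is an encoding-time argument of the tokenizer call rather than a loading option, and the commit indicates that passing it at load time no longer works with newer releases. A minimal sketch of the call-time usage; the model name and sentence are illustrative:

from transformers import AutoTokenizer

# load without add_special_tokens; it is not a loading option here
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")  # illustrative model

# request (or suppress) special tokens at encoding time instead
with_specials = tokenizer("a sample sentence", add_special_tokens=True)
without_specials = tokenizer("a sample sentence", add_special_tokens=False)
print(len(with_specials.input_ids) - len(without_specials.input_ids))  # 2 for BERT ([CLS], [SEP])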
