# wikiopenlink-thorough-complex-lstm.yaml
# (from samuelbroscheit/open_knowledge_graph_embeddings)
############### META
# cuda devices to use
devices: 0
# turn off cuda, i.e. GPU usage
no_cuda: false
# seed for the RNG
seed: 0
# save output to base directory
results_dir: data/experiments/olpbench/
# if left empty (null, None) then a name is automatically generated
experiment_dir: null
############### TASKS
train: true
evaluate: false
############### MODEL
# configure the model class
model: LSTMComplexRelationModel
# configure the model's arguments
model_config:
  dropout: 0.1
  entity_slot_size: 512
  init_std: 0.1
  normalize: batchnorm
  relation_slot_size: 512
  sparse: false
experiment_settings:
  # choose loss out of bce, kl, ce
  loss: bce
  # should entity embeddings be computed from tokens?
  replace_entities_by_tokens: true
  # should relation embeddings be computed from tokens?
  replace_relations_by_tokens: true
  # max token length for subject and relation mentions
  max_lengths_tuple: [10, 10]
############### TRAINING
# max epochs to run
epochs: 100
# batch size
batch_size: 4096
# label smoothing for BCE loss
bce_label_smoothing: 0.0
# learning rate scheduler config kwargs dict to tunnel through to PyTorch;
# set lr_scheduler to a PyTorch scheduler, otherwise none/None
lr_scheduler_config: null
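# A hypothetical, commented-out example of such a config, assuming the scheduler
# class name goes under 'lr_scheduler' and the remaining keys are passed to the
# PyTorch scheduler (here torch.optim.lr_scheduler.StepLR) as kwargs:
# lr_scheduler_config:
#   lr_scheduler: StepLR
#   step_size: 25
#   gamma: 0.5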
# gradient clipping
grad_clip: 0
# optimizer config kwargs dict to tunnel through to PyTorch; set optimizer
# to a known PyTorch optimizer and the rest to its arguments
optimization_config:
  optimizer: Adagrad
  epoch: 0
  lr: 0.2
  weight_decay: 1.0e-10
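# A commented-out alternative for illustration (values are only examples):
# Adam is another standard PyTorch optimizer, and its kwargs would be
# tunneled through in the same way as above:
# optimization_config:
#   optimizer: Adam
#   epoch: 0
#   lr: 0.001
#   weight_decay: 1.0e-10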
# print stats every this many batch steps
print_freq: 10
# save every this many epochs
save_epoch_freq: 1
# save every this many batch steps
save_freq: -1
# evaluate every this many epochs
eval_epoch_freq: 1
# evaluate every this many batch steps
eval_freq: 2000
############### MODEL SELECTION
# perform model selection, early stopping and keeping of the best models based on these metrics.
# Available metrics: mrr, mr, h1, h3, h10, h50
model_select_metric:
- mrr
patience_metric_change: 1.0e-05
patience_metric_max_treshold: null
patience_metric_min_treshold: null
patience_epochs: 50
############### DATASET
# Configure the default dataset class for all splits, with the following choices:
#
# OneToNMentionRelationDataset:
#
# Dataset format:
#
# This dataset class is intended especially for KGE datasets where the entities
# can be realized with different alternative mentions.
#
# The data format contains the following TAB separated fields per line:
# 5 TAB separated columns; COL 4 and COL 5 are lists of space separated ids:
#
# COL 1 COL 2 COL 3 COL 4 COL 5
# entity id relation id entity id subj entity ids obj entity ids
#
# This dataset can also be used for standard KGE tasks, then COL 4 and
# COL 5 are identical to COL 1 and COL 3
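#
# A hypothetical example line (ids made up for illustration), with TABs
# between the five columns and spaces inside COL 4 and COL 5:
#
#   17   3   42   17 18 19   42 43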
#
#
# Batch construction for training if use_batch_shared_entities is set to True:
#
# The set of answer entities of all examples in the batch is shared across
# all examples of the batch as true or false labels.
#
# For example, given a batch containing the subject-relation prefix (1, 2) with
# object answers [3, 4], and relation-object suffix (4, 5) with subject
# answers [1, 7, 8], then the batch will be constructed as
#
# potential batch shared entity answers [1, 3, 4, 7, 8]
#
# sp prefixes: (1, 2) labels: [0, 1, 1, 0, 0]
#
# po prefixes: (4, 5) labels: [1, 0, 0, 1, 1]
#
#
# Apart from batch_size, the batch construction is influenced by the following
# two parameters:
#
# Minimum label size for *batch construction*, i.e. when there are fewer batch
# shared entity answers than min_size_batch_labels, then new randomly sampled
# entities are added to that set.
#
# min_size_batch_labels: 4096
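#
# For illustration (numbers made up): if the prefixes of a batch contribute
# only 1500 distinct answer entities and min_size_batch_labels is 4096, then
# roughly 2596 additional randomly sampled entities are added to the shared
# label set.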
#
#
# Maximum label size during *creation of training examples*, i.e. if a
# (subject, relation) prefix has more than max_size_prefix_label true
# answers, the answers are split up into chunks. The (subject, relation)
# example is then repeated during training for each chunk.
#
# max_size_prefix_label: 64
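#
# For illustration (numbers made up): with max_size_prefix_label: 64, a
# (subject, relation) prefix with 150 true answers would be split into chunks
# of 64, 64 and 22 answers, and the prefix would be repeated three times
# during training, once per chunk.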
dataset_dir: data/olpbench/mapped_to_ids
dataset_class: OneToNMentionRelationDataset
# this overrides the default dataset_class
training_dataset_class: OneToNMentionRelationDataset
# training data settings
train_data_config:
  input_file: train_data_thorough.txt
  batch_size: 4096 # if batch size undefined here then global batch size is used
  use_batch_shared_entities: True
  min_size_batch_labels: 4096
  max_size_prefix_label: 64
# this overrides the default dataset_class
validation_dataset_class: OneToNMentionRelationDataset
# validation data settings
val_data_config:
  input_file: validation_data_linked.txt
  batch_size: 128 # if batch size undefined here then global batch size is used
  use_batch_shared_entities: True # for approximate fast computation during training
  min_size_batch_labels: 32768
  max_size_prefix_label: -1
# this overrides the default dataset_class
test_dataset_class: OneToNMentionRelationDataset
# test data settings
test_data_config:
  input_file: test_data.txt
  batch_size: 32 # if batch size undefined here then global batch size is used
  use_batch_shared_entities: False # always use all entities for evaluation
  min_size_batch_labels: -1
  max_size_prefix_label: -1
############### EVALUATION
evaluate_on_validation: true
evaluate_scores_file: null
log_predictions: false
############### RESUMING
# set a checkpoint path here; in conjunction with 'train: false', 'evaluate: true'
# and 'evaluate_on_validation: false' this is used for testing.
resume: ''
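# A hypothetical test-only setup (the checkpoint path is only a placeholder):
#   train: false
#   evaluate: true
#   evaluate_on_validation: false
#   resume: data/experiments/olpbench/<experiment_dir>/<checkpoint>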
# a whitelist of the parameter names for resuming a model
resume_filter: null
# a blacklist of the parameter names for resuming a model
resume_freeze: null
# load the args from the checkpoint instead of this configuration's args
resume_load_args: true
# reset optimizer when resuming
reset_optimizer: false
# start epoch for optimization regime
#start_epoch: 0