diff --git a/deeppavlov/__init__.py b/deeppavlov/__init__.py index 6765198e9f..277241fef9 100644 --- a/deeppavlov/__init__.py +++ b/deeppavlov/__init__.py @@ -26,11 +26,13 @@ from .download import deep_download from .core.common.chainer import Chainer + # TODO: make better def train_model(config: [str, Path, dict], download: bool = False, recursive: bool = False) -> Chainer: train_evaluate_model_from_config(config, download=download, recursive=recursive) return build_model(config, load_trained=True) + def evaluate_model(config: [str, Path, dict], download: bool = False, recursive: bool = False) -> dict: return train_evaluate_model_from_config(config, to_train=False, download=download, recursive=recursive) diff --git a/deeppavlov/__main__.py b/deeppavlov/__main__.py index 9a34bb3f2f..d5d8c0580d 100644 --- a/deeppavlov/__main__.py +++ b/deeppavlov/__main__.py @@ -1,3 +1,4 @@ if __name__ == '__main__': from .deep import main + main() diff --git a/deeppavlov/core/common/chainer.py b/deeppavlov/core/common/chainer.py index 26f157d96f..8aef21dc6c 100644 --- a/deeppavlov/core/common/chainer.py +++ b/deeppavlov/core/common/chainer.py @@ -46,6 +46,7 @@ class Chainer(Component): out_params: names of pipeline inference outputs in_y: names of additional inputs for pipeline training and evaluation modes """ + def __init__(self, in_x: Union[str, list] = None, out_params: Union[str, list] = None, in_y: Union[str, list] = None, *args, **kwargs) -> None: self.pipe: List[Tuple[Tuple[List[str], List[str]], List[str], Component]] = [] @@ -150,9 +151,9 @@ def append(self, component: Union[Component, FunctionType], in_x: [str, list, di component: NNModel main = True - assert self.train_map.issuperset(in_x+in_y), ('Arguments {} are expected but only {} are set' - .format(in_x+in_y, self.train_map)) - preprocessor = Chainer(self.in_x, in_x+in_y, self.in_y) + assert self.train_map.issuperset(in_x + in_y), ('Arguments {} are expected but only {} are set' + .format(in_x + in_y, self.train_map)) + preprocessor = Chainer(self.in_x, in_x + in_y, self.in_y) for (t_in_x_keys, t_in_x), t_out, t_component in self.train_pipe: if t_in_x_keys: t_in_x = dict(zip(t_in_x_keys, t_in_x)) @@ -160,7 +161,7 @@ def append(self, component: Union[Component, FunctionType], in_x: [str, list, di def train_on_batch(*args, **kwargs): preprocessed = preprocessor.compute(*args, **kwargs) - if len(in_x+in_y) == 1: + if len(in_x + in_y) == 1: preprocessed = [preprocessed] if keys: return component.train_on_batch(**dict(zip(keys, preprocessed))) diff --git a/deeppavlov/core/common/check_gpu.py b/deeppavlov/core/common/check_gpu.py index 56817b3ab7..d768417785 100644 --- a/deeppavlov/core/common/check_gpu.py +++ b/deeppavlov/core/common/check_gpu.py @@ -19,7 +19,6 @@ log = getLogger(__name__) - _gpu_available = None diff --git a/deeppavlov/core/common/errors.py b/deeppavlov/core/common/errors.py index d5d4ce23e2..1cef661aa9 100644 --- a/deeppavlov/core/common/errors.py +++ b/deeppavlov/core/common/errors.py @@ -19,6 +19,7 @@ class ConfigError(Exception): """Any configuration error.""" + def __init__(self, message): super(ConfigError, self).__init__() self.message = message diff --git a/deeppavlov/core/common/metrics_registry.py b/deeppavlov/core/common/metrics_registry.py index 78fa53f005..28bf3a6e1b 100644 --- a/deeppavlov/core/common/metrics_registry.py +++ b/deeppavlov/core/common/metrics_registry.py @@ -29,6 +29,7 @@ def fn_from_str(name: str) -> Callable[..., Any]: def register_metric(metric_name: str) -> Callable[..., Any]: """Decorator for 
metric registration.""" + def decorate(fn): fn_name = fn.__module__ + ':' + fn.__name__ if metric_name in _REGISTRY and _REGISTRY[metric_name] != fn_name: @@ -36,6 +37,7 @@ def decorate(fn): .format(metric_name)) _REGISTRY[metric_name] = fn_name return fn + return decorate diff --git a/deeppavlov/core/common/params.py b/deeppavlov/core/common/params.py index 8e9afbd1a2..e74bb594a7 100644 --- a/deeppavlov/core/common/params.py +++ b/deeppavlov/core/common/params.py @@ -82,7 +82,7 @@ def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwa _refs.clear() _refs.update(refs) try: - _refs[config_params['id']] = model + _refs[config_params['id']] = model except KeyError: pass return model @@ -100,7 +100,7 @@ def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwa try: spec = inspect.getfullargspec(obj) - if 'mode' in spec.args+spec.kwonlyargs or spec.varkw is not None: + if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None: kwargs['mode'] = mode component = obj(**dict(config_params, **kwargs)) diff --git a/deeppavlov/core/common/prints.py b/deeppavlov/core/common/prints.py index 7f824d2b19..28360f0e51 100644 --- a/deeppavlov/core/common/prints.py +++ b/deeppavlov/core/common/prints.py @@ -18,5 +18,6 @@ class RedirectedPrints(redirect_stdout): """Context manager for temporarily redirecting stdout to another stream """ + def __init__(self, new_target=sys.stderr): super().__init__(new_target=new_target) diff --git a/deeppavlov/core/common/registry.json b/deeppavlov/core/common/registry.json index 49e94c6ed4..a45f35af92 100644 --- a/deeppavlov/core/common/registry.json +++ b/deeppavlov/core/common/registry.json @@ -33,7 +33,6 @@ "dialog_state": "deeppavlov.models.seq2seq_go_bot.dialog_state:DialogState", "dictionary_vectorizer": "deeppavlov.models.vectorizers.word_vectorizer:DictionaryVectorizer", "dirty_comments_preprocessor": "deeppavlov.models.preprocessors.dirty_comments_preprocessor:DirtyCommentsPreprocessor", - "document_bert_ner_iterator": "deeppavlov.dataset_iterators.document_bert_ner_iterator:DocumentBertNerIterator", "document_chunker": "deeppavlov.models.preprocessors.odqa_preprocessors:DocumentChunker", "dstc2_intents_iterator": "deeppavlov.dataset_iterators.dstc2_intents_iterator:Dstc2IntentsDatasetIterator", "dstc2_ner_iterator": "deeppavlov.dataset_iterators.dstc2_ner_iterator:Dstc2NerDatasetIterator", diff --git a/deeppavlov/core/common/registry.py b/deeppavlov/core/common/registry.py index 048bf0b1bd..932c4da714 100644 --- a/deeppavlov/core/common/registry.py +++ b/deeppavlov/core/common/registry.py @@ -45,6 +45,7 @@ def register(name: str = None) -> type: Register classes that could be initialized from JSON configuration file. If name is not passed, the class name is converted to snake-case. """ + def decorate(model_cls: type, reg_name: str = None) -> type: model_name = reg_name or short_name(model_cls) global _REGISTRY diff --git a/deeppavlov/core/data/data_learning_iterator.py b/deeppavlov/core/data/data_learning_iterator.py index d2ee6af42c..26fb83ba50 100644 --- a/deeppavlov/core/data/data_learning_iterator.py +++ b/deeppavlov/core/data/data_learning_iterator.py @@ -31,6 +31,7 @@ class DataLearningIterator: shuffle: whether to shuffle data during batching random: instance of ``Random`` initialized with a seed """ + def split(self, *args, **kwargs): """ Manipulate self.train, self.valid, and self.test into their final form. 
""" pass diff --git a/deeppavlov/core/data/simple_vocab.py b/deeppavlov/core/data/simple_vocab.py index fde9433079..8332d4d680 100644 --- a/deeppavlov/core/data/simple_vocab.py +++ b/deeppavlov/core/data/simple_vocab.py @@ -40,9 +40,10 @@ class SimpleVocabulary(Estimator): unk_token: label assigned to unknown tokens. freq_drop_load: if True, then frequencies of tokens are set to min_freq on the model load. """ + def __init__(self, special_tokens: Tuple[str, ...] = tuple(), - max_tokens: int = 2**30, + max_tokens: int = 2 ** 30, min_freq: int = 0, pad_with_zeros: bool = False, unk_token: Optional[str] = None, @@ -118,7 +119,7 @@ def load(self): self._add_tokens_with_freqs(tokens, counts) elif not self.load_path.parent.is_dir(): raise ConfigError("Provided `load_path` for {} doesn't exist!".format( - self.__class__.__name__)) + self.__class__.__name__)) else: raise ConfigError("`load_path` for {} is not provided!".format(self)) @@ -135,7 +136,7 @@ def load_line(self, ln): else: token, cnt = ln.split('\t', 1) return token, cnt - + @property def len(self): return len(self) diff --git a/deeppavlov/core/data/utils.py b/deeppavlov/core/data/utils.py index e1b3cf29e7..5289779165 100644 --- a/deeppavlov/core/data/utils.py +++ b/deeppavlov/core/data/utils.py @@ -279,7 +279,7 @@ def _copytree(src: Path, dest: Path) -> None: shutil.copy(str(f), str(f_dest)) -def file_md5(fpath: Union[str, Path], chunk_size: int = 2**16) -> Optional[str]: +def file_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> Optional[str]: """Return md5 hash value for file contents. Args: diff --git a/deeppavlov/core/layers/keras_layers.py b/deeppavlov/core/layers/keras_layers.py index eb0dd60b85..7c1d379fee 100644 --- a/deeppavlov/core/layers/keras_layers.py +++ b/deeppavlov/core/layers/keras_layers.py @@ -33,7 +33,7 @@ def expand_tile(units, axis): repetitions = [1, 1, 1, 1] repetitions[axis] = n_time_steps if axis == 1: - expanded = Reshape(target_shape=( (1,) + K.int_shape(units)[1:] ))(units) + expanded = Reshape(target_shape=((1,) + K.int_shape(units)[1:]))(units) else: expanded = Reshape(target_shape=(K.int_shape(units)[1:2] + (1,) + K.int_shape(units)[2:]))(units) return K.tile(expanded, repetitions) @@ -113,9 +113,9 @@ def build(self, input_shape): self.W = [] for i in range(self.output_dim): self.W.append(self.add_weight(name='kernel', - shape=(1, input_shape[0][-1]), - initializer='uniform', - trainable=True)) + shape=(1, input_shape[0][-1]), + initializer='uniform', + trainable=True)) super(FullMatchingLayer, self).build(input_shape) # Be sure to call this at the end def call(self, x): @@ -153,9 +153,9 @@ def build(self, input_shape): self.W = [] for i in range(self.output_dim): self.W.append(self.add_weight(name='kernel', - shape=(1, input_shape[0][-1]), - initializer='uniform', - trainable=True)) + shape=(1, input_shape[0][-1]), + initializer='uniform', + trainable=True)) super(MaxpoolingMatchingLayer, self).build(input_shape) # Be sure to call this at the end def call(self, x): @@ -193,9 +193,9 @@ def build(self, input_shape): self.W = [] for i in range(self.output_dim): self.W.append(self.add_weight(name='kernel', - shape=(1, input_shape[0][-1]), - initializer='uniform', - trainable=True)) + shape=(1, input_shape[0][-1]), + initializer='uniform', + trainable=True)) super(AttentiveMatchingLayer, self).build(input_shape) # Be sure to call this at the end def call(self, x): @@ -241,9 +241,9 @@ def build(self, input_shape): self.W = [] for i in range(self.output_dim): 
self.W.append(self.add_weight(name='kernel', - shape=(1, input_shape[0][-1]), - initializer='uniform', - trainable=True)) + shape=(1, input_shape[0][-1]), + initializer='uniform', + trainable=True)) super(MaxattentiveMatchingLayer, self).build(input_shape) # Be sure to call this at the end def call(self, x): diff --git a/deeppavlov/core/layers/tf_attention_mechanisms.py b/deeppavlov/core/layers/tf_attention_mechanisms.py index cf7460290d..c75f6f3282 100644 --- a/deeppavlov/core/layers/tf_attention_mechanisms.py +++ b/deeppavlov/core/layers/tf_attention_mechanisms.py @@ -47,8 +47,8 @@ def general_attention(key, context, hidden_size, projected_align=False): tf.layers.dense(key, hidden_size, kernel_initializer=xav()) r_projected_key = tf.reshape(projected_key, shape=[-1, hidden_size, 1]) - lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2) - lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2) + lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2) + lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2) (output_fw, output_bw), states = \ tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, @@ -139,8 +139,8 @@ def cs_general_attention(key, context, hidden_size, depth, projected_align=False kernel_initializer=xav(), name='projected_context') - lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2) - lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2) + lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2) + lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2) (output_fw, output_bw), states = \ tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, @@ -192,8 +192,8 @@ def bahdanau_attention(key, context, hidden_size, projected_align=False): tf.tile(tf.reshape(projected_key, shape=[-1, 1, hidden_size]), [1, max_num_tokens, 1]) - lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2) - lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2) + lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2) + lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2) (output_fw, output_bw), states = \ tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, @@ -308,8 +308,8 @@ def cs_bahdanau_attention(key, context, hidden_size, depth, projected_align=Fals tf.tile(tf.reshape(projected_key, shape=[-1, 1, hidden_size]), [1, max_num_tokens, 1]) - lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2) - lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2) + lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2) + lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2) (output_fw, output_bw), states = \ tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, diff --git a/deeppavlov/core/layers/tf_csoftmax_attention.py b/deeppavlov/core/layers/tf_csoftmax_attention.py index f73bcc3079..764a727dc2 100644 --- a/deeppavlov/core/layers/tf_csoftmax_attention.py +++ b/deeppavlov/core/layers/tf_csoftmax_attention.py @@ -114,28 +114,27 @@ def attention_gen_step(hidden_for_sketch, hidden_for_attn_alignment, sketch, key attn_alignment_dims = hidden_for_attn_alignment.get_shape().as_list() attn_alignment_hidden_size = attn_alignment_dims[2] - repeated_sketch = tf.tile(tf.reshape(sketch, [-1, 1, hidden_size]), (1,num_tokens, 1)) - concat_mem = tf.concat([hidden_for_sketch, repeated_sketch],-1) + repeated_sketch = tf.tile(tf.reshape(sketch, [-1, 1, hidden_size]), (1, num_tokens, 1)) + concat_mem = tf.concat([hidden_for_sketch, repeated_sketch], -1) - - concat_mem = tf.reshape(concat_mem, [-1, num_tokens, 2*hidden_size]) 
# dirty trick + concat_mem = tf.reshape(concat_mem, [-1, num_tokens, 2 * hidden_size]) # dirty trick reduce_mem = tf.layers.dense(concat_mem, hidden_size) projected_key = tf.layers.dense(key, hidden_size) - t_key = tf.reshape(projected_key,[-1, hidden_size, 1]) + t_key = tf.reshape(projected_key, [-1, hidden_size, 1]) score = tf.reshape(tf.matmul(reduce_mem, t_key), [-1, num_tokens]) inv_cum_att = tf.reshape(tf.ones_like(cum_att) - cum_att, [-1, num_tokens]) att = csoftmax(score, inv_cum_att) - t_reduce_mem = tf.transpose(reduce_mem, [0,2,1]) - t_hidden_for_attn_alignment = tf.transpose(hidden_for_attn_alignment, [0,2,1]) + t_reduce_mem = tf.transpose(reduce_mem, [0, 2, 1]) + t_hidden_for_attn_alignment = tf.transpose(hidden_for_attn_alignment, [0, 2, 1]) r_att = tf.reshape(att, [-1, num_tokens, 1]) - next_sketch = tf.squeeze(tf.matmul(t_reduce_mem,r_att),-1) - aligned_hidden_sketch = tf.squeeze(tf.matmul(t_hidden_for_attn_alignment,r_att),-1) + next_sketch = tf.squeeze(tf.matmul(t_reduce_mem, r_att), -1) + aligned_hidden_sketch = tf.squeeze(tf.matmul(t_hidden_for_attn_alignment, r_att), -1) return next_sketch, att, aligned_hidden_sketch @@ -165,11 +164,13 @@ def attention_gen_block(hidden_for_sketch, hidden_for_attn_alignment, key, atten aligned_hiddens = [] cum_att = tf.zeros(shape=[batch_size, num_tokens]) # cumulative attention for i in range(attention_depth): - sketch, cum_att_, aligned_hidden = attention_gen_step(hidden_for_sketch, hidden_for_attn_alignment, sketches[-1], key, cum_att) - sketches.append(sketch) #sketch - aligned_hiddens.append(aligned_hidden) #sketch + sketch, cum_att_, aligned_hidden = attention_gen_step(hidden_for_sketch, hidden_for_attn_alignment, + sketches[-1], key, cum_att) + sketches.append(sketch) # sketch + aligned_hiddens.append(aligned_hidden) # sketch cum_att += cum_att_ - final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]),[1, attention_depth, attn_alignment_hidden_size]) + final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]), + [1, attention_depth, attn_alignment_hidden_size]) return final_aligned_hiddens @@ -197,25 +198,24 @@ def attention_bah_step(hidden_for_sketch, hidden_for_attn_alignment, sketch, cum attn_alignment_dims = hidden_for_attn_alignment.get_shape().as_list() attn_alignment_hidden_size = attn_alignment_dims[2] - repeated_sketch = tf.tile(tf.reshape(sketch, [-1, 1, hidden_size]), (1,num_tokens, 1)) - concat_mem = tf.concat([hidden_for_sketch, repeated_sketch],-1) - + repeated_sketch = tf.tile(tf.reshape(sketch, [-1, 1, hidden_size]), (1, num_tokens, 1)) + concat_mem = tf.concat([hidden_for_sketch, repeated_sketch], -1) - concat_mem = tf.reshape(concat_mem, [-1, num_tokens, 2*hidden_size]) # dirty trick + concat_mem = tf.reshape(concat_mem, [-1, num_tokens, 2 * hidden_size]) # dirty trick reduce_mem = tf.layers.dense(concat_mem, hidden_size) - score = tf.squeeze(tf.layers.dense(reduce_mem, units = 1, - use_bias=False),-1) + score = tf.squeeze(tf.layers.dense(reduce_mem, units=1, + use_bias=False), -1) inv_cum_att = tf.reshape(tf.ones_like(cum_att) - cum_att, [-1, num_tokens]) att = csoftmax(score, inv_cum_att) - t_reduce_mem = tf.transpose(reduce_mem, [0,2,1]) - t_hidden_for_attn_alignment = tf.transpose(hidden_for_attn_alignment, [0,2,1]) + t_reduce_mem = tf.transpose(reduce_mem, [0, 2, 1]) + t_hidden_for_attn_alignment = tf.transpose(hidden_for_attn_alignment, [0, 2, 1]) r_att = tf.reshape(att, [-1, num_tokens, 1]) - next_sketch = 
tf.squeeze(tf.matmul(t_reduce_mem,r_att),-1) - aligned_hidden_sketch = tf.squeeze(tf.matmul(t_hidden_for_attn_alignment,r_att),-1) + next_sketch = tf.squeeze(tf.matmul(t_reduce_mem, r_att), -1) + aligned_hidden_sketch = tf.squeeze(tf.matmul(t_hidden_for_attn_alignment, r_att), -1) return next_sketch, att, aligned_hidden_sketch @@ -245,9 +245,11 @@ def attention_bah_block(hidden_for_sketch, hidden_for_attn_alignment, attention_ aligned_hiddens = [] cum_att = tf.zeros(shape=[batch_size, num_tokens]) # cumulative attention for i in range(attention_depth): - sketch, cum_att_, aligned_hidden = attention_bah_step(hidden_for_sketch, hidden_for_attn_alignment, sketches[-1], cum_att) - sketches.append(sketch) #sketch - aligned_hiddens.append(aligned_hidden) #sketch + sketch, cum_att_, aligned_hidden = attention_bah_step(hidden_for_sketch, hidden_for_attn_alignment, + sketches[-1], cum_att) + sketches.append(sketch) # sketch + aligned_hiddens.append(aligned_hidden) # sketch cum_att += cum_att_ - final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]),[1, attention_depth, attn_alignment_hidden_size]) + final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]), + [1, attention_depth, attn_alignment_hidden_size]) return final_aligned_hiddens diff --git a/deeppavlov/core/layers/tf_layers.py b/deeppavlov/core/layers/tf_layers.py index 7cb8298fb8..ee41a50725 100644 --- a/deeppavlov/core/layers/tf_layers.py +++ b/deeppavlov/core/layers/tf_layers.py @@ -22,8 +22,9 @@ log = getLogger(__name__) - INITIALIZER = tf.orthogonal_initializer + + # INITIALIZER = xavier_initializer @@ -537,13 +538,13 @@ def cudnn_gru(units, n_hidden, n_layers=1, trainable_initial_states=False, initial_h = input_initial_h or init_h - h, h_last = gru(tf.transpose(units, (1, 0, 2)), (initial_h, )) + h, h_last = gru(tf.transpose(units, (1, 0, 2)), (initial_h,)) h = tf.transpose(h, (1, 0, 2)) h_last = tf.squeeze(h_last, axis=0)[-1] # extract last layer state # Extract last states if they are provided if seq_lengths is not None: - indices = tf.stack([tf.range(tf.shape(h)[0]), seq_lengths-1], axis=1) + indices = tf.stack([tf.range(tf.shape(h)[0]), seq_lengths - 1], axis=1) h_last = tf.gather_nd(h, indices) return h, h_last @@ -586,6 +587,7 @@ def cudnn_compatible_gru(units, n_hidden, n_layers=1, trainable_initial_states=F with tf.variable_scope('cudnn_gru', reuse=reuse): def single_cell(): return tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(n_hidden) + cell = tf.nn.rnn_cell.MultiRNNCell([single_cell() for _ in range(n_layers)]) units = tf.transpose(units, (1, 0, 2)) @@ -598,7 +600,7 @@ def single_cell(): return tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(n_hidden) # Extract last states if they are provided if seq_lengths is not None: - indices = tf.stack([tf.range(tf.shape(h)[0]), seq_lengths-1], axis=1) + indices = tf.stack([tf.range(tf.shape(h)[0]), seq_lengths - 1], axis=1) h_last = tf.gather_nd(h, indices) return h, h_last @@ -606,7 +608,6 @@ def single_cell(): return tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(n_hidden) def cudnn_gru_wrapper(units, n_hidden, n_layers=1, trainable_initial_states=False, seq_lengths=None, input_initial_h=None, name='cudnn_gru', reuse=False): - if check_gpu_existence(): return cudnn_gru(units, n_hidden, n_layers, trainable_initial_states, seq_lengths, input_initial_h, name, reuse) @@ -672,7 +673,7 @@ def cudnn_lstm(units, n_hidden, n_layers=1, trainable_initial_states=None, seq_l # Extract last states if they are provided if seq_lengths is not None: 
- indices = tf.stack([tf.range(tf.shape(h)[0]), seq_lengths-1], axis=1) + indices = tf.stack([tf.range(tf.shape(h)[0]), seq_lengths - 1], axis=1) h_last = tf.gather_nd(h, indices) return h, (h_last, c_last) @@ -740,7 +741,7 @@ def single_cell(): return tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(n_hidden) # Extract last states if they are provided if seq_lengths is not None: - indices = tf.stack([tf.range(tf.shape(h)[0]), seq_lengths-1], axis=1) + indices = tf.stack([tf.range(tf.shape(h)[0]), seq_lengths - 1], axis=1) h_last = tf.gather_nd(h, indices) return h, (h_last, c_last) @@ -748,7 +749,6 @@ def single_cell(): return tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(n_hidden) def cudnn_lstm_wrapper(units, n_hidden, n_layers=1, trainable_initial_states=None, seq_lengths=None, initial_h=None, initial_c=None, name='cudnn_lstm', reuse=False): - if check_gpu_existence(): return cudnn_lstm(units, n_hidden, n_layers, trainable_initial_states, seq_lengths, initial_h, initial_c, name, reuse) @@ -945,4 +945,4 @@ def variational_dropout(units, keep_prob, fixed_mask_dims=(1,)): noise_shape = [units_shape[n] for n in range(len(units.shape))] for dim in fixed_mask_dims: noise_shape[dim] = 1 - return tf.nn.dropout(units, rate=1-keep_prob, noise_shape=noise_shape) + return tf.nn.dropout(units, rate=1 - keep_prob, noise_shape=noise_shape) diff --git a/deeppavlov/core/models/component.py b/deeppavlov/core/models/component.py index 8e6f599293..c8b8f1886f 100644 --- a/deeppavlov/core/models/component.py +++ b/deeppavlov/core/models/component.py @@ -16,12 +16,12 @@ from logging import getLogger - log = getLogger(__name__) class Component(metaclass=ABCMeta): """Abstract class for all callables that could be used in Chainer's pipe.""" + @abstractmethod def __call__(self, *args, **kwargs): pass diff --git a/deeppavlov/core/models/estimator.py b/deeppavlov/core/models/estimator.py index 9cccd305d5..ddefc63abb 100644 --- a/deeppavlov/core/models/estimator.py +++ b/deeppavlov/core/models/estimator.py @@ -20,6 +20,7 @@ class Estimator(Component, Serializable): """Abstract class for components that could be fitted on the data as a whole.""" + @abstractmethod def fit(self, *args, **kwargs): pass diff --git a/deeppavlov/core/models/keras_model.py b/deeppavlov/core/models/keras_model.py index 5cf941159d..7c4bedf276 100644 --- a/deeppavlov/core/models/keras_model.py +++ b/deeppavlov/core/models/keras_model.py @@ -12,21 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import inspect from abc import abstractmethod -from copy import deepcopy from logging import getLogger -from typing import Optional, List, Union -import numpy as np import tensorflow as tf from keras import backend as K from overrides import overrides +from deeppavlov.core.models.lr_scheduled_model import LRScheduledModel from deeppavlov.core.models.nn_model import NNModel from deeppavlov.core.models.tf_backend import TfModelMeta -from deeppavlov.core.models.lr_scheduled_model import LRScheduledModel - log = getLogger(__name__) @@ -101,6 +96,7 @@ class LRScheduledKerasModel(LRScheduledModel, KerasModel): KerasModel enhanced with optimizer, learning rate and momentum management and search. 
""" + def __init__(self, **kwargs): """ Initialize model with given parameters diff --git a/deeppavlov/core/models/lr_scheduled_model.py b/deeppavlov/core/models/lr_scheduled_model.py index e3c68c6a40..6d2e5a9637 100644 --- a/deeppavlov/core/models/lr_scheduled_model.py +++ b/deeppavlov/core/models/lr_scheduled_model.py @@ -12,17 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Union, Tuple, List, Optional -from logging import getLogger -from abc import abstractmethod import math +from abc import abstractmethod from enum import IntEnum +from logging import getLogger +from typing import Union, Tuple, List, Optional import numpy as np from deeppavlov.core.common.errors import ConfigError - log = getLogger(__name__) @@ -80,8 +79,8 @@ def __init__(self, dec_type: Union[str, DecayType], start_val: float, self.div = 1.0 if not self.start_val else self.end_val / self.start_val def __str__(self): - return f"DecayScheduler(start_val={self.start_val}, end_val={self.end_val}"\ - f", dec_type={self.dec_type.name}, num_it={self.nb}, extra={self.extra})" + return f"DecayScheduler(start_val={self.start_val}, end_val={self.end_val}" \ + f", dec_type={self.dec_type.name}, num_it={self.nb}, extra={self.extra})" def next_val(self) -> float: self.iters = min(self.iters + 1, self.nb) @@ -334,8 +333,8 @@ def fit(self, *args): if not isinstance(report, dict): report = {'loss': report} # Calculating smoothed loss - avg_loss = self._fit_beta*avg_loss + (1 - self._fit_beta)*report['loss'] - smoothed_loss = avg_loss / (1 - self._fit_beta**(i + 1)) + avg_loss = self._fit_beta * avg_loss + (1 - self._fit_beta) * report['loss'] + smoothed_loss = avg_loss / (1 - self._fit_beta ** (i + 1)) lrs.append(self._lr) losses.append(smoothed_loss) log.info(f"Batch {i}/{num_batches}: smooth_loss = {smoothed_loss}" @@ -392,7 +391,7 @@ def _get_best(values: List[float], losses: List[float], assert len(values) == len(losses), "lengths of values and losses should be equal" min_ind = np.argmin(losses) for i in range(min_ind - 1, 0, -1): - if (losses[i] * max_loss_div > losses[min_ind]) or\ + if (losses[i] * max_loss_div > losses[min_ind]) or \ (values[i] * min_val_div < values[min_ind]): return values[i + 1] return values[min_ind] / min_val_div @@ -417,7 +416,7 @@ def process_event(self, event_name: str, data: dict) -> None: self._learning_rate_last_impatience = data['impatience'] - if (self._learning_rate_drop_patience is not None) and\ + if (self._learning_rate_drop_patience is not None) and \ (self._learning_rate_cur_impatience >= self._learning_rate_drop_patience): self._learning_rate_cur_impatience = 0 @@ -447,4 +446,3 @@ def process_event(self, event_name: str, data: dict) -> None: data['learning_rate'] = self._lr if (self._mom is not None) and ('momentum' not in data): data['momentum'] = self._mom - diff --git a/deeppavlov/core/models/nn_model.py b/deeppavlov/core/models/nn_model.py index 94bd4cf4ee..2a756b5c32 100644 --- a/deeppavlov/core/models/nn_model.py +++ b/deeppavlov/core/models/nn_model.py @@ -20,6 +20,7 @@ class NNModel(Component, Serializable): """Abstract class for deep learning components.""" + @abstractmethod def train_on_batch(self, x: list, y: list): pass diff --git a/deeppavlov/core/models/serializable.py b/deeppavlov/core/models/serializable.py index bb05ff5c9d..6d097476c5 100644 --- a/deeppavlov/core/models/serializable.py +++ b/deeppavlov/core/models/serializable.py @@ -27,7 +27,9 @@ class Serializable(metaclass=ABCMeta): 
:class:`deeppavlov.models.model.serializable.Serializable` is an abstract base class that expresses the interface for all models that can serialize data to a path. """ - def __init__(self, save_path: Optional[Union[str, Path]], load_path: Optional[Union[str, Path]] = None, mode: str = 'infer', + + def __init__(self, save_path: Optional[Union[str, Path]], load_path: Optional[Union[str, Path]] = None, + mode: str = 'infer', *args, **kwargs) -> None: if save_path: diff --git a/deeppavlov/core/models/tf_backend.py b/deeppavlov/core/models/tf_backend.py index 3568c7f4ce..e52f59b68d 100644 --- a/deeppavlov/core/models/tf_backend.py +++ b/deeppavlov/core/models/tf_backend.py @@ -21,10 +21,12 @@ def _graph_wrap(func, graph): """Constructs function encapsulated in the graph.""" + @wraps(func) def _wrapped(*args, **kwargs): with graph.as_default(): return func(*args, **kwargs) + return _wrapped @@ -37,11 +39,13 @@ def _wrapped(*args, **kwargs): with graph.as_default(): K.set_session(session) return func(*args, **kwargs) + return _wrapped class TfModelMeta(with_metaclass(type, ABCMeta)): """Metaclass that helps all child classes to have their own graph and session.""" + def __call__(cls, *args, **kwargs): obj = cls.__new__(cls) from .keras_model import KerasModel diff --git a/deeppavlov/core/models/tf_model.py b/deeppavlov/core/models/tf_model.py index 4987821cd7..39d867165b 100644 --- a/deeppavlov/core/models/tf_model.py +++ b/deeppavlov/core/models/tf_model.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, Union, Tuple, Optional from collections import defaultdict from logging import getLogger from pathlib import Path +from typing import Iterable, Union, Tuple, Optional import numpy as np import tensorflow as tf @@ -24,10 +24,9 @@ from deeppavlov.core.common.errors import ConfigError from deeppavlov.core.common.registry import cls_from_str +from deeppavlov.core.models.lr_scheduled_model import LRScheduledModel from deeppavlov.core.models.nn_model import NNModel from deeppavlov.core.models.tf_backend import TfModelMeta -from deeppavlov.core.models.lr_scheduled_model import LRScheduledModel - log = getLogger(__name__) @@ -244,7 +243,7 @@ def get_train_op(self, def get_optimizer(self): return self._optimizer - def load(self, + def load(self, exclude_scopes: Optional[Iterable] = ('Optimizer', 'learning_rate', 'momentum'), @@ -253,4 +252,3 @@ def load(self, def process_event(self, *args, **kwargs): LRScheduledModel.process_event(self, *args, **kwargs) - diff --git a/deeppavlov/core/trainers/fit_trainer.py b/deeppavlov/core/trainers/fit_trainer.py index 4cdf156541..7a7820f962 100644 --- a/deeppavlov/core/trainers/fit_trainer.py +++ b/deeppavlov/core/trainers/fit_trainer.py @@ -56,6 +56,7 @@ class FitTrainer: (default is ``-1``) **kwargs: additional parameters whose names will be logged but otherwise ignored """ + def __init__(self, chainer_config: dict, *, batch_size: int = -1, metrics: Iterable[Union[str, dict]] = ('accuracy',), evaluation_targets: Iterable[str] = ('valid', 'test'), diff --git a/deeppavlov/core/trainers/nn_trainer.py b/deeppavlov/core/trainers/nn_trainer.py index f2f51d8d19..f70509598c 100644 --- a/deeppavlov/core/trainers/nn_trainer.py +++ b/deeppavlov/core/trainers/nn_trainer.py @@ -87,6 +87,7 @@ class NNTrainer(FitTrainer): * Save the model if it happened before 1st validation (to capture early training results), don't save otherwise. 
""" + def __init__(self, chainer_config: dict, *, batch_size: int = 1, epochs: int = -1, start_epoch_num: int = 0, @@ -115,7 +116,7 @@ def __init__(self, chainer_config: dict, *, batch_size: int = 1, def _improved(op): return lambda score, baseline: False if baseline is None or score is None \ - else op(score,baseline) + else op(score, baseline) if metric_optimization == 'maximize': self.improved = _improved(lambda a, b: a > b) @@ -156,7 +157,7 @@ def save(self) -> None: def _is_initial_validation(self): return self.validation_number == 0 - + def _is_first_validation(self): return self.validation_number == 1 @@ -240,7 +241,7 @@ def _log(self, iterator: DataLearningIterator, report.update(self.last_result) if self.losses: - report['loss'] = sum(self.losses)/len(self.losses) + report['loss'] = sum(self.losses) / len(self.losses) self.losses.clear() metrics.append(('loss', report['loss'])) @@ -342,5 +343,3 @@ def train(self, iterator: DataLearningIterator) -> None: if self.validation_number < 1: log.info('Save model to capture early training results') self.save() - - diff --git a/deeppavlov/dataset_iterators/basic_classification_iterator.py b/deeppavlov/dataset_iterators/basic_classification_iterator.py index 1168142c4d..390a6ba442 100644 --- a/deeppavlov/dataset_iterators/basic_classification_iterator.py +++ b/deeppavlov/dataset_iterators/basic_classification_iterator.py @@ -46,6 +46,7 @@ class BasicClassificationDatasetIterator(DataLearningIterator): Attributes: data: dictionary of data with fields "train", "valid" and "test" (or some of them) """ + def __init__(self, data: dict, fields_to_merge: List[str] = None, merged_field: str = None, field_to_split: str = None, split_fields: List[str] = None, split_proportions: List[float] = None, @@ -81,7 +82,7 @@ def __init__(self, data: dict, raise IOError("Given field to split BUT not given names of split fields") def _split_data(self, field_to_split: str = None, split_fields: List[str] = None, - split_proportions: List[float] = None, split_seed: int=None, stratify: bool = None) -> bool: + split_proportions: List[float] = None, split_seed: int = None, stratify: bool = None) -> bool: """ Split given field of dataset to the given list of fields with corresponding proportions diff --git a/deeppavlov/dataset_iterators/dialog_iterator.py b/deeppavlov/dataset_iterators/dialog_iterator.py index 447c47c578..59d56cb2af 100644 --- a/deeppavlov/dataset_iterators/dialog_iterator.py +++ b/deeppavlov/dataset_iterators/dialog_iterator.py @@ -63,6 +63,7 @@ class DialogDBResultDatasetIterator(DataLearningIterator): valid: list of tuples ``(db_result dictionary, '')`` from "valid" data test: list of tuples ``(db_result dictionary, '')`` from "test" data """ + @staticmethod def _db_result(data): x, y = data diff --git a/deeppavlov/dataset_iterators/document_bert_ner_iterator.py b/deeppavlov/dataset_iterators/document_bert_ner_iterator.py deleted file mode 100644 index 58fb0cacb3..0000000000 --- a/deeppavlov/dataset_iterators/document_bert_ner_iterator.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright 2017 Neural Networks and Deep Learning lab, MIPT -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from copy import copy -from random import Random -from typing import List, Dict, Tuple, Any, Iterator, Optional -import itertools -from logging import getLogger - -import numpy as np -from bert_dp.preprocessing import convert_examples_to_features, InputExample, InputFeatures -from bert_dp.tokenization import FullTokenizer - -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.data_learning_iterator import DataLearningIterator -from deeppavlov.core.commands.utils import expand_path -from deeppavlov.core.common.registry import register -from deeppavlov.core.data.utils import zero_pad -from deeppavlov.core.models.component import Component - -logger = getLogger(__name__) - - -@register('document_bert_ner_iterator') -class DocumentBertNerIterator(DataLearningIterator): - """Dataset iterator for learning models, e. g. neural networks. - - Args: - data: list of (x, y) pairs for every data type in ``'train'``, ``'valid'`` and ``'test'`` - seed: random seed for data shuffling - shuffle: whether to shuffle data during batching - - Attributes: - shuffle: whether to shuffle data during batching - random: instance of ``Random`` initialized with a seed - """ - - def __init__(self, - data: Dict[str, List[Tuple[Any, Any]]], - bert_tokenizer_vocab_file: str, - do_lower_case: bool = False, - left_context_rate: float = 0.5, - max_seq_length: int = None, - one_sample_per_doc: bool = False, - seed: int = None, - shuffle: bool = True, - *args, **kwargs) -> None: - self.max_seq_length = max_seq_length or float('inf') - self.one_sample_per_doc = one_sample_per_doc - self.left_context_rate = left_context_rate - vocab_file = str(expand_path(bert_tokenizer_vocab_file)) - self.tokenizer = FullTokenizer(vocab_file=vocab_file, - do_lower_case=do_lower_case) - super().__init__(data, seed, shuffle, *args, **kwargs) - - def gen_batches(self, batch_size: int, data_type: str = 'train', - shuffle: bool = None) -> Iterator[Tuple[tuple, tuple]]: - """Generate batches of inputs and expected output to train neural networks - - Args: - batch_size: number of samples in batch - data_type: can be either 'train', 'test', or 'valid' - shuffle: whether to shuffle dataset before batching - - Yields: - a tuple of a batch of inputs and a batch of expected outputs - """ - if shuffle is None: - shuffle = self.shuffle - - data = self.data[data_type] - # doc_data: list of tuples (doc_id, list of doc samples) - doc_data = [(doc_id, [self.rm_doc_id(s) for s in doc]) - for doc_id, doc in itertools.groupby(data, key=self.get_doc_id)] - num_docs = len(doc_data) - - if num_docs == 0: - return - - # get all sentences from document - doc_chunks = [self.chunks_from_doc(doc) for doc_id, doc in doc_data] - if self.one_sample_per_doc: - samples = [next(chunk) for chunk in doc_chunks] - else: - samples = [s for chunk in doc_chunks for s in chunk] - num_samples = len(samples) - - order = list(range(num_samples)) - - if shuffle: - self.random.shuffle(order) - - if batch_size < 0: - batch_size = num_samples - - for i in range((num_samples - 1) // batch_size + 1): - yield tuple(zip(*[samples[o] - for o in order[i * 
batch_size: (i + 1) * batch_size]])) - - def get_instances(self, data_type: str = 'train') -> Tuple[tuple, tuple]: - data = self.data[data_type] - data_wo_doc_ids = (self.rm_doc_id(s) for s in data) - return tuple(zip(*data_wo_doc_ids)) - - @staticmethod - def get_doc_id(sample: Tuple[Any, Any]) -> int: - return sample[0][-1] - - @staticmethod - def rm_doc_id(sample: Tuple[Any, Any]) -> Tuple[Any, Any]: - x, y = sample - if len(x) > 2: - return (x[:-1], y) - return (x[0], y) - - @staticmethod - def get_text(sample: Tuple[Any, Any]) -> List[str]: - x, y = sample - if not isinstance(x[0], str): - return x[0] - return x - - @staticmethod - def merge_samples(samples: List[Tuple[Any, Any]]) -> Tuple[Any, Any]: - out_x, out_y = [], [] - for x, y in samples: - if not isinstance(x[0], str): - if not out_x: - out_x = [[]] * len(x) - out_x = tuple(out_x_i + x_i for out_x_i, x_i in zip(out_x, x)) - else: - out_x.extend(x) - out_y.extend(y) - return (out_x, out_y) - - def sample_from_doc(self, doc: List[Tuple[Any, Any]]) -> Tuple[Any, Any]: - sample_id = self.random.randint(0, len(doc) - 1) - doc_texts = [self.get_text(s) for s in doc] - rich_sample_ids = self.get_context_indices(doc_texts, - sample_id=sample_id, - subtokenizer=self.tokenizer, - max_subtokens_length=self.max_seq_length, - left_context_rate=self.left_context_rate, - random=self.random) - return self.merge_samples((doc[i] for i in rich_sample_ids)) - - def chunks_from_doc(self, doc: List[Tuple[Any, Any]]) -> List[Tuple[Any, Any]]: - pull_of_samples = copy(doc) - pull_of_texts = [self.get_text(s) for s in doc] - while pull_of_samples: - rich_sample_ids = self.get_context_indices(pull_of_texts, - sample_id=0, - subtokenizer=self.tokenizer, - max_subtokens_length=self.max_seq_length, - left_context_rate=0.0, - random=self.random) - # TODO: split differently & replace tags with 'X's for contexts - yield self.merge_samples((pull_of_samples[i] for i in rich_sample_ids)) - pull_of_samples = pull_of_samples[len(rich_sample_ids):] - pull_of_texts = pull_of_texts[len(rich_sample_ids):] - if len(rich_sample_ids) != max(rich_sample_ids) + 1: - raise RuntimeError("can't split doc {doc} into chunks") - - @staticmethod - def get_context_indices(samples: List[List[str]], - sample_id: int, - subtokenizer: FullTokenizer, - max_subtokens_length: int, - left_context_rate: float = 0.5, - random: Random = Random(31)) -> List[int]: - rich_sample_indices = [sample_id] - - toks = samples[sample_id] - l_ctx = samples[:sample_id] - r_ctx = samples[sample_id + 1:] - - subtoks_len = len([st for t in toks - for st in subtokenizer.tokenize(t)]) - l_i, r_i = 0, 0 - while (l_i < len(l_ctx)) or (r_i < len(r_ctx)): - l_rate = left_context_rate if r_i < len(r_ctx) else 1.0 - if (l_i < len(l_ctx)) and (random.random() < l_rate): - # add one sentence from left_context - subtoks = [st for t in l_ctx[-l_i-1] - for st in subtokenizer.tokenize(t)] - if subtoks_len + len(subtoks) > max_subtokens_length: - break - subtoks_len += len(subtoks) - rich_sample_indices = [sample_id - l_i - 1] + rich_sample_indices - l_i += 1 - else: - # add one sentence from right_context - subtoks = [st for t in r_ctx[r_i] for st in subtokenizer.tokenize(t)] - if subtoks_len + len(subtoks) > max_subtokens_length: - break - subtoks_len += len(subtoks) - rich_sample_indices.append(sample_id + r_i + 1) - r_i += 1 - return rich_sample_indices - diff --git a/deeppavlov/dataset_iterators/dstc2_intents_iterator.py b/deeppavlov/dataset_iterators/dstc2_intents_iterator.py index b893b5710d..3ad34bee4c 100644 
--- a/deeppavlov/dataset_iterators/dstc2_intents_iterator.py +++ b/deeppavlov/dataset_iterators/dstc2_intents_iterator.py @@ -43,6 +43,7 @@ class Dstc2IntentsDatasetIterator(BasicClassificationDatasetIterator): Attributes: data: dictionary of data with fields "train", "valid" and "test" (or some of them) """ + def __init__(self, data: dict, fields_to_merge: List[str] = None, merged_field: str = None, field_to_split: str = None, split_fields: List[str] = None, split_proportions: List[float] = None, diff --git a/deeppavlov/dataset_iterators/dstc2_ner_iterator.py b/deeppavlov/dataset_iterators/dstc2_ner_iterator.py index 420e3f3a95..7b12721497 100644 --- a/deeppavlov/dataset_iterators/dstc2_ner_iterator.py +++ b/deeppavlov/dataset_iterators/dstc2_ner_iterator.py @@ -14,7 +14,6 @@ import json import logging -from overrides import overrides from typing import List, Tuple, Dict, Any from deeppavlov.core.commands.utils import expand_path @@ -36,6 +35,7 @@ class Dstc2NerDatasetIterator(DataLearningIterator): seed: value for random seed shuffle: whether to shuffle the data """ + def __init__(self, data: Dict[str, List[Tuple]], slot_values_path: str, @@ -88,8 +88,8 @@ def _add_bio_markup(self, slot_tokens = entity.split() slot_len = len(slot_tokens) if n + slot_len <= n_toks and \ - self._is_equal_sequences(tokens[n: n + slot_len], - slot_tokens): + self._is_equal_sequences(tokens[n: n + slot_len], + slot_tokens): tags[n] = 'B-' + slot_type for k in range(1, slot_len): tags[n + k] = 'I-' + slot_type diff --git a/deeppavlov/dataset_iterators/elmo_file_paths_iterator.py b/deeppavlov/dataset_iterators/elmo_file_paths_iterator.py index 84349b574c..a887fe8b4c 100644 --- a/deeppavlov/dataset_iterators/elmo_file_paths_iterator.py +++ b/deeppavlov/dataset_iterators/elmo_file_paths_iterator.py @@ -91,15 +91,15 @@ def _line2ids(self, line): reversed_token_ids = list(reversed(token_ids)) token_ids = token_ids[1:] reversed_token_ids = reversed_token_ids[1:] - + return char_ids, reversed_char_ids, token_ids, reversed_token_ids - + def _line_generator(self, shard_generator): for shard in shard_generator: line_generator = chunk_generator(shard, 1) for line in line_generator: line = line[0] - char_ids, reversed_char_ids, token_ids, reversed_token_ids =\ + char_ids, reversed_char_ids, token_ids, reversed_token_ids = \ self._line2ids(line) yield char_ids, reversed_char_ids, token_ids, reversed_token_ids @@ -124,13 +124,13 @@ def _batch_generator(line_generator, batch_size, unroll_steps): sti.clear() sti.extend(_s) - char_ids, reversed_char_ids, token_ids, reversed_token_ids =\ + char_ids, reversed_char_ids, token_ids, reversed_token_ids = \ zip(*batch) yield char_ids, reversed_char_ids, token_ids, reversed_token_ids except StopIteration: pass - def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: Optional[bool] = None)\ + def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: Optional[bool] = None) \ -> Iterator[Tuple[str, str]]: if shuffle is None: shuffle = self.shuffle diff --git a/deeppavlov/dataset_iterators/file_paths_iterator.py b/deeppavlov/dataset_iterators/file_paths_iterator.py index 1230d115ba..9d8769f8b2 100644 --- a/deeppavlov/dataset_iterators/file_paths_iterator.py +++ b/deeppavlov/dataset_iterators/file_paths_iterator.py @@ -39,7 +39,7 @@ class FilePathsIterator(DataLearningIterator): def __init__(self, data: Dict[str, List[Union[str, Path]]], - seed: Optional[int] = None, + seed: Optional[int] = None, shuffle: bool = True, *args, **kwargs) -> None: self.seed = 
seed @@ -58,7 +58,7 @@ def _shard_generator(self, shards: List[Union[str, Path]], shuffle: bool = False self.np_random.shuffle(lines) yield lines - def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: Optional[bool] = None)\ + def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: Optional[bool] = None) \ -> Iterator[Tuple[str, str]]: if shuffle is None: shuffle = self.shuffle diff --git a/deeppavlov/dataset_iterators/kvret_dialog_iterator.py b/deeppavlov/dataset_iterators/kvret_dialog_iterator.py index dcc3f8e772..c2147c63a2 100644 --- a/deeppavlov/dataset_iterators/kvret_dialog_iterator.py +++ b/deeppavlov/dataset_iterators/kvret_dialog_iterator.py @@ -30,7 +30,8 @@ class KvretDialogDatasetIterator(DataLearningIterator): valid: list of "valid" ``(context, response)`` tuples test: list of "test" ``(context, response)`` tuples """ -# TODO: write custom batch_generator: order of utterances from one dialogue is presumed + + # TODO: write custom batch_generator: order of utterances from one dialogue is presumed @staticmethod def _dialogs(data): dialogs = [] @@ -38,14 +39,14 @@ def _dialogs(data): task = None for x, y in data: if x.get('episode_done'): - #history = [] + # history = [] history = "" dialogs.append((([], [], [], [], []), ([], []))) task = y['task'] - #history.append((x, y)) + # history.append((x, y)) history = history + ' ' + x['text'] + ' ' + y['text'] - #x['history'] = history[:-1] - x['history'] = history[:-len(x['text'])-len(y['text'])-2] + # x['history'] = history[:-1] + x['history'] = history[:-len(x['text']) - len(y['text']) - 2] dialogs[-1][0][0].append(x['text']) dialogs[-1][0][1].append(x['dialog_id']) dialogs[-1][0][2].append(x['history']) @@ -68,7 +69,7 @@ def preprocess(self, data, *args, **kwargs): history = history + ' ' + x['text'] + ' ' + y['text'] # x['x_hist'] = x_hist[:-1] # x['y_hist'] = y_hist[:-1] - x['history'] = history[:-len(x['text'])-len(y['text'])-2] + x['history'] = history[:-len(x['text']) - len(y['text']) - 2] x_tuple = (x['text'], x['dialog_id'], x['history'], x['kb_columns'], x['kb_items']) y_tuple = (y['text'], y['task']['intent']) diff --git a/deeppavlov/dataset_iterators/morphotagger_iterator.py b/deeppavlov/dataset_iterators/morphotagger_iterator.py index dca0daa990..b94d5b0285 100644 --- a/deeppavlov/dataset_iterators/morphotagger_iterator.py +++ b/deeppavlov/dataset_iterators/morphotagger_iterator.py @@ -60,8 +60,9 @@ class MorphoTaggerDatasetIterator(DataLearningIterator): For fair comparison with UD Pipe it is set to 0.9 for UD experiments. It is actually used only for Turkish data. """ + def __init__(self, data: Dict[str, List[Tuple[Any, Any]]], seed: int = None, - shuffle: bool = True, min_train_fraction: float = 0.0, + shuffle: bool = True, min_train_fraction: float = 0.0, validation_split: float = 0.2) -> None: self.validation_split = validation_split self.min_train_fraction = min_train_fraction diff --git a/deeppavlov/dataset_iterators/ner_few_shot_iterator.py b/deeppavlov/dataset_iterators/ner_few_shot_iterator.py index a635ec874e..52e1fa38c1 100644 --- a/deeppavlov/dataset_iterators/ner_few_shot_iterator.py +++ b/deeppavlov/dataset_iterators/ner_few_shot_iterator.py @@ -35,6 +35,7 @@ class NERFewShotIterator(DataLearningIterator): the same remove_not_targets: whether to replace all non target tags with `O` tag or not. 
""" + def __init__(self, data: Dict[str, List[Tuple[Any, Any]]], seed: int = None, diff --git a/deeppavlov/dataset_iterators/siamese_iterator.py b/deeppavlov/dataset_iterators/siamese_iterator.py index 222a09efed..dd418d6532 100644 --- a/deeppavlov/dataset_iterators/siamese_iterator.py +++ b/deeppavlov/dataset_iterators/siamese_iterator.py @@ -65,7 +65,7 @@ def split(self, *args, **kwargs) -> None: self.test = self.train[-self.len_test:] self.train = self.train[:-self.len_test] - def gen_batches(self, batch_size: int, data_type: str = "train", shuffle: bool = True)->\ + def gen_batches(self, batch_size: int, data_type: str = "train", shuffle: bool = True) -> \ Tuple[List[List[Tuple[int, int]]], List[int]]: """Generate batches of inputs and expected outputs to train neural networks. diff --git a/deeppavlov/dataset_iterators/snips_intents_iterator.py b/deeppavlov/dataset_iterators/snips_intents_iterator.py index 306329e762..2a881634ac 100644 --- a/deeppavlov/dataset_iterators/snips_intents_iterator.py +++ b/deeppavlov/dataset_iterators/snips_intents_iterator.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Any - from overrides import overrides from deeppavlov.core.common.registry import register diff --git a/deeppavlov/dataset_iterators/squad_iterator.py b/deeppavlov/dataset_iterators/squad_iterator.py index 518c6b6aa7..c7300799f8 100644 --- a/deeppavlov/dataset_iterators/squad_iterator.py +++ b/deeppavlov/dataset_iterators/squad_iterator.py @@ -95,7 +95,7 @@ def __init__(self, data, seed: Optional[int] = None, shuffle: bool = True, with_ self.np_random = np.random.RandomState(seed) super().__init__(data, seed, shuffle, *args, **kwargs) - def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: bool = None)\ + def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: bool = None) \ -> Generator[Tuple[Tuple[Tuple[str, str]], Tuple[List[str], List[int]]], None, None]: if shuffle is None: @@ -114,7 +114,7 @@ def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: bool = for i in range((data_len - 1) // batch_size + 1): batch = [] - for j in range(i * batch_size, min((i+1) * batch_size, data_len)): + for j in range(i * batch_size, min((i + 1) * batch_size, data_len)): q = data[j]['question'] contexts = data[j]['contexts'] ans_contexts = [c for c in contexts if len(c['answer']) > 0] @@ -131,7 +131,8 @@ def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: bool = context = noans_contexts[np.argmax(random.multinomial(1, noans_scores))] answer_text = [ans['text'] for ans in context['answer']] if len(context['answer']) > 0 else [''] - answer_start = [ans['answer_start'] for ans in context['answer']] if len(context['answer']) > 0 else [-1] + answer_start = [ans['answer_start'] + for ans in context['answer']] if len(context['answer']) > 0 else [-1] batch.append(((context['context'], q), (answer_text, answer_start))) yield tuple(zip(*batch)) @@ -194,7 +195,7 @@ def __init__(self, data, seed: Optional[int] = None, shuffle: bool = False, if self.shuffle: raise RuntimeError('MultiSquadIterator doesn\'t support shuffling.') - def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: bool = None)\ + def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: bool = None) \ -> Generator[Tuple[Tuple[Tuple[str, str]], Tuple[List[str], List[int]]], None, None]: if shuffle is None: @@ -266,4 +267,4 @@ def 
get_instances(self, data_type: str = 'train') -> Tuple[Tuple[Tuple[str, str] answer_text = [x['text'] for x in context['answer']] answer_start = [x['answer_start'] for x in context['answer']] data_examples.append(((context['context'], question), (answer_text, answer_start))) - return tuple(zip(*data_examples)) \ No newline at end of file + return tuple(zip(*data_examples)) diff --git a/deeppavlov/dataset_iterators/typos_iterator.py b/deeppavlov/dataset_iterators/typos_iterator.py index a5e7eb483a..72561ff542 100644 --- a/deeppavlov/dataset_iterators/typos_iterator.py +++ b/deeppavlov/dataset_iterators/typos_iterator.py @@ -22,7 +22,8 @@ class TyposDatasetIterator(DataLearningIterator): :class:`~deeppavlov.models.spelling_correction.brillmoore.ErrorModel` """ - def split(self, test_ratio: float=0., *args, **kwargs): + + def split(self, test_ratio: float = 0., *args, **kwargs): """Split all data into train and test Args: diff --git a/deeppavlov/dataset_readers/amazon_ecommerce_reader.py b/deeppavlov/dataset_readers/amazon_ecommerce_reader.py index 50ab12ae3e..b8a0dc0acc 100644 --- a/deeppavlov/dataset_readers/amazon_ecommerce_reader.py +++ b/deeppavlov/dataset_readers/amazon_ecommerce_reader.py @@ -21,6 +21,7 @@ logger = getLogger(__name__) + @register('amazon_ecommerce_reader') class AmazonEcommerceReader(DatasetReader): """Class to download and load ecommerce data catalog""" @@ -56,8 +57,8 @@ def read(self, data_path: str, catalog: list, **kwargs) -> Dict[str, List[Tuple[ dataset = { 'train': [((item['Title'], [], {}), item) for item in ec_data_global], 'valid': [], - 'test': [] - } + 'test': [] + } logger.info(f"In total {len(ec_data_global)} items are loaded") return dataset diff --git a/deeppavlov/dataset_readers/basic_classification_reader.py b/deeppavlov/dataset_readers/basic_classification_reader.py index 8b33963dd5..af2a73a9f3 100644 --- a/deeppavlov/dataset_readers/basic_classification_reader.py +++ b/deeppavlov/dataset_readers/basic_classification_reader.py @@ -63,7 +63,9 @@ def read(self, data_path: str, url: str = None, if not Path(data_path, train_file).exists(): if url is None: - raise Exception("data path {} does not exist or is empty, and download url parameter not specified!".format(data_path)) + raise Exception( + "data path {} does not exist or is empty, and download url parameter not specified!".format( + data_path)) log.info("Loading train data from {} to {}".format(url, data_path)) download(source_url=url, dest_file_path=Path(data_path, train_file)) diff --git a/deeppavlov/dataset_readers/conll2003_reader.py b/deeppavlov/dataset_readers/conll2003_reader.py index 19d2bc1d87..0e0958e34a 100644 --- a/deeppavlov/dataset_readers/conll2003_reader.py +++ b/deeppavlov/dataset_readers/conll2003_reader.py @@ -1,5 +1,5 @@ -from pathlib import Path from logging import getLogger +from pathlib import Path from deeppavlov.core.common.registry import register from deeppavlov.core.data.dataset_reader import DatasetReader @@ -7,6 +7,7 @@ log = getLogger(__name__) + @register('conll2003_reader') class Conll2003DatasetReader(DatasetReader): """Class to read training datasets in CoNLL-2003 format""" @@ -94,7 +95,7 @@ def parse_ner_file(self, file_name: Path): except: log.warning('Skip {}, splitted as {}'.format(repr(line), repr(line.split()))) continue - + tags.append(tag) tokens.append(token) @@ -106,7 +107,7 @@ def parse_ner_file(self, file_name: Path): x = x + (self.num_docs,) samples.append((x, tags)) self.num_docs += 1 - + if self.iob: return [(x, self._iob2_to_iob(tags)) for x, tags 
in samples] diff --git a/deeppavlov/dataset_readers/dstc2_reader.py b/deeppavlov/dataset_readers/dstc2_reader.py index 187d047e52..4d2aa7513d 100644 --- a/deeppavlov/dataset_readers/dstc2_reader.py +++ b/deeppavlov/dataset_readers/dstc2_reader.py @@ -111,7 +111,7 @@ def _read_from_file(cls, file_path, dialogs=False): """Returns data from single file""" log.info(f"[loading dialogs from {file_path}]") - utterances, responses, dialog_indices =\ + utterances, responses, dialog_indices = \ cls._get_turns(cls._iter_file(file_path), with_indices=True) data = list(map(cls._format_turn, zip(utterances, responses))) @@ -289,7 +289,7 @@ def _read_from_file(cls, file_path: str, dialogs: bool = False): """Returns data from single file""" log.info(f"[loading dialogs from {file_path}]") - utterances, responses, dialog_indices =\ + utterances, responses, dialog_indices = \ cls._get_turns(json.load(open(file_path, 'rt')), with_indices=True) data = list(map(cls._format_turn, zip(utterances, responses))) diff --git a/deeppavlov/dataset_readers/file_paths_reader.py b/deeppavlov/dataset_readers/file_paths_reader.py index cbe632f58b..adddc08470 100644 --- a/deeppavlov/dataset_readers/file_paths_reader.py +++ b/deeppavlov/dataset_readers/file_paths_reader.py @@ -57,7 +57,7 @@ def _get_files(self, data_path, tgt): paths = Path(data_path).resolve().glob(tgt) files = [file for file in paths if Path(file).is_file()] paths_info = Path(data_path, tgt).absolute().as_posix() - if not(files): + if not (files): raise Exception(f"Not find files. Data path '{paths_info}' does not exist or does not hold files!") else: log.info(f"Found {len(files)} files located '{paths_info}'.") diff --git a/deeppavlov/dataset_readers/kvret_reader.py b/deeppavlov/dataset_readers/kvret_reader.py index 996eb3f025..275a5f0f5d 100644 --- a/deeppavlov/dataset_readers/kvret_reader.py +++ b/deeppavlov/dataset_readers/kvret_reader.py @@ -78,8 +78,8 @@ def _read_from_file(cls, file_path, dialogs=False): """Returns data from single file""" log.info("[loading dialogs from {}]".format(file_path)) - utterances, responses, dialog_indices =\ - cls._get_turns(cls._iter_file(file_path), with_indices=True) + utterances, responses, dialog_indices = \ + cls._get_turns(cls._iter_file(file_path), with_indices=True) data = list(map(cls._format_turn, zip(utterances, responses))) @@ -105,22 +105,22 @@ def _format_turn(turn): @staticmethod def _check_dialog(dialog): - #TODO: manually fix bad dialogs + # TODO: manually fix bad dialogs driver = True for turn in dialog: if turn['turn'] not in ('driver', 'assistant'): raise RuntimeError("Dataset wrong format: `turn` key value is" " either `driver` or `assistant`.") if driver and turn['turn'] != 'driver': - log.debug("Turn is expected to by driver's, but it's {}'s"\ + log.debug("Turn is expected to by driver's, but it's {}'s" \ .format(turn['turn'])) return False if not driver and turn['turn'] != 'assistant': - log.debug("Turn is expected to be assistant's but it's {}'s"\ + log.debug("Turn is expected to be assistant's but it's {}'s" \ .format(turn['turn'])) return False driver = not driver - #if not driver: + # if not driver: # log.debug("Last turn is expected to be by assistant") # return False return True @@ -143,7 +143,7 @@ def _iter_file(cls, file_path): if cls._check_dialog(dialog): yield dialog, sample['scenario'] else: - log.warn("Skipping {}th dialogue with uuid={}: wrong format."\ + log.warn("Skipping {}th dialogue with uuid={}: wrong format." 
\ .format(i, sample['scenario']['uuid'])) @staticmethod @@ -172,7 +172,7 @@ def _get_turns(data, with_indices=False): if last_utter and not last_utter[-1].isspace(): last_utter += ' ' responses[-1]['utterance'] = last_utter + 'END_OF_DIALOGUE' - + dialog_indices.append({ 'start': len(utterances), 'end': len(utterances) + len(dialog), @@ -181,4 +181,3 @@ def _get_turns(data, with_indices=False): if with_indices: return utterances, responses, dialog_indices return utterances, responses - diff --git a/deeppavlov/dataset_readers/line_reader.py b/deeppavlov/dataset_readers/line_reader.py index 0699f9b141..651ff56041 100644 --- a/deeppavlov/dataset_readers/line_reader.py +++ b/deeppavlov/dataset_readers/line_reader.py @@ -36,7 +36,7 @@ def read(self, data_path: str = None, *args, **kwargs) -> Dict: content = f.readlines() dataset = dict() - dataset["train"] = [(line, ) for line in content] + dataset["train"] = [(line,) for line in content] dataset["valid"] = [] dataset["test"] = [] diff --git a/deeppavlov/dataset_readers/morphotagging_dataset_reader.py b/deeppavlov/dataset_readers/morphotagging_dataset_reader.py index c23363d170..af0af41196 100644 --- a/deeppavlov/dataset_readers/morphotagging_dataset_reader.py +++ b/deeppavlov/dataset_readers/morphotagging_dataset_reader.py @@ -23,14 +23,15 @@ WORD_COLUMN, POS_COLUMN, TAG_COLUMN = 1, 3, 5 - log = getLogger(__name__) + def get_language(filepath: str) -> str: """Extracts language from typical UD filename """ return filepath.split("-")[0] + def read_infile(infile: Union[Path, str], from_words=False, word_column: int = WORD_COLUMN, pos_column: int = POS_COLUMN, tag_column: int = TAG_COLUMN, max_sents: int = -1, @@ -163,7 +164,7 @@ def read(self, data_path: Union[List, str], for mode, filepath in zip(data_types, data_path): if mode == "dev": mode = "valid" -# if mode == "test": -# kwargs["read_only_words"] = True + # if mode == "test": + # kwargs["read_only_words"] = True data[mode] = read_infile(filepath, **kwargs) return data diff --git a/deeppavlov/dataset_readers/paraphraser_pretrain_reader.py b/deeppavlov/dataset_readers/paraphraser_pretrain_reader.py index 50f041fb89..1ad5d68adf 100644 --- a/deeppavlov/dataset_readers/paraphraser_pretrain_reader.py +++ b/deeppavlov/dataset_readers/paraphraser_pretrain_reader.py @@ -42,7 +42,6 @@ def read(self, dataset = {"train": train_data, "valid": test_data, "test": test_data} return dataset - def int_class(self, str_y): if str_y == '-1': return 0 @@ -53,5 +52,3 @@ def build_data(self, name): with open(name) as f: data = json.load(f) return [([doc['text_1'], doc['text_2']], self.int_class(doc['class'])) for doc in data] - - diff --git a/deeppavlov/dataset_readers/paraphraser_reader.py b/deeppavlov/dataset_readers/paraphraser_reader.py index 638d4e6ce3..e73e12985d 100644 --- a/deeppavlov/dataset_readers/paraphraser_reader.py +++ b/deeppavlov/dataset_readers/paraphraser_reader.py @@ -41,7 +41,7 @@ def read(self, data_path = expand_path(data_path) train_fname = data_path / 'paraphrases.xml' - test_fname = data_path / 'paraphrases_gold.xml' + test_fname = data_path / 'paraphrases_gold.xml' train_data = self.build_data(train_fname, do_lower_case) test_data = self.build_data(test_fname, do_lower_case) dataset = {"train": train_data, "valid": [], "test": test_data} diff --git a/deeppavlov/dataset_readers/snips_reader.py b/deeppavlov/dataset_readers/snips_reader.py index 28ec70b56c..7041df6aa7 100644 --- a/deeppavlov/dataset_readers/snips_reader.py +++ b/deeppavlov/dataset_readers/snips_reader.py @@ -14,8 +14,8 @@ 
import json from logging import getLogger -from typing import List, Dict, Any, Optional from pathlib import Path +from typing import List, Dict, Any, Optional from overrides import overrides diff --git a/deeppavlov/dataset_readers/sq_reader.py b/deeppavlov/dataset_readers/sq_reader.py index 4ee2503dd7..00949a6cb5 100644 --- a/deeppavlov/dataset_readers/sq_reader.py +++ b/deeppavlov/dataset_readers/sq_reader.py @@ -12,12 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pathlib import Path import pickle -from deeppavlov.core.data.dataset_reader import DatasetReader -from deeppavlov.core.data.utils import download from deeppavlov.core.common.registry import register +from deeppavlov.core.data.dataset_reader import DatasetReader @register('sq_reader') diff --git a/deeppavlov/dataset_readers/squad_dataset_reader.py b/deeppavlov/dataset_readers/squad_dataset_reader.py index f41f7f6723..24c0b6d159 100644 --- a/deeppavlov/dataset_readers/squad_dataset_reader.py +++ b/deeppavlov/dataset_readers/squad_dataset_reader.py @@ -47,7 +47,7 @@ class SquadDatasetReader(DatasetReader): url_sber_squad = 'http://files.deeppavlov.ai/datasets/sber_squad-v1.1.tar.gz' url_multi_squad = 'http://files.deeppavlov.ai/datasets/multiparagraph_squad.tar.gz' - def read(self, dir_path: str, dataset: Optional[str] = 'SQuAD', url: Optional[str] = None, *args, **kwargs)\ + def read(self, dir_path: str, dataset: Optional[str] = 'SQuAD', url: Optional[str] = None, *args, **kwargs) \ -> Dict[str, Dict[str, Any]]: """ diff --git a/deeppavlov/dataset_readers/typos_reader.py b/deeppavlov/dataset_readers/typos_reader.py index 8a876beb6a..4bc0f8515a 100644 --- a/deeppavlov/dataset_readers/typos_reader.py +++ b/deeppavlov/dataset_readers/typos_reader.py @@ -72,6 +72,7 @@ class TyposWikipedia(TyposCustom): English Wikipedia's list of common misspellings """ + @staticmethod def build(data_path: str) -> Path: """Download and parse common misspellings list from `Wikipedia `_ @@ -116,6 +117,7 @@ class TyposKartaslov(DatasetReader): a Russian misspellings dataset from `kartaslov `_ """ + def __init__(self): pass diff --git a/deeppavlov/dataset_readers/ubuntu_dstc7_mt_reader.py b/deeppavlov/dataset_readers/ubuntu_dstc7_mt_reader.py index 398a12a369..d7539ae171 100644 --- a/deeppavlov/dataset_readers/ubuntu_dstc7_mt_reader.py +++ b/deeppavlov/dataset_readers/ubuntu_dstc7_mt_reader.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List, Tuple, Dict -from pathlib import Path import json +from pathlib import Path +from typing import List, Tuple, Dict import numpy as np @@ -36,7 +36,7 @@ class UbuntuDSTC7MTReader(DatasetReader): it can be reduced to 10 (1 true response + 9 random wrong responses) to adapt with succeeding pipeline padding (str): "post" or "pre" context sentences padding """ - + def read(self, data_path: str, num_context_turns: int = 10, @@ -53,7 +53,7 @@ def read(self, dataset = {} dataset["train"] = self._create_dialog_iter(Path(data_path) / 'ubuntu_train_subtask_1.json', "train") dataset["valid"] = self._create_dialog_iter(Path(data_path) / 'ubuntu_dev_subtask_1.json', "valid") - dataset["test"] = self._create_dialog_iter(Path(data_path) / 'ubuntu_test_subtask_1.json', "test") + dataset["test"] = self._create_dialog_iter(Path(data_path) / 'ubuntu_test_subtask_1.json', "test") return dataset def _create_dialog_iter(self, filename, mode="train"): @@ -84,7 +84,7 @@ def _create_dialog_iter(self, filename, mode="train"): for entry in json_data: dialog = entry - utterances = [] # all the context sentences + utterances = [] # all the context sentences for msg in dialog['messages-so-far']: utterances.append(msg['utterance']) @@ -92,7 +92,7 @@ def _create_dialog_iter(self, filename, mode="train"): if mode != "test": true_response = dialog['options-for-correct-answers'][0]['utterance'] - fake_responses = [] # rest (wrong) responses + fake_responses = [] # rest (wrong) responses target_id = "" if mode != "test": correct_answer = dialog['options-for-correct-answers'][0] @@ -106,11 +106,13 @@ def _create_dialog_iter(self, filename, mode="train"): if mode == 'train': data.append((expanded_context + [true_response], 1)) - data.append((expanded_context + list(self.np_random.choice(fake_responses, size=1)), 0)) # random 1 from 99 + data.append( + (expanded_context + list(self.np_random.choice(fake_responses, size=1)), 0)) # random 1 from 99 elif mode == 'valid': # NOTE: labels are useless here... - data.append((expanded_context + [true_response] + list(self.np_random.choice(fake_responses, self.num_responses-1)), 0)) + data.append((expanded_context + [true_response] + list( + self.np_random.choice(fake_responses, self.num_responses - 1)), 0)) elif mode == 'test': data.append((expanded_context + fake_responses, 0)) diff --git a/deeppavlov/dataset_readers/ubuntu_v1_mt_reader.py b/deeppavlov/dataset_readers/ubuntu_v1_mt_reader.py index b4e4eb08f3..6761cee749 100644 --- a/deeppavlov/dataset_readers/ubuntu_v1_mt_reader.py +++ b/deeppavlov/dataset_readers/ubuntu_v1_mt_reader.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Tuple, Union, Dict from pathlib import Path +from typing import List, Tuple, Union, Dict from deeppavlov.core.common.registry import register from deeppavlov.core.data.dataset_reader import DatasetReader @@ -30,6 +30,7 @@ class UbuntuV1MTReader(DatasetReader): num_context_turns: A maximum number of dialogue ``context`` turns. 
padding: "post" or "pre" context sentences padding """ + def read(self, data_path: str, num_context_turns: int = 1, padding: str = "post", diff --git a/deeppavlov/dataset_readers/ubuntu_v2_mt_reader.py b/deeppavlov/dataset_readers/ubuntu_v2_mt_reader.py index 178c421bfc..57b779bd11 100644 --- a/deeppavlov/dataset_readers/ubuntu_v2_mt_reader.py +++ b/deeppavlov/dataset_readers/ubuntu_v2_mt_reader.py @@ -31,7 +31,7 @@ class UbuntuV2MTReader(DatasetReader): num_context_turns: A maximum number of dialogue ``context`` turns. padding: "post" or "pre" context sentences padding """ - + def read(self, data_path: str, num_context_turns: int = 1, padding: str = "post", @@ -61,7 +61,7 @@ def read(self, data_path: str, dataset["valid"] = self.preprocess_data_validation(valid_fname) dataset["test"] = self.preprocess_data_validation(test_fname) return dataset - + def preprocess_data_train(self, train_fname: Union[Path, str]) -> List[Tuple[List[str], int]]: contexts = [] responses = [] @@ -87,7 +87,7 @@ def preprocess_data_validation(self, fname: Union[Path, str]) -> List[Tuple[List contexts.append(self._expand_context(el[0].split('__eot__'), padding=self.padding)) responses.append(el[1:]) data = [el[0] + el[1] for el in zip(contexts, responses)] - data = [(el, 1) for el in data] # NOTE: labels are useless here actually... + data = [(el, 1) for el in data] # NOTE: labels are useless here actually... return data def _expand_context(self, context: List[str], padding: str) -> List[str]: diff --git a/deeppavlov/dataset_readers/ubuntu_v2_reader.py b/deeppavlov/dataset_readers/ubuntu_v2_reader.py index 2c870ea4ba..00e45bc9c2 100644 --- a/deeppavlov/dataset_readers/ubuntu_v2_reader.py +++ b/deeppavlov/dataset_readers/ubuntu_v2_reader.py @@ -52,7 +52,7 @@ def read(self, data_path: str, dataset["valid"] = self.preprocess_data_validation(valid_fname) dataset["test"] = self.preprocess_data_validation(test_fname) return dataset - + def preprocess_data_train(self, train_fname: Union[Path, str]) -> List[Tuple[List[str], int]]: contexts = [] responses = [] @@ -82,4 +82,4 @@ def preprocess_data_validation(self, fname: Union[Path, str]) -> List[Tuple[List responses.append(el[1:]) data = [[el[0]] + el[1] for el in zip(contexts, responses)] data = [(el, 1) for el in data] - return data \ No newline at end of file + return data diff --git a/deeppavlov/deep.py b/deeppavlov/deep.py index b0533f5095..27f5912987 100644 --- a/deeppavlov/deep.py +++ b/deeppavlov/deep.py @@ -47,7 +47,7 @@ parser.add_argument("--folds", help="number of folds", type=int, default=5) -parser.add_argument("-t", "--token", default=None, help="telegram bot token", type=str) +parser.add_argument("-t", "--token", default=None, help="telegram bot token", type=str) parser.add_argument("-i", "--ms-id", default=None, help="microsoft bot framework app id", type=str) parser.add_argument("-s", "--ms-secret", default=None, help="microsoft bot framework app secret", type=str) diff --git a/deeppavlov/deprecated/agent/agent.py b/deeppavlov/deprecated/agent/agent.py index 9bec306f6c..2154f9d4a5 100644 --- a/deeppavlov/deprecated/agent/agent.py +++ b/deeppavlov/deprecated/agent/agent.py @@ -50,6 +50,7 @@ class Agent(Component, metaclass=ABCMeta): We highly recommend to use wrapped skills for skills inference. dialog_logger: DeepPavlov dialog logging facility. """ + def __init__(self, skills: List[Component]) -> None: self.skills = skills self.history: Dict = defaultdict(list) @@ -118,6 +119,7 @@ class SkillWrapper: skill_id: Skill index in Agent.skills list. 
agent: Agent instance. """ + def __init__(self, skill: Component, skill_id: int, agent: Agent) -> None: self.skill = skill self.skill_id = skill_id diff --git a/deeppavlov/deprecated/agent/filter.py b/deeppavlov/deprecated/agent/filter.py index d263ccc777..65d60f46bd 100644 --- a/deeppavlov/deprecated/agent/filter.py +++ b/deeppavlov/deprecated/agent/filter.py @@ -22,6 +22,7 @@ class Filter(Component, metaclass=ABCMeta): which is used in Agent to select utterances from incoming batch to be processed for each Agent skill. """ + @abstractmethod def __call__(self, utterances_batch: list, history_batch: list) -> list: """Returns skills-utterances application matrix. diff --git a/deeppavlov/deprecated/agent/processor.py b/deeppavlov/deprecated/agent/processor.py index 2a87f2dee0..b2e99f9bcc 100644 --- a/deeppavlov/deprecated/agent/processor.py +++ b/deeppavlov/deprecated/agent/processor.py @@ -22,6 +22,7 @@ class Processor(Component, metaclass=ABCMeta): which is used in Agent to process skills responses and give one final response for each utterance. """ + # TODO: change *responses to [[], [], ...] argument @abstractmethod def __call__(self, utterances_batch: list, history_batch: list, *responses: list) -> list: diff --git a/deeppavlov/deprecated/agent/rich_content.py b/deeppavlov/deprecated/agent/rich_content.py index 436fa9bdc2..1b4070b951 100644 --- a/deeppavlov/deprecated/agent/rich_content.py +++ b/deeppavlov/deprecated/agent/rich_content.py @@ -24,6 +24,7 @@ class RichItem(metaclass=ABCMeta): at least in json format (mandatory) as well as in the formats compatible with other channels. """ + @abstractmethod def json(self) -> Union[list, dict]: """Returns json compatible state of the control instance including @@ -77,6 +78,7 @@ class RichControl(RichItem, metaclass=ABCMeta): control_json: Control json representation template, which contains control type and content fields. """ + def __init__(self, control_type: str) -> None: self.control_type: str = control_type self.content = None diff --git a/deeppavlov/deprecated/agents/default_agent/default_agent.py b/deeppavlov/deprecated/agents/default_agent/default_agent.py index 4737bb80c4..fbcfff1f93 100644 --- a/deeppavlov/deprecated/agents/default_agent/default_agent.py +++ b/deeppavlov/deprecated/agents/default_agent/default_agent.py @@ -46,6 +46,7 @@ class DefaultAgent(Agent): skills_processor: Initiated agent processor. skills_filter: Initiated agent filter. 
""" + def __init__(self, skills: List[Component], skills_processor: Optional[Processor] = None, skills_filter: Optional[Filter] = None, *args, **kwargs) -> None: super(DefaultAgent, self).__init__(skills=skills) diff --git a/deeppavlov/deprecated/agents/ecommerce_agent/ecommerce_agent.py b/deeppavlov/deprecated/agents/ecommerce_agent/ecommerce_agent.py index 5313235d5c..d4b57290c2 100644 --- a/deeppavlov/deprecated/agents/ecommerce_agent/ecommerce_agent.py +++ b/deeppavlov/deprecated/agents/ecommerce_agent/ecommerce_agent.py @@ -129,15 +129,16 @@ def _call(self, utterances_batch: List[str], utterances_ids: List[int] = None) - return [rich_message] + def _draw_tail(entropy, history): buttons_frame = ButtonsFrame(text="") - buttons_frame.add_button(Button('More', "@next:"+str(len(history)-1))) + buttons_frame.add_button(Button('More', "@next:" + str(len(history) - 1))) caption = "Press More " if entropy: caption += "specify a " + entropy[0][1] for ent_value in entropy[0][2][:4]: - button_a = Button(ent_value[0], f'@entropy:{len(history)-1}:{entropy[0][1]}:{ent_value[0]}') + button_a = Button(ent_value[0], f'@entropy:{len(history) - 1}:{entropy[0][1]}:{ent_value[0]}') buttons_frame.add_button(button_a) buttons_frame.text = caption @@ -147,10 +148,10 @@ def _draw_tail(entropy, history): def _draw_item(item, idx, history): title = item['Title'] if 'ListPrice' in item: - title += " - **$" + item['ListPrice'].split('$')[1]+"**" + title += " - **$" + item['ListPrice'].split('$')[1] + "**" buttons_frame = ButtonsFrame(text=title) - buttons_frame.add_button(Button('Show details', "@details:"+str(len(history)-2)+":"+str(idx))) + buttons_frame.add_button(Button('Show details', "@details:" + str(len(history) - 2) + ":" + str(idx))) return buttons_frame diff --git a/deeppavlov/deprecated/agents/filters/transparent_filter.py b/deeppavlov/deprecated/agents/filters/transparent_filter.py index 494ed4b8c8..1cfc8d2ae5 100644 --- a/deeppavlov/deprecated/agents/filters/transparent_filter.py +++ b/deeppavlov/deprecated/agents/filters/transparent_filter.py @@ -24,6 +24,7 @@ class TransparentFilter(Filter): Attributes: size: Number of agent skills. 
""" + def __init__(self, skills_count: int, *args, **kwargs) -> None: self.size: int = skills_count diff --git a/deeppavlov/deprecated/agents/processors/default_rich_content_processor.py b/deeppavlov/deprecated/agents/processors/default_rich_content_processor.py index 354fe16403..1baeb4a22f 100644 --- a/deeppavlov/deprecated/agents/processors/default_rich_content_processor.py +++ b/deeppavlov/deprecated/agents/processors/default_rich_content_processor.py @@ -18,6 +18,7 @@ class DefaultRichContentWrapper(Processor): """Returns RichControl wrapped responses with highest confidence.""" + def __init__(self, *args, **kwargs) -> None: pass diff --git a/deeppavlov/deprecated/agents/processors/highest_confidence_selector.py b/deeppavlov/deprecated/agents/processors/highest_confidence_selector.py index 8493cc5da9..21a41b85a7 100644 --- a/deeppavlov/deprecated/agents/processors/highest_confidence_selector.py +++ b/deeppavlov/deprecated/agents/processors/highest_confidence_selector.py @@ -17,6 +17,7 @@ class HighestConfidenceSelector(Processor): """Returns for each utterance response with highest confidence.""" + def __init__(self, *args, **kwargs) -> None: pass diff --git a/deeppavlov/deprecated/agents/processors/random_selector.py b/deeppavlov/deprecated/agents/processors/random_selector.py index 1b70b58218..58b5c64177 100644 --- a/deeppavlov/deprecated/agents/processors/random_selector.py +++ b/deeppavlov/deprecated/agents/processors/random_selector.py @@ -19,6 +19,7 @@ class RandomSelector(Processor): """Returns response of a random skill for each utterance.""" + def __init__(self, *args, **kwargs) -> None: pass diff --git a/deeppavlov/deprecated/agents/rich_content/default_rich_content.py b/deeppavlov/deprecated/agents/rich_content/default_rich_content.py index 5c55eee6ab..fa31c1c12d 100644 --- a/deeppavlov/deprecated/agents/rich_content/default_rich_content.py +++ b/deeppavlov/deprecated/agents/rich_content/default_rich_content.py @@ -26,6 +26,7 @@ class PlainText(RichControl): Attributes: content: Text of the message. """ + def __init__(self, text: str) -> None: super(PlainText, self).__init__('plain_text') self.content: str = text @@ -42,7 +43,7 @@ def json(self) -> dict: self.control_json['content'] = self.content return self.control_json - def ms_bot_framework(self)-> dict: + def ms_bot_framework(self) -> dict: """Returns MS Bot Framework compatible state of the PlainText instance. Creating MS Bot Framework activity blank with "text" field populated. @@ -91,6 +92,7 @@ class Button(RichControl): name: Displayed name of the button. callback: Plain text returned as callback when button pressed. """ + def __init__(self, name: str, callback: str) -> None: super(Button, self).__init__('button') self.name: str = name @@ -136,7 +138,8 @@ class ButtonsFrame(RichControl): text: Text displayed with embedded buttons. content: Container with Button objects. """ - def __init__(self, text: Optional[str]=None) -> None: + + def __init__(self, text: Optional[str] = None) -> None: super(ButtonsFrame, self).__init__('buttons_frame') self.text: [str, None] = text self.content: list = [] diff --git a/deeppavlov/deprecated/skill/skill.py b/deeppavlov/deprecated/skill/skill.py index 7e0c6aa7ac..15b46f4dbb 100644 --- a/deeppavlov/deprecated/skill/skill.py +++ b/deeppavlov/deprecated/skill/skill.py @@ -24,9 +24,10 @@ class Skill(Component, metaclass=ABCMeta): Skill is a DeepPavlov component, which provides handling dialog state, dialog history and rich content. 
""" + @abstractmethod def __call__(self, utterances_batch: list, history_batch: list, - states_batch: Optional[list]=None) -> Union[Tuple[list, list], Tuple[list, list, Optional[list]]]: + states_batch: Optional[list] = None) -> Union[Tuple[list, list], Tuple[list, list, Optional[list]]]: """Returns skill inference result. Returns batches of skill inference results, estimated confidence diff --git a/deeppavlov/deprecated/skills/default_skill/default_skill.py b/deeppavlov/deprecated/skills/default_skill/default_skill.py index 6894031bc7..730518d1e5 100644 --- a/deeppavlov/deprecated/skills/default_skill/default_skill.py +++ b/deeppavlov/deprecated/skills/default_skill/default_skill.py @@ -31,12 +31,13 @@ class DefaultStatelessSkill(Skill): Attributes: model: DeepPavlov model to be wrapped into default skill instance. """ - def __init__(self, model: Chainer, lang: str='en', *args, **kwargs) -> None: + + def __init__(self, model: Chainer, lang: str = 'en', *args, **kwargs) -> None: self.model = model self.proposal: str = proposals[lang] def __call__(self, utterances_batch: list, history_batch: list, - states_batch: Optional[list]=None) -> Tuple[list, list, list]: + states_batch: Optional[list] = None) -> Tuple[list, list, list]: """Returns skill inference result. Returns batches of skill inference results, estimated confidence diff --git a/deeppavlov/deprecated/skills/ecommerce_skill/bleu_retrieve.py b/deeppavlov/deprecated/skills/ecommerce_skill/bleu_retrieve.py index 00a05aaaef..4303746b47 100644 --- a/deeppavlov/deprecated/skills/ecommerce_skill/bleu_retrieve.py +++ b/deeppavlov/deprecated/skills/ecommerce_skill/bleu_retrieve.py @@ -49,7 +49,6 @@ class EcommerceSkillBleu(Skill): min_entropy: min entropy threshold for specifying """ - def __init__(self, preprocess: Component, save_path: str, @@ -74,7 +73,6 @@ def __init__(self, if kwargs.get('mode') != 'train': self.load() - def fit(self, data: List[Dict[Any, Any]]) -> None: """Preprocess items `title` and `description` from the `data` @@ -88,17 +86,15 @@ def fit(self, data: List[Dict[Any, Any]]) -> None: log.info(f"Items to nlp: {len(data)}") self.ec_data = [dict(item, **{ 'title_nlped': self.preprocess.spacy2dict(self.preprocess.analyze(item['Title'])), - 'feat_nlped': self.preprocess.spacy2dict(self.preprocess.analyze(item['Title']+'. '+item['Feature'])) + 'feat_nlped': self.preprocess.spacy2dict(self.preprocess.analyze(item['Title'] + '. 
' + item['Feature'])) }) for item in data] log.info('Data are nlped') - def save(self, **kwargs) -> None: """Save classifier parameters""" log.info(f"Saving model to {self.save_path}") save_pickle(self.ec_data, self.save_path) - def load(self, **kwargs) -> None: """Load classifier parameters""" log.info(f"Loading model from {self.load_path}") @@ -110,7 +106,6 @@ def load(self, **kwargs) -> None: log.info(f"Loaded items {len(self.ec_data)}") - def __call__(self, queries: List[str], history: List[Any], states: List[Dict[Any, Any]]) -> \ Tuple[Tuple[List[Any], List[Any]], List[float], List[Any]]: """Retrieve catalog items according to the BLEU measure @@ -165,12 +160,12 @@ def __call__(self, queries: List[str], history: List[Any], states: List[Dict[Any state['Price'] = money_range score_title = [bleu_advanced(self.preprocess.lemmas(item['title_nlped']), - self.preprocess.lemmas(self.preprocess.filter_nlp_title(query)), - weights = (1,), penalty = False) for item in self.ec_data] + self.preprocess.lemmas(self.preprocess.filter_nlp_title(query)), + weights=(1,), penalty=False) for item in self.ec_data] score_feat = [bleu_advanced(self.preprocess.lemmas(item['feat_nlped']), self.preprocess.lemmas(self.preprocess.filter_nlp(query)), - weights = (0.3, 0.7), penalty = False) for idx, item in enumerate(self.ec_data)] + weights=(0.3, 0.7), penalty=False) for idx, item in enumerate(self.ec_data)] scores = np.mean([score_feat, score_title], axis=0).tolist() @@ -182,8 +177,9 @@ def __call__(self, queries: List[str], history: List[Any], states: List[Dict[Any results_args_sim = [idx for idx in results_args if scores[idx] >= self.min_similarity] - log.debug(f"Items before similarity filtering {len(results_args)} and after {len(results_args_sim)} with th={self.min_similarity} " + - f"the best one has score {scores[results_args[0]]} with title {self.ec_data[results_args[0]]['Title']}") + log.debug( + f"Items before similarity filtering {len(results_args)} and after {len(results_args_sim)} with th={self.min_similarity} " + + f"the best one has score {scores[results_args[0]]} with title {self.ec_data[results_args[0]]['Title']}") results_args_sim = self._filter_state(state, results_args_sim) @@ -202,7 +198,6 @@ def __call__(self, queries: List[str], history: List[Any], states: List[Dict[Any return (response, entropies), confidence, back_states - def _clean_items(self, results: List[int]) -> List[Any]: local_response: List = [] for idx in results: @@ -212,7 +207,6 @@ def _clean_items(self, results: List[int]) -> List[Any]: local_response.append(temp) return local_response - def _filter_state(self, state: Dict[Any, Any], results_args_sim: List[int]) -> List[Any]: for key, value in state.items(): log.debug(f"Filtering for {key}:{value}") @@ -235,7 +229,6 @@ def _filter_state(self, state: Dict[Any, Any], results_args_sim: List[int]) -> L return results_args_sim - def _entropy_subquery(self, results_args: List[int]) -> List[Tuple[float, str, List[Tuple[str, int]]]]: """Calculate entropy of selected attributes for items from the catalog. 
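Reviewer note (illustrative, not part of the patch): the EcommerceSkillBleu hunks above only reflow the bleu_advanced(..., weights=(1,), penalty=False) calls, but the scoring idea is easy to lose in the whitespace churn. The sketch below is a minimal, self-contained approximation of what a unigram BLEU score without brevity penalty yields for a catalog title against a user query; the function and variable names are invented for illustration and are not part of DeepPavlov's API.

# Minimal sketch (assumption: plain clipped unigram precision approximates
# bleu_advanced(..., weights=(1,), penalty=False) closely enough for illustration).
from collections import Counter
from typing import List


def unigram_precision(reference: List[str], hypothesis: List[str]) -> float:
    """Clipped fraction of hypothesis tokens that also occur in the reference."""
    if not hypothesis:
        return 0.0
    ref_counts = Counter(reference)
    overlap = sum(min(count, ref_counts[token])
                  for token, count in Counter(hypothesis).items())
    return overlap / len(hypothesis)


if __name__ == '__main__':
    title = "apple iphone 8 silicone case".split()   # lemmatized catalog title
    query = "silicone case for iphone".split()        # lemmatized user query
    print(round(unigram_precision(title, query), 2))  # 0.75 -- 3 of 4 query tokens match

As the hunks above show, the real skill averages a title-only score (weights=(1,)) with a title-plus-feature score (weights=(0.3, 0.7)) and then drops candidates below min_similarity.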
diff --git a/deeppavlov/deprecated/skills/ecommerce_skill/tfidf_retrieve.py b/deeppavlov/deprecated/skills/ecommerce_skill/tfidf_retrieve.py index a801f7e7ec..5ba516c441 100644 --- a/deeppavlov/deprecated/skills/ecommerce_skill/tfidf_retrieve.py +++ b/deeppavlov/deprecated/skills/ecommerce_skill/tfidf_retrieve.py @@ -45,7 +45,6 @@ class EcommerceSkillTfidf(Component): min_entropy: min entropy threshold for specifying """ - def __init__(self, save_path: str, load_path: str, @@ -64,7 +63,6 @@ def __init__(self, if kwargs.get('mode') != 'train': self.load() - def fit(self, data, query) -> None: """Preprocess items `title` and `description` from the `data` @@ -78,22 +76,20 @@ def fit(self, data, query) -> None: self.x_train_features = vstack(list(query)) self.ec_data = data - def save(self) -> None: """Save classifier parameters""" log.info("Saving to {}".format(self.save_path)) path = expand_path(self.save_path) save_pickle((self.ec_data, self.x_train_features), path) - def load(self) -> None: """Load classifier parameters""" log.info("Loading from {}".format(self.load_path)) self.ec_data, self.x_train_features = load_pickle( expand_path(self.load_path)) - - def __call__(self, q_vects: List[csr_matrix], histories: List[Any], states: List[Dict[Any, Any]]) -> Tuple[Tuple[List[Dict[Any, Any]], List[Any]], List[float], Dict[Any, Any]]: + def __call__(self, q_vects: List[csr_matrix], histories: List[Any], states: List[Dict[Any, Any]]) -> Tuple[ + Tuple[List[Dict[Any, Any]], List[Any]], List[float], Dict[Any, Any]]: """Retrieve catalog items according to the TFIDF measure Parameters: @@ -129,7 +125,7 @@ def __call__(self, q_vects: List[csr_matrix], histories: List[Any], states: List log.info(f"Search query {q_vect}") - if len(states) >= idx+1: + if len(states) >= idx + 1: state = states[idx] else: state = {'start': 0, 'stop': 5} @@ -184,17 +180,14 @@ def __call__(self, q_vects: List[csr_matrix], histories: List[Any], states: List entropies.append(self._entropy_subquery(answer_ids)) return (items, entropies), confidences, back_states - def _csr_to_list(self, csr: csr_matrix) -> List[Any]: return [csr.data.tolist(), csr.indices.tolist()] - def _list_to_csr(self, _list: List) -> csr_matrix: row_ind = [0] * len(_list[0]) col_ind = _list[1] return csr_matrix((_list[0], (row_ind, col_ind))) - def _take_complex_query(self, q_prev: csr_matrix, q_cur: csr_matrix) -> bool: """Decides whether to use the long compound query or the current short query @@ -217,7 +210,6 @@ def _take_complex_query(self, q_prev: csr_matrix, q_cur: csr_matrix) -> bool: return False - def _similarity(self, q_vect: Union[csr_matrix, List]) -> List[float]: """Calculates cosine similarity between the user's query and product items. 
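Reviewer note (illustrative, not part of the patch): the _similarity hunk just below only changes spacing around the division, but the underlying computation is sparse cosine similarity between the TF-IDF query vector and the catalog item matrix. A minimal standalone sketch, with names invented for illustration:

# Sketch of the cosine computation mirrored by EcommerceSkillTfidf._similarity;
# illustrative code only, not the DeepPavlov implementation.
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import norm as sparse_norm


def cosine_scores(q_vect: csr_matrix, items: csr_matrix) -> np.ndarray:
    """One cosine score per row of `items` (zero-norm rows map to 0.0)."""
    denom = sparse_norm(q_vect) * sparse_norm(items, axis=1)
    with np.errstate(divide='ignore', invalid='ignore'):
        scores = np.asarray(q_vect.dot(items.T).todense()).ravel() / denom
    return np.nan_to_num(scores)


q = csr_matrix(np.array([[1.0, 0.0, 1.0]]))
catalog = csr_matrix(np.array([[1.0, 0.0, 1.0],
                               [0.0, 1.0, 0.0]]))
print(cosine_scores(q, catalog))  # [1. 0.]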
@@ -229,13 +221,12 @@ def _similarity(self, q_vect: Union[csr_matrix, List]) -> List[float]: """ norm = sparse_norm(q_vect) * sparse_norm(self.x_train_features, axis=1) - cos_similarities = np.array(q_vect.dot(self.x_train_features.T).todense())/norm + cos_similarities = np.array(q_vect.dot(self.x_train_features.T).todense()) / norm cos_similarities = cos_similarities[0] cos_similarities = np.nan_to_num(cos_similarities) return cos_similarities - def _state_based_filter(self, ids: List[int], state: Dict[Any, Any]): """Filters the candidates based on the key-values from the state @@ -259,7 +250,6 @@ def _state_based_filter(self, ids: List[int], state: Dict[Any, Any]): if self.ec_data[idx][key].lower() == value.lower()] return ids - def _entropy_subquery(self, results_args: List[int]) -> List[Tuple[float, str, List[Tuple[str, int]]]]: """Calculate entropy of selected attributes for items from the catalog. diff --git a/deeppavlov/deprecated/skills/pattern_matching_skill/pattern_matching_skill.py b/deeppavlov/deprecated/skills/pattern_matching_skill/pattern_matching_skill.py index 04c0eabbf2..5fb8357d4f 100644 --- a/deeppavlov/deprecated/skills/pattern_matching_skill/pattern_matching_skill.py +++ b/deeppavlov/deprecated/skills/pattern_matching_skill/pattern_matching_skill.py @@ -44,8 +44,9 @@ class PatternMatchingSkill(Skill): ignore_case: Turns on utterances case ignoring. default_confidence: The default confidence. """ - def __init__(self, responses: List[str], patterns: Optional[List[str]]=None, - regex: bool=False, ignore_case: bool=True, default_confidence: float = 1) -> None: + + def __init__(self, responses: List[str], patterns: Optional[List[str]] = None, + regex: bool = False, ignore_case: bool = True, default_confidence: float = 1) -> None: if isinstance(responses, str): responses = [responses] self.responses = responses @@ -64,7 +65,7 @@ def __init__(self, responses: List[str], patterns: Optional[List[str]]=None, self.patterns = patterns def __call__(self, utterances_batch: list, history_batch: list, - states_batch: Optional[list]=None) -> Tuple[list, list]: + states_batch: Optional[list] = None) -> Tuple[list, list]: """Returns skill inference result. 
Returns batches of skill inference results, estimated confidence @@ -89,10 +90,11 @@ def __call__(self, utterances_batch: list, history_batch: list, if self.ignore_case: utterances_batch = [utterance.lower() for utterance in utterances_batch] if self.regex: - confidence = [self.default_confidence*float(any([pattern.search(utterance) for pattern in self.patterns])) - for utterance in utterances_batch] + confidence = [ + self.default_confidence * float(any([pattern.search(utterance) for pattern in self.patterns])) + for utterance in utterances_batch] else: - confidence = [self.default_confidence*float(any([pattern in utterance for pattern in self.patterns])) + confidence = [self.default_confidence * float(any([pattern in utterance for pattern in self.patterns])) for utterance in utterances_batch] return response, confidence diff --git a/deeppavlov/download.py b/deeppavlov/download.py index 6b4f48723a..e3d0d6e490 100644 --- a/deeppavlov/download.py +++ b/deeppavlov/download.py @@ -24,7 +24,8 @@ import deeppavlov from deeppavlov.core.commands.utils import expand_path, parse_config -from deeppavlov.core.data.utils import download, download_decompress, get_all_elems_from_json, file_md5, set_query_parameter, path_set_md5 +from deeppavlov.core.data.utils import download, download_decompress, get_all_elems_from_json, file_md5, \ + set_query_parameter, path_set_md5 log = getLogger(__name__) @@ -60,7 +61,7 @@ def get_config_downloads(config: Union[str, Path, dict]) -> Set[Tuple[str, Path] return downloads -def get_configs_downloads(config: Optional[Union[str, Path, dict]]=None) -> Dict[str, Set[Path]]: +def get_configs_downloads(config: Optional[Union[str, Path, dict]] = None) -> Dict[str, Set[Path]]: all_downloads = defaultdict(set) if config: @@ -105,7 +106,7 @@ def check_md5(url: str, dest_paths: List[Path]) -> bool: for base_path in not_done: log.info(f'Copying data from {done} to {base_path}') for p in expected.keys(): - shutil.copy(done/p, base_path/p) + shutil.copy(done / p, base_path / p) return True @@ -147,7 +148,7 @@ def deep_download(config: Union[str, Path, dict]) -> None: download_resource(url, dest_paths) -def main(args: Optional[List[str]]=None) -> None: +def main(args: Optional[List[str]] = None) -> None: args = parser.parse_args(args) log.info("Downloading...") download_resources(args) diff --git a/deeppavlov/evolve.py b/deeppavlov/evolve.py index ed796d33a5..206f908c97 100644 --- a/deeppavlov/evolve.py +++ b/deeppavlov/evolve.py @@ -195,7 +195,7 @@ def run_population(population, evolution, gpus): f_name = save_path / "config.json" save_json(population[i], f_name) - with save_path.joinpath('out.txt').open('w', encoding='utf8') as outlog,\ + with save_path.joinpath('out.txt').open('w', encoding='utf8') as outlog, \ save_path.joinpath('err.txt').open('w', encoding='utf8') as errlog: env = dict(os.environ) if len(gpus) > 1 or gpus[0] != -1: diff --git a/deeppavlov/metrics/bleu.py b/deeppavlov/metrics/bleu.py index e20fcc3c5b..df94c049e1 100644 --- a/deeppavlov/metrics/bleu.py +++ b/deeppavlov/metrics/bleu.py @@ -25,7 +25,7 @@ @register_metric('bleu_advanced') def bleu_advanced(y_true: List[Any], y_predicted: List[Any], - weights: Tuple=(1,), smoothing_function=SMOOTH.method1, + weights: Tuple = (1,), smoothing_function=SMOOTH.method1, auto_reweigh=False, penalty=True) -> float: """Calculate BLEU score @@ -52,7 +52,7 @@ def bleu_advanced(y_true: List[Any], y_predicted: List[Any], if penalty is True or bpenalty == 0: return bleu_measure - return bleu_measure/bpenalty + return 
bleu_measure / bpenalty @register_metric('bleu') @@ -78,4 +78,4 @@ def per_item_bleu(y_true, y_predicted): def per_item_dialog_bleu(y_true, y_predicted): y_true = (y['text'] for dialog in y_true for y in dialog) return corpus_bleu([[y_t.lower().split()] for y_t in y_true], - [y_p.lower().split() for y_p in y_predicted]) \ No newline at end of file + [y_p.lower().split() for y_p in y_predicted]) diff --git a/deeppavlov/metrics/elmo_metrics.py b/deeppavlov/metrics/elmo_metrics.py index 160cab88a6..e34a78e5bd 100644 --- a/deeppavlov/metrics/elmo_metrics.py +++ b/deeppavlov/metrics/elmo_metrics.py @@ -31,4 +31,3 @@ def elmo_loss2ppl(losses: List[np.ndarray]) -> float: """ avg_loss = np.mean(losses) return float(np.exp(avg_loss)) - diff --git a/deeppavlov/metrics/fmeasure.py b/deeppavlov/metrics/fmeasure.py index c915b61e47..7c2037b8fb 100644 --- a/deeppavlov/metrics/fmeasure.py +++ b/deeppavlov/metrics/fmeasure.py @@ -39,7 +39,7 @@ def ner_f1(y_true, y_predicted): @register_metric('ner_token_f1') def ner_token_f1(y_true, y_pred, print_results=False): y_true = list(chain(*y_true)) - y_pred= list(chain(*y_pred)) + y_pred = list(chain(*y_pred)) # Drop BIO or BIOES markup assert all(len(tag.split('-')) <= 2 for tag in y_true) @@ -77,7 +77,8 @@ def ner_token_f1(y_true, y_pred, print_results=False): 'f1': f1, 'n_true': n_true, 'n_pred': n_pred, 'tp': tp, 'fp': fp, 'fn': fn} - results['__total__'], accuracy, total_true_entities, total_predicted_entities, total_correct = _global_stats_f1(results) + results['__total__'], accuracy, total_true_entities, total_predicted_entities, total_correct = _global_stats_f1( + results) n_tokens = len(y_true) if print_results: log.debug('TOKEN LEVEL F1') @@ -85,7 +86,8 @@ def ner_token_f1(y_true, y_pred, print_results=False): return results['__total__']['f1'] -def _print_conll_report(results, accuracy, total_true_entities, total_predicted_entities, n_tokens, total_correct, short_report=False, entity_of_interest=None): +def _print_conll_report(results, accuracy, total_true_entities, total_predicted_entities, n_tokens, total_correct, + short_report=False, entity_of_interest=None): tags = list(results.keys()) s = 'processed {len} tokens ' \ @@ -124,12 +126,13 @@ def _print_conll_report(results, accuracy, total_true_entities, total_predicted_ tot_predicted=results[tag]['n_pred']) elif entity_of_interest is not None: s += '\t' + entity_of_interest + ': precision: {tot_prec:.2f}%; ' \ - 'recall: {tot_recall:.2f}%; ' \ - 'F1: {tot_f1:.2f} ' \ - '{tot_predicted}\n\n'.format(tot_prec=results[entity_of_interest]['precision'], - tot_recall=results[entity_of_interest]['recall'], - tot_f1=results[entity_of_interest]['f1'], - tot_predicted=results[entity_of_interest]['n_pred']) + 'recall: {tot_recall:.2f}%; ' \ + 'F1: {tot_f1:.2f} ' \ + '{tot_predicted}\n\n'.format(tot_prec=results[entity_of_interest]['precision'], + tot_recall=results[entity_of_interest]['recall'], + tot_f1=results[entity_of_interest]['f1'], + tot_predicted=results[entity_of_interest][ + 'n_pred']) log.debug(s) @@ -381,11 +384,12 @@ def precision_recall_f1(y_true, y_pred, print_results=True, short_report=False, tot_predicted=results[tag]['n_pred']) elif entity_of_interest is not None: s += '\t' + entity_of_interest + ': precision: {tot_prec:.2f}%; ' \ - 'recall: {tot_recall:.2f}%; ' \ - 'F1: {tot_f1:.2f} ' \ - '{tot_predicted}\n\n'.format(tot_prec=results[entity_of_interest]['precision'], - tot_recall=results[entity_of_interest]['recall'], - tot_f1=results[entity_of_interest]['f1'], - 
tot_predicted=results[entity_of_interest]['n_pred']) + 'recall: {tot_recall:.2f}%; ' \ + 'F1: {tot_f1:.2f} ' \ + '{tot_predicted}\n\n'.format( + tot_prec=results[entity_of_interest]['precision'], + tot_recall=results[entity_of_interest]['recall'], + tot_f1=results[entity_of_interest]['f1'], + tot_predicted=results[entity_of_interest]['n_pred']) log.debug(s) return results diff --git a/deeppavlov/metrics/google_bleu.py b/deeppavlov/metrics/google_bleu.py index 9fe6466ad4..f94a09cc99 100644 --- a/deeppavlov/metrics/google_bleu.py +++ b/deeppavlov/metrics/google_bleu.py @@ -26,87 +26,87 @@ def _get_ngrams(segment, max_order): - """Extracts all n-grams upto a given maximum order from an input segment. + """Extracts all n-grams upto a given maximum order from an input segment. - Args: - segment: text segment from which n-grams will be extracted. - max_order: maximum length in tokens of the n-grams returned by this - methods. + Args: + segment: text segment from which n-grams will be extracted. + max_order: maximum length in tokens of the n-grams returned by this + methods. - Returns: - The Counter containing all n-grams upto max_order in segment - with a count of how many times each n-gram occurred. - """ - ngram_counts = collections.Counter() - for order in range(1, max_order + 1): - for i in range(0, len(segment) - order + 1): - ngram = tuple(segment[i:i+order]) - ngram_counts[ngram] += 1 - return ngram_counts + Returns: + The Counter containing all n-grams upto max_order in segment + with a count of how many times each n-gram occurred. + """ + ngram_counts = collections.Counter() + for order in range(1, max_order + 1): + for i in range(0, len(segment) - order + 1): + ngram = tuple(segment[i:i + order]) + ngram_counts[ngram] += 1 + return ngram_counts def compute_bleu(reference_corpus, translation_corpus, max_order=4, smooth=False): - """Computes BLEU score of translated segments against one or more references. - - Args: - reference_corpus: list of lists of references for each translation. Each - reference should be tokenized into a list of tokens. - translation_corpus: list of translations to score. Each translation - should be tokenized into a list of tokens. - max_order: Maximum n-gram order to use when computing BLEU score. - smooth: Whether or not to apply Lin et al. 2004 smoothing. - - Returns: - 3-Tuple with the BLEU score, n-gram precisions, geometric mean of n-gram - precisions and brevity penalty. - """ - matches_by_order = [0] * max_order - possible_matches_by_order = [0] * max_order - reference_length = 0 - translation_length = 0 - for (references, translation) in zip(reference_corpus, - translation_corpus): - reference_length += min(len(r) for r in references) - translation_length += len(translation) - - merged_ref_ngram_counts = collections.Counter() - for reference in references: - merged_ref_ngram_counts |= _get_ngrams(reference, max_order) - translation_ngram_counts = _get_ngrams(translation, max_order) - overlap = translation_ngram_counts & merged_ref_ngram_counts - for ngram in overlap: - matches_by_order[len(ngram)-1] += overlap[ngram] - for order in range(1, max_order+1): - possible_matches = len(translation) - order + 1 - if possible_matches > 0: - possible_matches_by_order[order-1] += possible_matches - - precisions = [0] * max_order - for i in range(0, max_order): - if smooth: - precisions[i] = ((matches_by_order[i] + 1.) / - (possible_matches_by_order[i] + 1.)) + """Computes BLEU score of translated segments against one or more references. 
+ + Args: + reference_corpus: list of lists of references for each translation. Each + reference should be tokenized into a list of tokens. + translation_corpus: list of translations to score. Each translation + should be tokenized into a list of tokens. + max_order: Maximum n-gram order to use when computing BLEU score. + smooth: Whether or not to apply Lin et al. 2004 smoothing. + + Returns: + 3-Tuple with the BLEU score, n-gram precisions, geometric mean of n-gram + precisions and brevity penalty. + """ + matches_by_order = [0] * max_order + possible_matches_by_order = [0] * max_order + reference_length = 0 + translation_length = 0 + for (references, translation) in zip(reference_corpus, + translation_corpus): + reference_length += min(len(r) for r in references) + translation_length += len(translation) + + merged_ref_ngram_counts = collections.Counter() + for reference in references: + merged_ref_ngram_counts |= _get_ngrams(reference, max_order) + translation_ngram_counts = _get_ngrams(translation, max_order) + overlap = translation_ngram_counts & merged_ref_ngram_counts + for ngram in overlap: + matches_by_order[len(ngram) - 1] += overlap[ngram] + for order in range(1, max_order + 1): + possible_matches = len(translation) - order + 1 + if possible_matches > 0: + possible_matches_by_order[order - 1] += possible_matches + + precisions = [0] * max_order + for i in range(0, max_order): + if smooth: + precisions[i] = ((matches_by_order[i] + 1.) / + (possible_matches_by_order[i] + 1.)) + else: + if possible_matches_by_order[i] > 0: + precisions[i] = (float(matches_by_order[i]) / + possible_matches_by_order[i]) + else: + precisions[i] = 0.0 + + if min(precisions) > 0: + p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions) + geo_mean = math.exp(p_log_sum) else: - if possible_matches_by_order[i] > 0: - precisions[i] = (float(matches_by_order[i]) / - possible_matches_by_order[i]) - else: - precisions[i] = 0.0 + geo_mean = 0 - if min(precisions) > 0: - p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions) - geo_mean = math.exp(p_log_sum) - else: - geo_mean = 0 + ratio = float(translation_length) / reference_length - ratio = float(translation_length) / reference_length - - if ratio > 1.0: - bp = 1. - else: - bp = math.exp(1 - 1. / ratio) + if ratio > 1.0: + bp = 1. + else: + bp = math.exp(1 - 1. 
/ ratio) - bleu = geo_mean * bp + bleu = geo_mean * bp - return (bleu, precisions, bp, ratio, translation_length, reference_length) + return (bleu, precisions, bp, ratio, translation_length, reference_length) diff --git a/deeppavlov/metrics/recall_at_k.py b/deeppavlov/metrics/recall_at_k.py index bafc2f7332..a9525b3eb4 100644 --- a/deeppavlov/metrics/recall_at_k.py +++ b/deeppavlov/metrics/recall_at_k.py @@ -42,6 +42,7 @@ def recall_at_k(y_true: List[int], y_pred: List[List[np.ndarray]], k: int): num_correct += 1 return float(num_correct) / num_examples + @register_metric('r@1') def r_at_1(y_true, y_pred): return recall_at_k(y_true, y_pred, k=1) @@ -56,6 +57,7 @@ def r_at_2(y_true, y_pred): def r_at_5(labels, predictions): return recall_at_k(labels, predictions, k=5) + @register_metric('r@10') def r_at_10(labels, predictions): return recall_at_k(labels, predictions, k=10) diff --git a/deeppavlov/models/api_requester/api_requester.py b/deeppavlov/models/api_requester/api_requester.py index 65bf689131..df87db4523 100644 --- a/deeppavlov/models/api_requester/api_requester.py +++ b/deeppavlov/models/api_requester/api_requester.py @@ -23,7 +23,8 @@ class ApiRequester(Component): param_names: list of parameter names for API requests. debatchify: if True, single instances will be sent to the API endpoint instead of batches. """ - def __init__(self, url: str, out: [int, list], param_names: [list, tuple]=(), debatchify: bool=False, + + def __init__(self, url: str, out: [int, list], param_names: [list, tuple] = (), debatchify: bool = False, *args, **kwargs): self.url = url self.param_names = param_names diff --git a/deeppavlov/models/api_requester/api_router.py b/deeppavlov/models/api_requester/api_router.py index a59c255bc5..71f636cd2b 100644 --- a/deeppavlov/models/api_requester/api_router.py +++ b/deeppavlov/models/api_requester/api_router.py @@ -23,7 +23,7 @@ class ApiRouter(Component): n_workers: The maximum number of subprocesses to run """ - def __init__(self, api_requesters: List[ApiRequester], n_workers: int=1, *args, **kwargs): + def __init__(self, api_requesters: List[ApiRequester], n_workers: int = 1, *args, **kwargs): self.api_requesters = api_requesters self.n_workers = n_workers diff --git a/deeppavlov/models/bert/bert_classifier.py b/deeppavlov/models/bert/bert_classifier.py index d5c7b616e9..a22307046a 100644 --- a/deeppavlov/models/bert/bert_classifier.py +++ b/deeppavlov/models/bert/bert_classifier.py @@ -48,6 +48,7 @@ class BertClassifierModel(LRScheduledTFModel): pretrained_bert: pretrained Bert checkpoint min_learning_rate: min value of learning rate if learning rate decay is used """ + # TODO: add warmup # TODO: add head-only pre-training def __init__(self, bert_config_file, n_classes, keep_prob, @@ -153,7 +154,7 @@ def _init_placeholders(self): self.token_types_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='token_types_ph') if not self.one_hot_labels: - self.y_ph = tf.placeholder(shape=(None, ), dtype=tf.int32, name='y_ph') + self.y_ph = tf.placeholder(shape=(None,), dtype=tf.int32, name='y_ph') else: self.y_ph = tf.placeholder(shape=(None, self.n_classes), dtype=tf.float32, name='y_ph') diff --git a/deeppavlov/models/bert/bert_ranker.py b/deeppavlov/models/bert/bert_ranker.py index 4bed99afca..d01923e417 100644 --- a/deeppavlov/models/bert/bert_ranker.py +++ b/deeppavlov/models/bert/bert_ranker.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from logging import getLogger -from typing import List, Dict, Union -from collections import OrderedDict import re +from collections import OrderedDict +from logging import getLogger from operator import itemgetter +from typing import List, Dict, Union import numpy as np import tensorflow as tf @@ -26,8 +26,8 @@ from deeppavlov.core.commands.utils import expand_path from deeppavlov.core.common.registry import register -from deeppavlov.models.bert.bert_classifier import BertClassifierModel from deeppavlov.core.models.tf_model import LRScheduledTFModel +from deeppavlov.models.bert.bert_classifier import BertClassifierModel logger = getLogger(__name__) @@ -301,7 +301,6 @@ def train_on_batch(self, features_li: List[List[InputFeatures]], y: Union[List[i _, loss = self.sess.run([self.train_op, self.loss], feed_dict=feed_dict) return {'loss': loss, 'learning_rate': feed_dict[self.learning_rate_ph]} - def __call__(self, features_li: List[List[InputFeatures]]) -> Union[List[int], List[List[float]]]: """Calculate scores for the given context over candidate responses. @@ -362,8 +361,8 @@ class BertSepRankerPredictor(BertSepRankerModel): """ def __init__(self, bert_config_file, interact_mode=0, batch_size=32, - resps=None, resp_features=None, resp_vecs=None, - conts=None, cont_features=None, cont_vecs=None, **kwargs) -> None: + resps=None, resp_features=None, resp_vecs=None, + conts=None, cont_features=None, cont_vecs=None, **kwargs) -> None: super().__init__(bert_config_file=bert_config_file, **kwargs) diff --git a/deeppavlov/models/bert/bert_squad.py b/deeppavlov/models/bert/bert_squad.py index 7f5cfd6491..c0fa1550aa 100644 --- a/deeppavlov/models/bert/bert_squad.py +++ b/deeppavlov/models/bert/bert_squad.py @@ -137,7 +137,7 @@ def _init_graph(self): logit_mask = self.token_types_ph # [CLS] token is used as no answer - mask = tf.concat([tf.ones((bs, 1), dtype=tf.int32), tf.zeros((bs, seq_len-1), dtype=tf.int32)], axis=-1) + mask = tf.concat([tf.ones((bs, 1), dtype=tf.int32), tf.zeros((bs, seq_len - 1), dtype=tf.int32)], axis=-1) logit_mask = logit_mask + mask logits_st = softmax_mask(logits_st, logit_mask) @@ -258,7 +258,8 @@ def __call__(self, features: List[InputFeatures]) -> Tuple[List[int], List[int], input_type_ids = [f.input_type_ids for f in features] feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids) - st, end, logits, scores = self.sess.run([self.start_pred, self.end_pred, self.yp_logits, self.yp_score], feed_dict=feed_dict) + st, end, logits, scores = self.sess.run([self.start_pred, self.end_pred, self.yp_logits, self.yp_score], + feed_dict=feed_dict) return st, end, logits.tolist(), scores.tolist() @@ -288,6 +289,7 @@ class BertSQuADInferModel(Component): lang: either `en` or `ru`, it is used to select sentence tokenizer """ + def __init__(self, squad_model_config: str, vocab_file: str, do_lower_case: bool, diff --git a/deeppavlov/models/classifiers/keras_classification_model.py b/deeppavlov/models/classifiers/keras_classification_model.py index bcde5df3d3..865ef811a9 100644 --- a/deeppavlov/models/classifiers/keras_classification_model.py +++ b/deeppavlov/models/classifiers/keras_classification_model.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from copy import deepcopy +from logging import getLogger from pathlib import Path from typing import List, Tuple, Optional, Generator, Union -from logging import getLogger -from copy import deepcopy -import numpy as np import keras.metrics import keras.optimizers +import numpy as np from keras import backend as K from keras.layers import Dense, Input from keras.layers import concatenate, Activation, Concatenate, Reshape @@ -36,8 +36,8 @@ from deeppavlov.core.common.errors import ConfigError from deeppavlov.core.common.file import save_json, read_json from deeppavlov.core.common.registry import register -from deeppavlov.core.models.keras_model import LRScheduledKerasModel from deeppavlov.core.layers.keras_layers import additive_self_attention, multiplicative_self_attention +from deeppavlov.core.models.keras_model import LRScheduledKerasModel log = getLogger(__name__) @@ -284,7 +284,8 @@ def _load(self, model_name: str) -> None: try: model.load_weights(str(weights_path)) except ValueError: - raise ConfigError("Some non-changable parameters of neural network differ from given pre-trained model") + raise ConfigError( + "Some non-changable parameters of neural network differ from given pre-trained model") self.model = model @@ -378,7 +379,8 @@ def save(self, fname: str = None) -> None: self.opt["epochs_done"] = self.epochs_done if isinstance(self.opt.get("learning_rate", None), float): self.opt["final_learning_rate"] = K.eval(self.optimizer.lr) / (1. + - K.eval(self.optimizer.decay) * self.batches_seen) + K.eval( + self.optimizer.decay) * self.batches_seen) if self.opt.get("load_path") and self.opt.get("save_path"): if self.opt.get("save_path") != self.opt.get("load_path"): diff --git a/deeppavlov/models/classifiers/ru_obscenity_classifier.py b/deeppavlov/models/classifiers/ru_obscenity_classifier.py index d22ead8fa1..2c3ef26d36 100644 --- a/deeppavlov/models/classifiers/ru_obscenity_classifier.py +++ b/deeppavlov/models/classifiers/ru_obscenity_classifier.py @@ -1,16 +1,14 @@ -from typing import List, Union -from pathlib import Path -from logging import getLogger -import errno import json import re -import os +from logging import getLogger +from pathlib import Path +from typing import List, Union import pymorphy2 +from deeppavlov.core.commands.utils import expand_path from deeppavlov.core.common.registry import register from deeppavlov.core.models.estimator import Component -from deeppavlov.core.commands.utils import expand_path log = getLogger(__name__) @@ -107,14 +105,14 @@ def _check_obscenity(self, text: str) -> bool: word = word.lower() word.replace('ё', 'е') normal_word = self.morph.parse(word)[0].normal_form - if normal_word in self.obscenity_words_exception\ + if normal_word in self.obscenity_words_exception \ or word in self.obscenity_words_exception: continue - if normal_word in self.obscenity_words\ - or word in self.obscenity_words\ - or bool(self.regexp.findall(normal_word))\ - or bool(self.regexp.findall(word))\ - or bool(self.regexp2.findall(normal_word))\ + if normal_word in self.obscenity_words \ + or word in self.obscenity_words \ + or bool(self.regexp.findall(normal_word)) \ + or bool(self.regexp.findall(word)) \ + or bool(self.regexp2.findall(normal_word)) \ or bool(self.regexp2.findall(word)): return True return False diff --git a/deeppavlov/models/classifiers/utils.py b/deeppavlov/models/classifiers/utils.py index 03e6feabc0..14b01e960c 100644 --- a/deeppavlov/models/classifiers/utils.py +++ b/deeppavlov/models/classifiers/utils.py @@ -21,7 +21,7 @@ log = 
getLogger(__name__) -def labels2onehot(labels: [List[str], List[List[str]], np.ndarray], classes: [list, np.ndarray]) -> np.ndarray: +def labels2onehot(labels: [List[str], List[List[str]], np.ndarray], classes: [list, np.ndarray]) -> np.ndarray: """ Convert labels to one-hot vectors for multi-class multi-label classification @@ -49,7 +49,7 @@ def labels2onehot(labels: [List[str], List[List[str]], np.ndarray], classes: [l return y -def proba2labels(proba: [list, np.ndarray], confident_threshold: float, classes: [list, np.ndarray]) -> List[List]: +def proba2labels(proba: [list, np.ndarray], confident_threshold: float, classes: [list, np.ndarray]) -> List[List]: """ Convert vectors of probabilities to labels using confident threshold (if probability to belong with the class is bigger than confident_threshold, sample belongs with the class; @@ -74,7 +74,7 @@ def proba2labels(proba: [list, np.ndarray], confident_threshold: float, classes: return y -def proba2onehot(proba: [list, np.ndarray], confident_threshold: float, classes: [list, np.ndarray]) -> np.ndarray: +def proba2onehot(proba: [list, np.ndarray], confident_threshold: float, classes: [list, np.ndarray]) -> np.ndarray: """ Convert vectors of probabilities to one-hot representations using confident threshold diff --git a/deeppavlov/models/elmo/bilm_model.py b/deeppavlov/models/elmo/bilm_model.py index 665bd2ca86..cc7eacb8b0 100644 --- a/deeppavlov/models/elmo/bilm_model.py +++ b/deeppavlov/models/elmo/bilm_model.py @@ -48,6 +48,7 @@ class LanguageModel(object): 'dim' is the hidden state size. Set 'dim' == 'projection_dim' to skip a projection layer. """ + def __init__(self, options, is_training): self.options = options self.is_training = is_training @@ -285,14 +286,14 @@ def high(x, ww_carry, bb_carry, ww_tr, bb_tr): W_carry, b_carry, W_transform, b_transform) self.token_embedding_layers.append(tf.reshape(embedding, - [batch_size, unroll_steps, highway_dim])) + [batch_size, unroll_steps, highway_dim])) # finally project down to projection dim if needed if use_proj: embedding = tf.matmul(embedding, W_proj_cnn) + b_proj_cnn if self.bidirectional: embedding_reverse = tf.matmul(embedding_reverse, W_proj_cnn) \ - + b_proj_cnn + + b_proj_cnn self.token_embedding_layers.append( tf.reshape(embedding, [batch_size, unroll_steps, projection_dim]) ) diff --git a/deeppavlov/models/elmo/elmo.py b/deeppavlov/models/elmo/elmo.py index cb9b28c647..f197ae7c15 100644 --- a/deeppavlov/models/elmo/elmo.py +++ b/deeppavlov/models/elmo/elmo.py @@ -214,7 +214,7 @@ def __init__(self, unroll_steps: Optional[int] = None, n_tokens_vocab: Optional[int] = None, lstm: Optional[dict] = None, - dropout: Optional[float] = None, # Regularization + dropout: Optional[float] = None, # Regularization n_negative_samples_batch: Optional[int] = None, # Train options all_clip_norm_val: Optional[float] = None, initial_accumulator_value: float = 1.0, @@ -230,10 +230,10 @@ def __init__(self, **kwargs) -> None: # ================ Checking input args ================= - if not(options_json_path or (char_cnn and bidirectional and unroll_steps - and n_tokens_vocab and lstm and dropout and - n_negative_samples_batch and all_clip_norm_val - )): + if not (options_json_path or (char_cnn and bidirectional and unroll_steps + and n_tokens_vocab and lstm and dropout and + n_negative_samples_batch and all_clip_norm_val + )): raise Warning('Use options_json_path or/and direct params to set net architecture.') self.options = self._load_options(options_json_path) self._update_arch_options(char_cnn, 
bidirectional, unroll_steps, n_tokens_vocab, lstm) @@ -250,7 +250,7 @@ def __init__(self, self.train_options = {} self.valid_options = {'batch_size': 256, 'unroll_steps': 1, 'n_gpus': 1} - self.model_mode='' + self.model_mode = '' tf.set_random_seed(seed) np.random.seed(seed) @@ -306,7 +306,7 @@ def _update_other_options(self, dropout, n_negative_samples_batch, all_clip_norm if all_clip_norm_val is not None: self.options['all_clip_norm_val'] = all_clip_norm_val - def _get_epoch_from(self, epoch_load_path, default = 0): + def _get_epoch_from(self, epoch_load_path, default=0): path = self.load_path path = path.parent / epoch_load_path candidates = path.resolve().glob('[0-9]*') @@ -315,7 +315,7 @@ def _get_epoch_from(self, epoch_load_path, default = 0): epoch_num = max(candidates, default=default) return epoch_num - def _build_graph(self, graph, train = True): + def _build_graph(self, graph, train=True): with graph.as_default(): with tf.device('/cpu:0'): init_step = 0 @@ -417,8 +417,8 @@ def _init_session(self): def _fill_feed_dict(self, char_ids_batches, reversed_char_ids_batches, - token_ids_batches = None, - reversed_token_ids_batches = None): + token_ids_batches=None, + reversed_token_ids_batches=None): # init state tensors feed_dict = {t: v for t, v in zip(self.init_state_tensors, self.init_state_values)} @@ -475,7 +475,7 @@ def load(self, epoch: Optional[int] = None) -> None: saver = tf.train.Saver() saver.restore(self.sess, path) else: - log.info(f'[A checkpoint not found in {path}]') + log.info(f'[A checkpoint not found in {path}]') @overrides def save(self, epoch: Optional[int] = None) -> None: @@ -540,7 +540,7 @@ def _build_model(self, train: bool, epoch: Optional[int] = None, **kwargs): train=False) with self.graph.as_default(): - self.init_state_values, self.init_state_tensors, self.final_state_tensors =\ + self.init_state_values, self.init_state_tensors, self.final_state_tensors = \ self._init_session() self.load(epoch) diff --git a/deeppavlov/models/elmo/elmo2tfhub.py b/deeppavlov/models/elmo/elmo2tfhub.py index 72276707d5..a304bf6837 100644 --- a/deeppavlov/models/elmo/elmo2tfhub.py +++ b/deeppavlov/models/elmo/elmo2tfhub.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import shutil + import numpy as np import tensorflow as tf import tensorflow_hub as hub -import shutil from deeppavlov.models.elmo.elmo_model import BidirectionalLanguageModel, weight_layers @@ -30,6 +31,7 @@ def make_module_spec(options, weight_file): Returns: A module spec object used for constructing a TF-Hub module. """ + def module_fn(): """Spec function for a token embedding module.""" # init @@ -89,7 +91,7 @@ def sentence_tagging_and_padding(sen_dim): # Input placeholders to the biLM. 
tokens = tf.placeholder(shape=(None, None), dtype=tf.string, name='ph2tokens') - sequence_len = tf.placeholder(shape=(None, ), dtype=tf.int32, name='ph2sequence_len') + sequence_len = tf.placeholder(shape=(None,), dtype=tf.int32, name='ph2sequence_len') tok_shape = tf.shape(tokens) line_tokens = tf.reshape(tokens, shape=[-1], name='reshape2line_tokens') diff --git a/deeppavlov/models/elmo/elmo_model.py b/deeppavlov/models/elmo/elmo_model.py index 000af987e6..8e475dcedb 100644 --- a/deeppavlov/models/elmo/elmo_model.py +++ b/deeppavlov/models/elmo/elmo_model.py @@ -142,7 +142,7 @@ def _build_ops(self, lm_graph): for layer in layers: layer_wo_bos_eos = layer[:, 1:, :] layer_wo_bos_eos = tf.reverse_sequence( - layer_wo_bos_eos, + layer_wo_bos_eos, lm_graph.sequence_lengths - 1, seq_axis=1, batch_axis=0, @@ -182,7 +182,7 @@ def _build_ops(self, lm_graph): mask_wo_bos_eos = tf.cast(mask_wo_bos_eos, 'bool') return { - 'lm_embeddings': lm_embeddings, + 'lm_embeddings': lm_embeddings, 'lengths': sequence_length_wo_bos_eos, 'token_embeddings': lm_graph.embedding, 'mask': mask_wo_bos_eos, @@ -253,6 +253,7 @@ class BidirectionalLanguageModelGraph(object): Creates the computational graph and holds the ops necessary for runnint a bidirectional language model """ + def __init__(self, options, weight_file, ids_placeholder, use_character_inputs=True, embedding_weight_file=None, max_batch_size=128): @@ -405,15 +406,15 @@ def make_convolutions(inp): if use_proj: assert n_filters > projection_dim with tf.variable_scope('CNN_proj'): - W_proj_cnn = tf.get_variable( - "W_proj", [n_filters, projection_dim], - initializer=tf.random_normal_initializer( - mean=0.0, stddev=np.sqrt(1.0 / n_filters)), - dtype=DTYPE) - b_proj_cnn = tf.get_variable( - "b_proj", [projection_dim], - initializer=tf.constant_initializer(0.0), - dtype=DTYPE) + W_proj_cnn = tf.get_variable( + "W_proj", [n_filters, projection_dim], + initializer=tf.random_normal_initializer( + mean=0.0, stddev=np.sqrt(1.0 / n_filters)), + dtype=DTYPE) + b_proj_cnn = tf.get_variable( + "b_proj", [projection_dim], + initializer=tf.constant_initializer(0.0), + dtype=DTYPE) # apply highways layers def high(x, ww_carry, bb_carry, ww_tr, bb_tr): @@ -586,7 +587,7 @@ def _build_lstms(self): init_states[i][batch_size:, :]], axis=0) state_update_op = tf.assign(init_states[i], new_state) update_ops.append(state_update_op) - + layer_input = layer_output self.mask = mask @@ -623,6 +624,7 @@ def weight_layers(name, bilm_ops, l2_coef=None, 'regularization_op': op to compute regularization term } """ + def _l2_regularizer(weights): if l2_coef is not None: return l2_coef * tf.reduce_sum(tf.square(weights)) @@ -647,7 +649,7 @@ def _do_ln(x): x_masked = x * broadcast_mask N = tf.reduce_sum(mask_float) * lm_dim mean = tf.reduce_sum(x_masked) / N - variance = tf.reduce_sum(((x_masked - mean) * broadcast_mask)**2) / N + variance = tf.reduce_sum(((x_masked - mean) * broadcast_mask) ** 2) / N return tf.nn.batch_normalization( x, mean, variance, None, None, 1E-12 ) @@ -662,7 +664,7 @@ def _do_ln(x): with tf.variable_scope("aggregation", reuse=reuse): W = tf.get_variable( '{}_ELMo_W'.format(name), - shape=(n_lm_layers, ), + shape=(n_lm_layers,), initializer=tf.zeros_initializer, regularizer=_l2_regularizer, trainable=True, @@ -697,7 +699,7 @@ def _do_ln(x): with tf.variable_scope("aggregation", reuse=reuse): gamma = tf.get_variable( '{}_ELMo_gamma'.format(name), - shape=(1, ), + shape=(1,), initializer=tf.ones_initializer, regularizer=None, trainable=True, diff --git 
a/deeppavlov/models/embedders/abstract_embedder.py b/deeppavlov/models/embedders/abstract_embedder.py index 7a297212f4..c9a52c2b70 100644 --- a/deeppavlov/models/embedders/abstract_embedder.py +++ b/deeppavlov/models/embedders/abstract_embedder.py @@ -43,6 +43,7 @@ class Embedder(Component, Serializable, metaclass=ABCMeta): mean: whether to return one mean embedding vector per sample load_path: path with pre-trained fastText binary model """ + def __init__(self, load_path: Union[str, Path], pad_zero: bool = False, mean: bool = False, **kwargs) -> None: """ Initialize embedder with given parameters diff --git a/deeppavlov/models/embedders/elmo_embedder.py b/deeppavlov/models/embedders/elmo_embedder.py index a6dd603bb6..09990ce648 100644 --- a/deeppavlov/models/embedders/elmo_embedder.py +++ b/deeppavlov/models/embedders/elmo_embedder.py @@ -131,6 +131,7 @@ class ELMoEmbedder(Component, metaclass=TfModelMeta): """ + def __init__(self, spec: str, elmo_output_names: Optional[List] = None, dim: Optional[int] = None, pad_zero: bool = False, concat_last_axis: bool = True, max_token: Optional[int] = None, @@ -224,7 +225,7 @@ def _fill_batch(self, batch): batch = [batch_line[:self.max_token] for batch_line in batch] tokens_length = [len(batch_line) for batch_line in batch] tokens_length_max = max(tokens_length) - batch = [batch_line + ['']*(tokens_length_max - len(batch_line)) for batch_line in batch] + batch = [batch_line + [''] * (tokens_length_max - len(batch_line)) for batch_line in batch] return batch, tokens_length diff --git a/deeppavlov/models/embedders/tfidf_weighted_embedder.py b/deeppavlov/models/embedders/tfidf_weighted_embedder.py index c80a72601c..880138de33 100644 --- a/deeppavlov/models/embedders/tfidf_weighted_embedder.py +++ b/deeppavlov/models/embedders/tfidf_weighted_embedder.py @@ -197,7 +197,8 @@ def __call__(self, batch: List[List[str]], tags_batch: Optional[List[List[str]]] if self.tags_vocab: if tags_batch is None: raise ConfigError("TfidfWeightedEmbedder got 'tags_vocab_path' but __call__ did not get tags_batch.") - batch = [self._tags_encode(sample, tags_sample, mean=mean) for sample, tags_sample in zip(batch, tags_batch)] + batch = [self._tags_encode(sample, tags_sample, mean=mean) for sample, tags_sample in + zip(batch, tags_batch)] else: if tags_batch: raise ConfigError("TfidfWeightedEmbedder got tags batch, but 'tags_vocab_path' is empty.") @@ -301,4 +302,3 @@ def _tags_encode(self, tokens: List[str], tags: List[str], mean: bool) -> Union[ embedded_tokens = np.array([weights[i] * embedded_tokens[i] for i in range(len(tokens))]) return embedded_tokens - diff --git a/deeppavlov/models/go_bot/network.py b/deeppavlov/models/go_bot/network.py index e7c304f95d..1401aaeae6 100644 --- a/deeppavlov/models/go_bot/network.py +++ b/deeppavlov/models/go_bot/network.py @@ -13,9 +13,9 @@ # limitations under the License. 
import collections +import copy import json import re -import copy from logging import getLogger from typing import Dict, Any, List, Optional, Union, Tuple @@ -282,7 +282,7 @@ def _encode_context(self, # random embedding instead of zeros if np.all(emb_features < 1e-20): emb_dim = self.embedder.dim - emb_features = np.fabs(np.random.normal(0, 1/emb_dim, emb_dim)) + emb_features = np.fabs(np.random.normal(0, 1 / emb_dim, emb_dim)) # Intent features intent_features = [] @@ -321,11 +321,11 @@ def _encode_context(self, if self.debug: log.debug(f"Context features = {context_features}") - debug_msg = f"num bow features = {bow_features}" +\ - f", num emb features = {emb_features}" +\ - f", num intent features = {intent_features}" +\ - f", num state features = {len(state_features)}" +\ - f", num context features = {len(context_features)}" +\ + debug_msg = f"num bow features = {bow_features}" + \ + f", num emb features = {emb_features}" + \ + f", num intent features = {intent_features}" + \ + f", num state features = {len(state_features)}" + \ + f", num context features = {len(context_features)}" + \ f", prev_action shape = {len(state['prev_action'])}" log.debug(debug_msg) @@ -559,7 +559,7 @@ def network_call(self, feed_dict[self._emb_context] = emb_context feed_dict[self._key] = key - probs, prediction, state =\ + probs, prediction, state = \ self.sess.run([self._probs, self._prediction, self._state], feed_dict=feed_dict) diff --git a/deeppavlov/models/go_bot/templates.py b/deeppavlov/models/go_bot/templates.py index 1b5870ba34..e6cb49d663 100644 --- a/deeppavlov/models/go_bot/templates.py +++ b/deeppavlov/models/go_bot/templates.py @@ -87,13 +87,13 @@ def update(self, default="", dontcare=""): self.dontcare = self.dontcare or dontcare def __contains__(self, t): - return t.default and (t.default == self.default)\ - or t.dontcare and (t.dontcare == self.dontcare) + return t.default and (t.default == self.default) \ + or t.dontcare and (t.dontcare == self.dontcare) def __eq__(self, other): if isinstance(other, self.__class__): - return (self.default == other.default)\ - and (self.dontcare == other.dontcare) + return (self.default == other.default) \ + and (self.dontcare == other.dontcare) return False def __hash__(self): diff --git a/deeppavlov/models/go_bot/tracker.py b/deeppavlov/models/go_bot/tracker.py index 9b16a0220f..bcf7c7f866 100644 --- a/deeppavlov/models/go_bot/tracker.py +++ b/deeppavlov/models/go_bot/tracker.py @@ -64,6 +64,7 @@ class DefaultTracker(Tracker): Parameters: slot_names: list of slots that should be tracked. """ + def __init__(self, slot_names: List[str]) -> None: self.slot_names = list(slot_names) self.reset_state() @@ -83,6 +84,7 @@ def reset_state(self): def update_state(self, slots): def _filter(slots): return filter(lambda s: s[0] in self.slot_names, slots) + if isinstance(slots, list): self.history.extend(_filter(slots)) elif isinstance(slots, dict): @@ -120,6 +122,7 @@ class FeaturizedTracker(Tracker): Parameters: slot_names: list of slots that should be tracked. 
""" + def __init__(self, slot_names: List[str]) -> None: self.slot_names = list(slot_names) self.reset_state() @@ -139,6 +142,7 @@ def reset_state(self): def update_state(self, slots): def _filter(slots): return filter(lambda s: s[0] in self.slot_names, slots) + prev_state = self.get_state() if isinstance(slots, list): self.history.extend(_filter(slots)) @@ -174,7 +178,7 @@ def _diff_features(self, state): feats = np.zeros(self.state_size, dtype=np.float32) curr_state = self.get_state() for i, slot in enumerate(self.slot_names): - if (slot in curr_state) and (slot in state) and\ + if (slot in curr_state) and (slot in state) and \ (curr_state[slot] != state[slot]): feats[i] = 1. return feats diff --git a/deeppavlov/models/kbqa/entity_linking.py b/deeppavlov/models/kbqa/entity_linking.py index 90090fe206..0fd9408aba 100644 --- a/deeppavlov/models/kbqa/entity_linking.py +++ b/deeppavlov/models/kbqa/entity_linking.py @@ -11,18 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools import pickle from logging import getLogger from typing import List, Dict, Tuple, Optional -import itertools -from fuzzywuzzy import fuzz -import pymorphy2 import nltk +import pymorphy2 +from fuzzywuzzy import fuzz from deeppavlov.core.common.registry import register -from deeppavlov.core.models.serializable import Serializable from deeppavlov.core.models.component import Component +from deeppavlov.core.models.serializable import Serializable from deeppavlov.models.spelling_correction.levenshtein.levenshtein_searcher import LevenshteinSearcher log = getLogger(__name__) @@ -149,7 +149,7 @@ def _log_entities(self, srtd_cand_ent): entities_to_print = [] for name, q, ratio, n_rel in srtd_cand_ent: entities_to_print.append(f'{name}, http://wikidata.org/wiki/{q}, {ratio}, {n_rel}') - log.debug('\n'+'\n'.join(entities_to_print)) + log.debug('\n' + '\n'.join(entities_to_print)) def find_candidate_entities(self, entity: str) -> List[str]: candidate_entities = list(self.name_to_q.get(entity, [])) @@ -226,7 +226,8 @@ def filter_triplets_rus(entity_triplets: List[List[List[str]]], question_tokens: if triplet[0] == property_is_instance_of and triplet[1] == id_for_entity_asteroid: entity_is_asteroid = True break - if found_what_template and (entity_is_human or entity_is_named or entity_is_asteroid or wiki_entity[2]<90): + if found_what_template and ( + entity_is_human or entity_is_named or entity_is_asteroid or wiki_entity[2] < 90): continue filtered_entity_triplets.append(triplets_for_entity) filtered_entities.append(wiki_entity) @@ -270,8 +271,8 @@ def candidate_entities_names(self, candidate_entities: List[Tuple[str]]) -> List return candidate_names def sort_found_entities(self, candidate_entities: List[Tuple[str]], - candidate_names: List[List[str]], - entity: str) -> Tuple[List[str], List[str], List[Tuple[str]]]: + candidate_names: List[List[str]], + entity: str) -> Tuple[List[str], List[str], List[Tuple[str]]]: entities_ratios = [] for candidate, entity_names in zip(candidate_entities, candidate_names): entity_id = candidate[0] @@ -286,6 +287,6 @@ def sort_found_entities(self, candidate_entities: List[Tuple[str]], srtd_with_ratios = sorted(entities_ratios, key=lambda x: (x[2], x[3]), reverse=True) wiki_entities = [ent[1] for ent in srtd_with_ratios if ent[2] > 84] - confidences = [float(ent[2])*0.01 for ent in srtd_with_ratios if ent[2] > 84] - + confidences = [float(ent[2]) * 
0.01 for ent in srtd_with_ratios if ent[2] > 84] + return wiki_entities, confidences, srtd_with_ratios diff --git a/deeppavlov/models/kbqa/kb_answer_parser_wikidata.py b/deeppavlov/models/kbqa/kb_answer_parser_wikidata.py index 9e422d304c..e866b8b501 100644 --- a/deeppavlov/models/kbqa/kb_answer_parser_wikidata.py +++ b/deeppavlov/models/kbqa/kb_answer_parser_wikidata.py @@ -13,16 +13,15 @@ # limitations under the License. import pickle -from pathlib import Path -from string import punctuation from logging import getLogger +from string import punctuation from typing import List, Tuple, Optional, Dict import numpy as np -from deeppavlov.core.models.serializable import Serializable from deeppavlov.core.common.registry import register from deeppavlov.core.models.component import Component +from deeppavlov.core.models.serializable import Serializable from deeppavlov.models.kbqa.entity_linking import EntityLinker log = getLogger(__name__) @@ -115,7 +114,7 @@ def __call__(self, tokens_batch: List[List[str]], top_k_relations, top_k_probs = self._parse_relations_probs(relations_probs) top_k_relation_names = [self._relations_mapping[rel] for rel in top_k_relations] if self._debug: - log.debug("top k relations {}" .format(str(top_k_relation_names))) + log.debug("top k relations {}".format(str(top_k_relation_names))) obj, confidence = self._match_triplet(entity_triplets, entity_linking_confidences, top_k_relations, @@ -194,7 +193,7 @@ def entities_and_rels_from_templates(self, tokens: List[List[str]]) -> Tuple[str if template_start in s_sanitized and template_end in s_sanitized: template_start_pos = s_sanitized.find(template_start) template_end_pos = s_sanitized.find(template_end) - ent_cand = s_sanitized[template_start_pos+len(template_start): template_end_pos or len(s_sanitized)] + ent_cand = s_sanitized[template_start_pos + len(template_start): template_end_pos or len(s_sanitized)] if len(ent_cand) < len(ent) or len(ent) == 0: ent = ent_cand relation = self.templates[template] diff --git a/deeppavlov/models/morpho_tagger/cells.py b/deeppavlov/models/morpho_tagger/cells.py index b1fc40e72c..1736859c75 100644 --- a/deeppavlov/models/morpho_tagger/cells.py +++ b/deeppavlov/models/morpho_tagger/cells.py @@ -68,7 +68,6 @@ def weighted_sum(first, second, sigma, first_threshold=-np.inf, second_threshold class WeightedCombinationLayer(kl.Layer): - """ A class for weighted combination of probability distributions """ @@ -132,7 +131,7 @@ def call(self, inputs, **kwargs): embedded_features = kb.bias_add( embedded_features, self.features_bias, data_format="channels_last") if self.use_dimension_bias: - tiling_shape = [1] * (kb.ndim(first)-1) + [kb.shape(first)[-1]] + tiling_shape = [1] * (kb.ndim(first) - 1) + [kb.shape(first)[-1]] embedded_features = kb.tile(embedded_features, tiling_shape) embedded_features = kb.bias_add( embedded_features, self.dimensions_bias, data_format="channels_last") @@ -177,4 +176,4 @@ def positions_func(inputs, pad=0): """ position_inputs = kb.cumsum(kb.ones_like(inputs, dtype="float32"), axis=1) position_inputs *= kb.cast(kb.not_equal(inputs, pad), "float32") - return kb.log(1.0 + position_inputs) \ No newline at end of file + return kb.log(1.0 + position_inputs) diff --git a/deeppavlov/models/morpho_tagger/common.py b/deeppavlov/models/morpho_tagger/common.py index 9abf0bc06e..4f6371536a 100644 --- a/deeppavlov/models/morpho_tagger/common.py +++ b/deeppavlov/models/morpho_tagger/common.py @@ -109,7 +109,7 @@ def set_format_mode(self, format_mode: str = "basic") -> None: def 
_make_format_string(self) -> None: if self.format_mode == "basic": - self.format_string = "{}\t{}\t{}\t{}" + self.format_string = "{}\t{}\t{}\t{}" elif self.format_mode.lower() in ["conllu", "ud"]: self.format_string = "{}\t{}\t_\t{}\t_\t{}\t_\t_\t_\t_" else: diff --git a/deeppavlov/models/morpho_tagger/common_tagger.py b/deeppavlov/models/morpho_tagger/common_tagger.py index 6b7b905b39..86959d9919 100644 --- a/deeppavlov/models/morpho_tagger/common_tagger.py +++ b/deeppavlov/models/morpho_tagger/common_tagger.py @@ -21,6 +21,7 @@ EPS = 1e-15 + # AUXILIARY = ['PAD', 'BEGIN', 'END', 'UNKNOWN'] # AUXILIARY_CODES = PAD, BEGIN, END, UNKNOWN = 0, 1, 2, 3 diff --git a/deeppavlov/models/morpho_tagger/lemmatizer.py b/deeppavlov/models/morpho_tagger/lemmatizer.py index 2f0450ab51..7cb12dfb59 100644 --- a/deeppavlov/models/morpho_tagger/lemmatizer.py +++ b/deeppavlov/models/morpho_tagger/lemmatizer.py @@ -19,8 +19,8 @@ from pymorphy2 import MorphAnalyzer from russian_tagsets import converters -from deeppavlov.core.models.serializable import Serializable from deeppavlov.core.common.registry import register +from deeppavlov.core.models.serializable import Serializable from deeppavlov.models.morpho_tagger.common_tagger import get_tag_distance @@ -77,6 +77,7 @@ class UDPymorphyLemmatizer(BasicLemmatizer): Lemma is selected from one of PyMorphy parses, the parse whose tag resembles the most a known UD tag is chosen. """ + def __init__(self, save_path: Optional[str] = None, load_path: Optional[str] = None, transform_lemmas=False, **kwargs) -> None: self.transform_lemmas = transform_lemmas diff --git a/deeppavlov/models/morpho_tagger/morpho_tagger.py b/deeppavlov/models/morpho_tagger/morpho_tagger.py index b4a1dd0f7a..ad112cb206 100644 --- a/deeppavlov/models/morpho_tagger/morpho_tagger.py +++ b/deeppavlov/models/morpho_tagger/morpho_tagger.py @@ -16,11 +16,11 @@ from pathlib import Path from typing import List, Optional, Union, Tuple -import numpy as np +import keras.backend as kb import keras.layers as kl import keras.optimizers as ko import keras.regularizers as kreg -import keras.backend as kb +import numpy as np from keras import Model from deeppavlov.core.common.registry import register @@ -75,6 +75,7 @@ class MorphoTagger(KerasModel): A subclass of :class:`~deeppavlov.core.models.keras_model.KerasModel` """ + def __init__(self, symbols: SimpleVocabulary, tags: SimpleVocabulary, @@ -166,7 +167,7 @@ def _initialize(self): def build(self): """Builds the network using Keras. 
""" - word_inputs = kl.Input(shape=(None, MAX_WORD_LENGTH+2), dtype="int32") + word_inputs = kl.Input(shape=(None, MAX_WORD_LENGTH + 2), dtype="int32") inputs = [word_inputs] word_outputs = self._build_word_cnn(word_inputs) if len(self.word_vectorizers) > 0: @@ -231,17 +232,17 @@ def _build_basic_network(self, word_outputs): lstm_outputs = kl.Dropout(self.word_dropout)(word_outputs) else: lstm_outputs = word_outputs - for j in range(self.word_lstm_layers-1): + for j in range(self.word_lstm_layers - 1): lstm_outputs = kl.Bidirectional( kl.LSTM(self.word_lstm_units[j], return_sequences=True, dropout=self.lstm_dropout))(lstm_outputs) lstm_outputs = kl.Bidirectional( - kl.LSTM(self.word_lstm_units[-1], return_sequences=True, - dropout=self.lstm_dropout))(lstm_outputs) + kl.LSTM(self.word_lstm_units[-1], return_sequences=True, + dropout=self.lstm_dropout))(lstm_outputs) pre_outputs = kl.TimeDistributed( - kl.Dense(len(self.tags), activation="softmax", - activity_regularizer=self.regularizer), - name="p")(lstm_outputs) + kl.Dense(len(self.tags), activation="softmax", + activity_regularizer=self.regularizer), + name="p")(lstm_outputs) return pre_outputs, lstm_outputs def _transform_batch(self, data, labels=None, transform_to_one_hot=True): @@ -309,7 +310,7 @@ def __call__(self, *x_batch, **kwargs) -> Union[List, np.ndarray]: """ return self.predict_on_batch(x_batch, **kwargs) - def _make_sent_vector(self, sent: List, bucket_length: int =None) -> np.ndarray: + def _make_sent_vector(self, sent: List, bucket_length: int = None) -> np.ndarray: """Transforms a sentence to Numpy array, which will be the network input. Args: @@ -321,14 +322,14 @@ def _make_sent_vector(self, sent: List, bucket_length: int =None) -> np.ndarray: in j-th word of i-th input sentence. 
""" bucket_length = bucket_length or len(sent) - answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH+2), dtype=np.int32) + answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH + 2), dtype=np.int32) for i, word in enumerate(sent): answer[i, 0] = self.tags["BEGIN"] m = min(len(word), MAX_WORD_LENGTH) for j, x in enumerate(word[-m:]): - answer[i, j+1] = self.symbols[x] - answer[i, m+1] = self.tags["END"] - answer[i, m+2:] = self.tags["PAD"] + answer[i, j + 1] = self.symbols[x] + answer[i, m + 1] = self.tags["END"] + answer[i, m + 2:] = self.tags["PAD"] return answer def _make_tags_vector(self, tags, bucket_length=None) -> np.ndarray: diff --git a/deeppavlov/models/ner/bio.py b/deeppavlov/models/ner/bio.py index 19eca12da1..7eb75015ed 100644 --- a/deeppavlov/models/ner/bio.py +++ b/deeppavlov/models/ner/bio.py @@ -24,6 +24,7 @@ @register('ner_bio_converter') class BIOMarkupRestorer(Component): """Restores BIO markup for tags batch""" + def __init__(self, *args, **kwargs) -> None: pass diff --git a/deeppavlov/models/ner/svm.py b/deeppavlov/models/ner/svm.py index 04b656e4b3..d8eda1538b 100644 --- a/deeppavlov/models/ner/svm.py +++ b/deeppavlov/models/ner/svm.py @@ -36,6 +36,7 @@ class SVMTagger(Estimator): kernel: kernel of SVM (RBF works well in the most of the cases) seed: seed for SVM initialization """ + def __init__(self, return_probabilities: bool = False, kernel: str = 'rbf', seed=42, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.classifier = None diff --git a/deeppavlov/models/preprocessors/bert_preprocessor.py b/deeppavlov/models/preprocessors/bert_preprocessor.py index 86dfdd6df3..5275800e5c 100644 --- a/deeppavlov/models/preprocessors/bert_preprocessor.py +++ b/deeppavlov/models/preprocessors/bert_preprocessor.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import re from logging import getLogger from typing import Tuple, List, Optional, Union diff --git a/deeppavlov/models/preprocessors/capitalization.py b/deeppavlov/models/preprocessors/capitalization.py index 59fe43b74c..3760979471 100644 --- a/deeppavlov/models/preprocessors/capitalization.py +++ b/deeppavlov/models/preprocessors/capitalization.py @@ -38,6 +38,7 @@ class CapitalizationPreprocessor(Component): Attributes: dim: dimensionality of the feature vectors, produced by the featurizer """ + def __init__(self, pad_zeros: bool = True, *args, **kwargs) -> None: self.pad_zeros = pad_zeros self._num_of_features = 4 diff --git a/deeppavlov/models/preprocessors/char_splitter.py b/deeppavlov/models/preprocessors/char_splitter.py index 765a79867a..c242d3612e 100644 --- a/deeppavlov/models/preprocessors/char_splitter.py +++ b/deeppavlov/models/preprocessors/char_splitter.py @@ -25,6 +25,7 @@ @register('char_splitter') class CharSplitter(Component): """This component transforms batch of sequences of tokens into batch of sequences of character sequences.""" + def __init__(self, **kwargs): pass diff --git a/deeppavlov/models/preprocessors/dirty_comments_preprocessor.py b/deeppavlov/models/preprocessors/dirty_comments_preprocessor.py index 90ce6c23fa..656e819478 100644 --- a/deeppavlov/models/preprocessors/dirty_comments_preprocessor.py +++ b/deeppavlov/models/preprocessors/dirty_comments_preprocessor.py @@ -25,6 +25,7 @@ class DirtyCommentsPreprocessor(Component): """ Class implements preprocessing of english texts with low level of literacy such as comments """ + def __init__(self, remove_punctuation: bool = True, *args, **kwargs): self.remove_punctuation = remove_punctuation diff --git a/deeppavlov/models/preprocessors/mask.py b/deeppavlov/models/preprocessors/mask.py index 48da525b01..ca4e3149a2 100644 --- a/deeppavlov/models/preprocessors/mask.py +++ b/deeppavlov/models/preprocessors/mask.py @@ -21,6 +21,7 @@ @register('mask') class Mask(Component): """Takes batch of tokens and returns the masks of corresponding length""" + def __init__(self, *args, **kwargs): pass diff --git a/deeppavlov/models/preprocessors/one_hotter.py b/deeppavlov/models/preprocessors/one_hotter.py index 3accf6451d..7dca070f92 100644 --- a/deeppavlov/models/preprocessors/one_hotter.py +++ b/deeppavlov/models/preprocessors/one_hotter.py @@ -33,6 +33,7 @@ class OneHotter(Component): pad_zeros: whether to pad elements of batch with zeros single_vector: whether to return one vector for the sample (sum of each one-hotted vectors) """ + def __init__(self, depth: int, pad_zeros: bool = False, single_vector=False, *args, **kwargs): self._depth = depth diff --git a/deeppavlov/models/preprocessors/random_embeddings_matrix.py b/deeppavlov/models/preprocessors/random_embeddings_matrix.py index 1fae19b372..b72f75a0fa 100644 --- a/deeppavlov/models/preprocessors/random_embeddings_matrix.py +++ b/deeppavlov/models/preprocessors/random_embeddings_matrix.py @@ -28,6 +28,7 @@ class RandomEmbeddingsMatrix: Attributes: dim: dimensionality of the embeddings """ + def __init__(self, vocab_len: int, emb_dim: int, *args, **kwargs) -> None: self.emb_mat = np.random.randn(vocab_len, emb_dim).astype(np.float32) / np.sqrt(emb_dim) diff --git a/deeppavlov/models/preprocessors/russian_lemmatizer.py b/deeppavlov/models/preprocessors/russian_lemmatizer.py index d05aa1139f..ae68f4fc97 100644 --- a/deeppavlov/models/preprocessors/russian_lemmatizer.py +++ b/deeppavlov/models/preprocessors/russian_lemmatizer.py @@ -21,6 +21,7 @@ 
@register('pymorphy_russian_lemmatizer') class PymorphyRussianLemmatizer(Component): """Class for lemmatization using PyMorphy.""" + def __init__(self, *args, **kwargs): self.lemmatizer = pymorphy2.MorphAnalyzer() diff --git a/deeppavlov/models/preprocessors/sanitizer.py b/deeppavlov/models/preprocessors/sanitizer.py index 314c0709c6..7d60cd29a9 100644 --- a/deeppavlov/models/preprocessors/sanitizer.py +++ b/deeppavlov/models/preprocessors/sanitizer.py @@ -29,6 +29,7 @@ class Sanitizer(Component): diacritical signs are something like hats and stress marks nums: whether to replace all digits with 1 or not """ + def __init__(self, diacritical: bool = True, nums: bool = False, diff --git a/deeppavlov/models/preprocessors/siamese_preprocessor.py b/deeppavlov/models/preprocessors/siamese_preprocessor.py index c95f01a122..9a7a92332e 100644 --- a/deeppavlov/models/preprocessors/siamese_preprocessor.py +++ b/deeppavlov/models/preprocessors/siamese_preprocessor.py @@ -111,7 +111,7 @@ def __call__(self, x: Union[List[List[str]], List[str]]) -> Iterable[List[List[n else: x_preproc = [[el] for el in x] else: - x_preproc = [el[:self.num_context_turns+self.num_ranking_samples] for el in x] + x_preproc = [el[:self.num_context_turns + self.num_ranking_samples] for el in x] for el in x_preproc: x_tok = self.tokenizer(el) x_ctok = [y if len(y) != 0 else [''] for y in x_tok] @@ -126,7 +126,7 @@ def __call__(self, x: Union[List[List[str]], List[str]]) -> Iterable[List[List[n x_proc = zero_pad_truncate(x_proc, msl, pad=self.padding, trunc=self.truncating) x_proc = list(x_proc) if self.add_raw_text: - x_proc += el # add (self.num_context_turns+self.num_ranking_samples) raw sentences + x_proc += el # add (self.num_context_turns+self.num_ranking_samples) raw sentences yield x_proc def load(self) -> None: diff --git a/deeppavlov/models/preprocessors/squad_preprocessor.py b/deeppavlov/models/preprocessors/squad_preprocessor.py index 62a09cbf6d..f589faf1ce 100644 --- a/deeppavlov/models/preprocessors/squad_preprocessor.py +++ b/deeppavlov/models/preprocessors/squad_preprocessor.py @@ -13,13 +13,13 @@ # limitations under the License. +import bisect import pickle import unicodedata from collections import Counter from logging import getLogger from pathlib import Path from typing import Tuple, List, Union -import bisect import numpy as np from nltk import word_tokenize @@ -53,11 +53,11 @@ def __init__(self, context_limit: int = 450, question_limit: int = 150, char_lim def __call__(self, contexts_raw: Tuple[str, ...], questions_raw: Tuple[str, ...], **kwargs) -> Tuple[ - List[str], List[List[str]], List[List[List[str]]], - List[List[int]], List[List[int]], - List[str], List[List[str]], List[List[List[str]]], - List[List[Tuple[int, int]]] - ]: + List[str], List[List[str]], List[List[List[str]]], + List[List[int]], List[List[int]], + List[str], List[List[str]], List[List[List[str]]], + List[List[Tuple[int, int]]] + ]: """ Performs preprocessing of context and question Args: contexts_raw: batch of contexts to preprocess @@ -100,7 +100,7 @@ def __call__(self, contexts_raw: Tuple[str, ...], questions_raw: Tuple[str, ...] 
questions_chars.append(q_chars) spans.append(SquadPreprocessor.convert_idx(c, c_tokens)) return contexts, contexts_tokens, contexts_chars, contexts_r2p, contexts_p2r, \ - questions, questions_tokens, questions_chars, spans + questions, questions_tokens, questions_chars, spans @staticmethod def preprocess_str(line: str, return_mapping: bool = False) -> Union[Tuple[str, List[int], List[int]], str]: @@ -480,7 +480,7 @@ def __call__(self, answers_start, answers_end, contexts, bert_features, subtok2c end = self.get_char_position(sub2c, answer_end) subtok = features.tokens[answer_end] subtok = subtok[2:] if subtok.startswith('##') else subtok - answer = context[st:end+len(subtok)] + answer = context[st:end + len(subtok)] answers += [answer] starts += [st] ends += [ends] diff --git a/deeppavlov/models/preprocessors/str_token_reverser.py b/deeppavlov/models/preprocessors/str_token_reverser.py index 7dffe0af14..3c25ff8d68 100644 --- a/deeppavlov/models/preprocessors/str_token_reverser.py +++ b/deeppavlov/models/preprocessors/str_token_reverser.py @@ -1,4 +1,3 @@ - # Copyright 2017 Neural Networks and Deep Learning lab, MIPT # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,6 +27,7 @@ class StrTokenReverser(Component): Args: tokenized: The parameter is only needed to reverse tokenized strings. """ + def __init__(self, tokenized: bool = False, *args, **kwargs) -> None: self.tokenized = tokenized @@ -54,7 +54,7 @@ def __call__(self, batch: Union[str, list, tuple]) -> StrTokenReverserInfo: """ if isinstance(batch, (list, tuple)): batch = batch.copy() - + if self.tokenized: if isinstance(batch, (list, tuple)): if isinstance(batch[-1], str): @@ -69,4 +69,3 @@ def __call__(self, batch: Union[str, list, tuple]) -> StrTokenReverserInfo: return [self(line) for line in batch] else: return self._reverse_str(batch) - diff --git a/deeppavlov/models/preprocessors/str_utf8_encoder.py b/deeppavlov/models/preprocessors/str_utf8_encoder.py index 6f8437f1e4..2aaa0703cb 100644 --- a/deeppavlov/models/preprocessors/str_utf8_encoder.py +++ b/deeppavlov/models/preprocessors/str_utf8_encoder.py @@ -44,7 +44,8 @@ class StrUTF8Encoder(Estimator): bos: Name of a special token of the begin of a sentence. eos: Name of a special token of the end of a sentence. 
""" - def __init__(self, + + def __init__(self, max_word_length: int = 50, pad_special_char_use: bool = False, word_boundary_special_char_use: bool = False, @@ -56,9 +57,9 @@ def __init__(self, super().__init__(**kwargs) if word_boundary_special_char_use and max_word_length < 3: - raise ConfigError(f"`max_word_length` should be more than 3!") + raise ConfigError(f"`max_word_length` should be more than 3!") if max_word_length < 1: - raise ConfigError(f"`max_word_length` should be more than 1!") + raise ConfigError(f"`max_word_length` should be more than 1!") self._max_word_length = max_word_length self._reverse = reversed_sentense_tokens @@ -85,12 +86,12 @@ def _make_bos_eos(indx): else: code = indx if self._pad_special_char_use: - code = np.pad(code, (0, self._max_word_length - code.shape[0]), 'constant', + code = np.pad(code, (0, self._max_word_length - code.shape[0]), 'constant', constant_values=(self.pad_char)) else: pass return code - + self.bos_chars = _make_bos_eos(self.bos_char) self.eos_chars = _make_bos_eos(self.eos_char) @@ -149,14 +150,14 @@ def save(self) -> None: with self.save_path.open('wt', encoding='utf8') as f: for token in self._word_char_ids.keys(): f.write('{}\n'.format(token)) - + @overrides def fit(self, *args) -> None: words = chain(*args) # filter(None, <>) -- to filter empty words freqs = Counter(filter(None, chain(*words))) for token, _ in freqs.most_common(): - if not(token in self._word_char_ids): + if not (token in self._word_char_ids): self._word_char_ids[token] = self._convert_word_to_char_ids(token) def _convert_word_to_char_ids(self, word): diff --git a/deeppavlov/models/ranking/bilstm_gru_siamese_network.py b/deeppavlov/models/ranking/bilstm_gru_siamese_network.py index e56c2a1187..281633db0a 100644 --- a/deeppavlov/models/ranking/bilstm_gru_siamese_network.py +++ b/deeppavlov/models/ranking/bilstm_gru_siamese_network.py @@ -23,9 +23,9 @@ log = getLogger(__name__) + @register('bilstm_gru_nn') class BiLSTMGRUSiameseNetwork(BiLSTMSiameseNetwork): - """The class implementing a siamese neural network with BiLSTM, GRU and max pooling. GRU is used to take into account multi-turn dialogue ``context``. @@ -48,6 +48,7 @@ class BiLSTMGRUSiameseNetwork(BiLSTMSiameseNetwork): If set to ``False`` random sampling will be used. Only required if ``triplet_loss`` is set to ``True``. """ + def create_model(self) -> Model: input = [] if self.use_matrix: @@ -100,11 +101,10 @@ def create_score_model(self) -> Model: def create_context_model(self) -> Model: m = Model(self.model.inputs[:-1], - self.model.get_layer("gru").output) + self.model.get_layer("gru").output) return m def create_response_model(self) -> Model: m = Model(self.model.inputs[-1], - self.model.get_layer("pooling").get_output_at(-1)) + self.model.get_layer("pooling").get_output_at(-1)) return m - diff --git a/deeppavlov/models/ranking/bilstm_siamese_network.py b/deeppavlov/models/ranking/bilstm_siamese_network.py index 3788b5974d..d192b9e9b1 100644 --- a/deeppavlov/models/ranking/bilstm_siamese_network.py +++ b/deeppavlov/models/ranking/bilstm_siamese_network.py @@ -34,7 +34,6 @@ @register('bilstm_nn') class BiLSTMSiameseNetwork(KerasSiameseModel): - """The class implementing a siamese neural network with BiLSTM and max pooling. 
There is a possibility to use a binary cross-entropy loss as well as @@ -120,10 +119,10 @@ def lstm_layer(self) -> Layer: rec_in = Orthogonal(seed=self.seed) if self.recurrent == "bilstm" or self.recurrent is None: out = Bidirectional(LSTM(self.hidden_dim, - input_shape=(self.max_sequence_length, self.embedding_dim,), - kernel_initializer=ker_in, - recurrent_initializer=rec_in, - return_sequences=ret_seq), merge_mode='concat') + input_shape=(self.max_sequence_length, self.embedding_dim,), + kernel_initializer=ker_in, + recurrent_initializer=rec_in, + return_sequences=ret_seq), merge_mode='concat') elif self.recurrent == "lstm": out = LSTM(self.hidden_dim, input_shape=(self.max_sequence_length, self.embedding_dim,), @@ -187,7 +186,7 @@ def create_score_model(self) -> Model: def _diff_mult_dist(self, inputs: List[Tensor]) -> Tensor: input1, input2 = inputs - a = K.abs(input1-input2) + a = K.abs(input1 - input2) b = Multiply()(inputs) return K.concatenate([input1, input2, a, b]) @@ -216,7 +215,7 @@ def _pairwise_distances(self, inputs: List[Tensor]) -> Tensor: distances = distances * (1.0 - mask) return distances - def _triplet_loss(self, labels: Tensor, pairwise_dist: Tensor) -> Tensor : + def _triplet_loss(self, labels: Tensor, pairwise_dist: Tensor) -> Tensor: y_true = K.squeeze(labels, axis=1) """Triplet loss function""" if self.hard_triplets: @@ -244,8 +243,8 @@ def _batch_hard_triplet_loss(self, y_true: Tensor, pairwise_dist: Tensor) -> Ten mask_anchor_negative = self._get_anchor_negative_triplet_mask(y_true, pairwise_dist) anchor_negative_dist = mask_anchor_negative * pairwise_dist mask_anchor_negative = self._get_semihard_anchor_negative_triplet_mask(anchor_negative_dist, - hardest_positive_dist, - mask_anchor_negative) + hardest_positive_dist, + mask_anchor_negative) max_anchor_negative_dist = K.max(pairwise_dist, axis=1, keepdims=True) anchor_negative_dist = pairwise_dist + max_anchor_negative_dist * (1.0 - mask_anchor_negative) hardest_negative_dist = K.min(anchor_negative_dist, axis=1, keepdims=True) @@ -290,5 +289,5 @@ def _get_semihard_anchor_negative_triplet_mask(self, negative_dist: Tensor, mask = mask_negative * (1 - mask_semihard) + mask * mask_semihard return mask - def _predict_on_batch(self, batch: List[np.ndarray]) -> np.ndarray: + def _predict_on_batch(self, batch: List[np.ndarray]) -> np.ndarray: return self.score_model.predict_on_batch(x=batch) diff --git a/deeppavlov/models/ranking/deep_attention_matching_network.py b/deeppavlov/models/ranking/deep_attention_matching_network.py index 41b261018b..71b74a64a6 100644 --- a/deeppavlov/models/ranking/deep_attention_matching_network.py +++ b/deeppavlov/models/ranking/deep_attention_matching_network.py @@ -19,9 +19,9 @@ import tensorflow as tf from deeppavlov.core.common.registry import register -from deeppavlov.models.ranking.tf_base_matching_model import TensorflowBaseMatchingModel from deeppavlov.models.ranking.matching_models.dam_utils import layers from deeppavlov.models.ranking.matching_models.dam_utils import operations as op +from deeppavlov.models.ranking.tf_base_matching_model import TensorflowBaseMatchingModel log = getLogger(__name__) diff --git a/deeppavlov/models/ranking/deep_attention_matching_network_use_transformer.py b/deeppavlov/models/ranking/deep_attention_matching_network_use_transformer.py index cd322dd24a..a9dc45ccd0 100644 --- a/deeppavlov/models/ranking/deep_attention_matching_network_use_transformer.py +++ b/deeppavlov/models/ranking/deep_attention_matching_network_use_transformer.py @@ -20,9 +20,9 
@@ import tensorflow_hub as hub from deeppavlov.core.common.registry import register -from deeppavlov.models.ranking.tf_base_matching_model import TensorflowBaseMatchingModel from deeppavlov.models.ranking.matching_models.dam_utils import layers from deeppavlov.models.ranking.matching_models.dam_utils import operations as op +from deeppavlov.models.ranking.tf_base_matching_model import TensorflowBaseMatchingModel log = getLogger(__name__) @@ -145,7 +145,6 @@ def _init_sentence_encoder(self): # for resp sentences: shape=(None, 1, 512) self.sent_embedder_response = tf.expand_dims(embed_response, axis=1) - def _init_graph(self): self._init_placeholders() self._init_sentence_encoder() @@ -189,7 +188,7 @@ def _init_graph(self): # context part # a list of length max_turn_num, every element is a tensor with shape [batch, max_turn_len] - list_turn_t = tf.unstack(self.utterance_ph, axis=1) + list_turn_t = tf.unstack(self.utterance_ph, axis=1) list_turn_length = tf.unstack(self.all_utterance_len_ph, axis=1) list_turn_t_sent = tf.unstack(sent_embedder_context, axis=1) @@ -264,7 +263,7 @@ def _init_graph(self): sim = tf.stack(sim_turns, axis=1) log.info('sim shape: %s' % sim.shape) with tf.variable_scope('cnn_aggregation'): - final_info = layers.CNN_3d(sim, 32, 32) # We can improve performance if use 32 filters for each layer + final_info = layers.CNN_3d(sim, 32, 32) # We can improve performance if use 32 filters for each layer # for douban # final_info = layers.CNN_3d(sim, 16, 16) @@ -312,12 +311,12 @@ def _append_sample_to_batch_buffer(self, sample: List[np.ndarray], buf: List[Tup """ sample_len = len(sample) - batch_buffer_context = [] # [batch_size, 10, 50] - batch_buffer_context_len = [] # [batch_size, 10] - batch_buffer_response = [] # [batch_size, 50] + batch_buffer_context = [] # [batch_size, 10, 50] + batch_buffer_context_len = [] # [batch_size, 10] + batch_buffer_response = [] # [batch_size, 50] batch_buffer_response_len = [] # [batch_size] - raw_batch_buffer_context = [] # [batch_size, 10] + raw_batch_buffer_context = [] # [batch_size, 10] raw_batch_buffer_response = [] # [batch_size] context_sentences = sample[:self.num_context_turns] @@ -330,12 +329,12 @@ def _append_sample_to_batch_buffer(self, sample: List[np.ndarray], buf: List[Tup # 4 model inputs # 1. Token indices for context - batch_buffer_context += [context_sentences for sent in response_sentences] # replicate context N times + batch_buffer_context += [context_sentences for sent in response_sentences] # replicate context N times # 2. Token indices for response batch_buffer_response += [response_sentence for response_sentence in response_sentences] # 3. 
Lengths of all context sentences lens = [] - for context in [context_sentences for sent in response_sentences]: # replicate context N times + for context in [context_sentences for sent in response_sentences]: # replicate context N times context_sentences_lens = [] for sent in context: sent_len = len(sent[sent != 0]) @@ -391,7 +390,7 @@ def _make_batch(self, batch: List[Tuple[np.ndarray]]) -> Dict: input_context_len.append(sample[1]) input_response.append(sample[2]) input_response_len.append(sample[3]) - input_raw_context.append(sample[4]) # raw context is the 4th element of each Tuple in the batch + input_raw_context.append(sample[4]) # raw context is the 4th element of each Tuple in the batch input_raw_response.append(sample[5]) # raw response is the 5th element of each Tuple in the batch return { diff --git a/deeppavlov/models/ranking/keras_siamese_model.py b/deeppavlov/models/ranking/keras_siamese_model.py index 7826d6e671..545f3469ab 100644 --- a/deeppavlov/models/ranking/keras_siamese_model.py +++ b/deeppavlov/models/ranking/keras_siamese_model.py @@ -68,7 +68,7 @@ def __init__(self, self.model = self.create_model() self.compile() if self.load_path.exists(): - self.load() + self.load() else: self.load_initial_emb_matrix() @@ -100,12 +100,12 @@ def create_model(self) -> Model: def create_context_model(self) -> Model: m = Model(self.model.inputs[:-1], - self.model.get_layer("sentence_embedding").get_output_at(0)) + self.model.get_layer("sentence_embedding").get_output_at(0)) return m def create_response_model(self) -> Model: m = Model(self.model.inputs[-1], - self.model.get_layer("sentence_embedding").get_output_at(1)) + self.model.get_layer("sentence_embedding").get_output_at(1)) return m def _train_on_batch(self, batch: List[np.ndarray], y: List[int]) -> float: @@ -121,4 +121,3 @@ def _predict_context_on_batch(self, batch: List[np.ndarray]) -> np.ndarray: def _predict_response_on_batch(self, batch: List[np.ndarray]) -> np.ndarray: return self.response_model.predict_on_batch(batch) - diff --git a/deeppavlov/models/ranking/matching_models/dam_utils/layers.py b/deeppavlov/models/ranking/matching_models/dam_utils/layers.py index b4be0e6db9..037453d77e 100644 --- a/deeppavlov/models/ranking/matching_models/dam_utils/layers.py +++ b/deeppavlov/models/ranking/matching_models/dam_utils/layers.py @@ -90,15 +90,16 @@ def dynamic_L(x): shape=[x.shape[-1]], dtype=tf.float32, initializer=tf.random_uniform_initializer( - -tf.sqrt(6./tf.cast(x.shape[-1], tf.float32)), - tf.sqrt(6./tf.cast(x.shape[-1], tf.float32)))) + -tf.sqrt(6. / tf.cast(x.shape[-1], tf.float32)), + tf.sqrt(6. / tf.cast(x.shape[-1], tf.float32)))) - key = op.dense(x, add_bias=False) #[batch, time, dimension] - weight = tf.reduce_sum(tf.multiply(key, key_0), axis=-1) #[batch, time] - weight = tf.expand_dims(tf.nn.softmax(weight), -1) #[batch, time, 1] + key = op.dense(x, add_bias=False) # [batch, time, dimension] + weight = tf.reduce_sum(tf.multiply(key, key_0), axis=-1) # [batch, time] + weight = tf.expand_dims(tf.nn.softmax(weight), -1) # [batch, time, 1] + + L = tf.reduce_sum(tf.multiply(x, weight), axis=1) # [batch, dimension] + return L - L = tf.reduce_sum(tf.multiply(x, weight), axis=1) #[batch, dimension] - return L def loss(x, y, num_classes=2, is_clip=True, clip_value=10): '''From info x calculate logits as return loss. 
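For reference, the `dynamic_L` helper reformatted above reduces a [batch, time, dimension] tensor to [batch, dimension]: it learns a query vector (`key_0`), scores every time step against it, and takes the softmax-weighted sum over time. A minimal NumPy sketch of the same computation, with the `op.dense` projection replaced by an assumed weight matrix `W` (that helper lives in `operations.py` and is not shown here):

    import numpy as np

    def softmax(z, axis=-1):
        z = z - z.max(axis=axis, keepdims=True)
        e = np.exp(z)
        return e / e.sum(axis=axis, keepdims=True)

    def dynamic_pool(x, W, key_0):
        """x: [batch, time, dim]; W: [dim, dim]; key_0: [dim]."""
        key = x @ W                                    # [batch, time, dim]
        weight = (key * key_0).sum(axis=-1)            # [batch, time]
        weight = softmax(weight, axis=-1)[..., None]   # [batch, time, 1]
        return (x * weight).sum(axis=1)                # [batch, dim]

    batch, time, dim = 2, 5, 8
    x = np.random.randn(batch, time, dim)
    L = dynamic_pool(x, np.random.randn(dim, dim), np.random.randn(dim))
    assert L.shape == (batch, dim)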
@@ -152,12 +153,13 @@ def loss(x, y, num_classes=2, is_clip=True, clip_value=10): return loss, logits + def attention( - Q, K, V, - Q_lengths, K_lengths, - attention_type='dot', - is_mask=True, mask_value=-2**32+1, - drop_prob=None): + Q, K, V, + Q_lengths, K_lengths, + attention_type='dot', + is_mask=True, mask_value=-2 ** 32 + 1, + drop_prob=None): '''Add attention layer. Args: Q: a tensor with shape [batch, Q_time, Q_dimension] @@ -182,16 +184,16 @@ def attention( K_time = K.shape[1] if attention_type == 'dot': - logits = op.dot_sim(Q, K) #[batch, Q_time, time] + logits = op.dot_sim(Q, K) # [batch, Q_time, time] if attention_type == 'bilinear': logits = op.bilinear_sim(Q, K) if is_mask: - mask = op.mask(Q_lengths, K_lengths, Q_time, K_time) #[batch, Q_time, K_time] + mask = op.mask(Q_lengths, K_lengths, Q_time, K_time) # [batch, Q_time, K_time] # mask = tf.Print(mask, [logits[0], mask[0]], tf.get_variable_scope().name + " logits, mask: ", summarize=10) logits = mask * logits + (1 - mask) * mask_value # logits = tf.Print(logits, [logits[0]], tf.get_variable_scope().name + " masked logits: ", summarize=10) - + attention = tf.nn.softmax(logits) if drop_prob is not None: @@ -200,6 +202,7 @@ def attention( return op.weighted_sum(attention, V) + def FFN(x, out_dimension_0=None, out_dimension_1=None): '''Add two dense connected layer, max(0, x*W0+b0)*W1+b1. @@ -217,16 +220,17 @@ def FFN(x, out_dimension_0=None, out_dimension_1=None): y = tf.nn.relu(y) with tf.variable_scope('FFN_2'): # z = op.dense(y, out_dimension_1, initializer=tf.keras.initializers.glorot_uniform(seed=42)) # TODO: check - z = op.dense(y, out_dimension_1) #, add_bias=False) #!!!! + z = op.dense(y, out_dimension_1) # , add_bias=False) #!!!! return z + def block( - Q, K, V, - Q_lengths, K_lengths, - attention_type='dot', - is_layer_norm=True, - is_mask=True, mask_value=-2**32+1, - drop_prob=None): + Q, K, V, + Q_lengths, K_lengths, + attention_type='dot', + is_layer_norm=True, + is_mask=True, mask_value=-2 ** 32 + 1, + drop_prob=None): '''Add a block unit from https://arxiv.org/pdf/1706.03762.pdf. Args: Q: a tensor with shape [batch, Q_time, Q_dimension] @@ -241,8 +245,8 @@ def block( Raises: ''' - att = attention(Q, K, V, - Q_lengths, K_lengths, + att = attention(Q, K, V, + Q_lengths, K_lengths, attention_type=attention_type, is_mask=is_mask, mask_value=mask_value, drop_prob=drop_prob) @@ -260,6 +264,7 @@ def block( w = y + z return w + def CNN(x, out_channels, filter_size, pooling_size, add_relu=True): '''Add a convlution layer with relu and max pooling layer. @@ -274,9 +279,9 @@ def CNN(x, out_channels, filter_size, pooling_size, add_relu=True): Raises: ''' - #calculate the last dimension of return - num_features = ((tf.shape(x)[1]-filter_size+1)/pooling_size * - (tf.shape(x)[2]-filter_size+1)/pooling_size) * out_channels + # calculate the last dimension of return + num_features = ((tf.shape(x)[1] - filter_size + 1) / pooling_size * + (tf.shape(x)[2] - filter_size + 1) / pooling_size) * out_channels in_channels = x.shape[-1] weights = tf.get_variable( @@ -297,13 +302,14 @@ def CNN(x, out_channels, filter_size, pooling_size, add_relu=True): conv = tf.nn.relu(conv) pooling = tf.nn.max_pool( - conv, + conv, ksize=[1, pooling_size, pooling_size, 1], - strides=[1, pooling_size, pooling_size, 1], + strides=[1, pooling_size, pooling_size, 1], padding="VALID") return tf.contrib.layers.flatten(pooling) + def CNN_3d(x, out_channels_0, out_channels_1, add_relu=True): '''Add a 3d convlution layer with relu and max pooling layer. 
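The `attention` and `block` helpers reformatted above implement the masked attention unit from https://arxiv.org/pdf/1706.03762.pdf: similarity logits between Q and K, a length mask that pushes padded positions to `mask_value` before the softmax, and a weighted sum over V. A minimal NumPy sketch of the 'dot' path, assuming `op.dot_sim` reduces to a plain Q·Kᵀ product (any normalization inside `operations.py` is omitted):

    import numpy as np

    def masked_dot_attention(Q, K, V, q_len, k_len, mask_value=-2 ** 32 + 1):
        """Q: [b, tq, d]; K, V: [b, tk, d]; q_len, k_len: valid lengths per sample."""
        logits = np.einsum('bik,bjk->bij', Q, K)               # [b, tq, tk]
        # same outer-product mask as op.mask(): 1 inside the valid area, 0 outside
        b, tq, tk = logits.shape
        row = (np.arange(tq)[None, :] < np.array(q_len)[:, None]).astype(float)
        col = (np.arange(tk)[None, :] < np.array(k_len)[:, None]).astype(float)
        mask = np.einsum('bi,bj->bij', row, col)
        logits = mask * logits + (1 - mask) * mask_value
        att = np.exp(logits - logits.max(-1, keepdims=True))
        att = att / att.sum(-1, keepdims=True)                 # softmax over K_time
        return np.einsum('bij,bjk->bik', att, V)               # [b, tq, d]

    Q = np.random.randn(2, 4, 8)
    K = V = np.random.randn(2, 6, 8)
    out = masked_dot_attention(Q, K, V, q_len=[4, 2], k_len=[6, 3])
    assert out.shape == (2, 4, 8)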
@@ -331,20 +337,20 @@ def CNN_3d(x, out_channels_0, out_channels_1, add_relu=True): initializer=tf.zeros_initializer()) conv_0 = tf.nn.conv3d(x, weights_0, strides=[1, 1, 1, 1, 1], padding="SAME") - log.info('conv_0 shape: %s' %conv_0.shape) + log.info('conv_0 shape: %s' % conv_0.shape) conv_0 = conv_0 + bias_0 if add_relu: conv_0 = tf.nn.elu(conv_0) pooling_0 = tf.nn.max_pool3d( - conv_0, + conv_0, ksize=[1, 3, 3, 3, 1], - strides=[1, 3, 3, 3, 1], + strides=[1, 3, 3, 3, 1], padding="SAME") - log.info('pooling_0 shape: %s' %pooling_0.shape) + log.info('pooling_0 shape: %s' % pooling_0.shape) - #layer_1 + # layer_1 weights_1 = tf.get_variable( name='filter_1', shape=[3, 3, 3, out_channels_0, out_channels_1], @@ -357,21 +363,22 @@ def CNN_3d(x, out_channels_0, out_channels_1, add_relu=True): initializer=tf.zeros_initializer()) conv_1 = tf.nn.conv3d(pooling_0, weights_1, strides=[1, 1, 1, 1, 1], padding="SAME") - log.info('conv_1 shape: %s' %conv_1.shape) + log.info('conv_1 shape: %s' % conv_1.shape) conv_1 = conv_1 + bias_1 if add_relu: conv_1 = tf.nn.elu(conv_1) pooling_1 = tf.nn.max_pool3d( - conv_1, + conv_1, ksize=[1, 3, 3, 3, 1], - strides=[1, 3, 3, 3, 1], + strides=[1, 3, 3, 3, 1], padding="SAME") - log.info('pooling_1 shape: %s' %pooling_1.shape) + log.info('pooling_1 shape: %s' % pooling_1.shape) return tf.contrib.layers.flatten(pooling_1) + def CNN_3d_2d(x, out_channels_0, out_channels_1, add_relu=True): '''Add a 3d convlution layer with relu and max pooling layer. @@ -399,20 +406,20 @@ def CNN_3d_2d(x, out_channels_0, out_channels_1, add_relu=True): initializer=tf.zeros_initializer()) conv_0 = tf.nn.conv3d(x, weights_0, strides=[1, 1, 1, 1, 1], padding="SAME") - log.info('conv_0 shape: %s' %conv_0.shape) + log.info('conv_0 shape: %s' % conv_0.shape) conv_0 = conv_0 + bias_0 if add_relu: conv_0 = tf.nn.elu(conv_0) pooling_0 = tf.nn.max_pool3d( - conv_0, + conv_0, ksize=[1, 1, 3, 3, 1], - strides=[1, 1, 3, 3, 1], + strides=[1, 1, 3, 3, 1], padding="SAME") - log.info('pooling_0 shape: %s' %pooling_0.shape) + log.info('pooling_0 shape: %s' % pooling_0.shape) - #layer_1 + # layer_1 weights_1 = tf.get_variable( name='filter_1', shape=[1, 3, 3, out_channels_0, out_channels_1], @@ -425,21 +432,22 @@ def CNN_3d_2d(x, out_channels_0, out_channels_1, add_relu=True): initializer=tf.zeros_initializer()) conv_1 = tf.nn.conv3d(pooling_0, weights_1, strides=[1, 1, 1, 1, 1], padding="SAME") - log.info('conv_1 shape: %s' %conv_1.shape) + log.info('conv_1 shape: %s' % conv_1.shape) conv_1 = conv_1 + bias_1 if add_relu: conv_1 = tf.nn.elu(conv_1) pooling_1 = tf.nn.max_pool3d( - conv_1, + conv_1, ksize=[1, 1, 3, 3, 1], - strides=[1, 1, 3, 3, 1], + strides=[1, 1, 3, 3, 1], padding="SAME") - log.info('pooling_1 shape: %s' %pooling_1.shape) + log.info('pooling_1 shape: %s' % pooling_1.shape) return tf.contrib.layers.flatten(pooling_1) + def CNN_3d_change(x, out_channels_0, out_channels_1, add_relu=True): '''Add a 3d convlution layer with relu and max pooling layer. 
@@ -459,22 +467,22 @@ def CNN_3d_change(x, out_channels_0, out_channels_1, add_relu=True): name='filter_0', shape=[3, 3, 3, in_channels, out_channels_0], dtype=tf.float32, - #initializer=tf.random_normal_initializer(0, 0.05)) + # initializer=tf.random_normal_initializer(0, 0.05)) initializer=tf.random_uniform_initializer(-0.01, 0.01)) bias_0 = tf.get_variable( name='bias_0', shape=[out_channels_0], dtype=tf.float32, initializer=tf.zeros_initializer()) - #Todo + # Todo g_0 = tf.get_variable(name='scale_0', - shape = [out_channels_0], - dtype=tf.float32, - initializer=tf.ones_initializer()) + shape=[out_channels_0], + dtype=tf.float32, + initializer=tf.ones_initializer()) weights_0 = tf.reshape(g_0, [1, 1, 1, out_channels_0]) * tf.nn.l2_normalize(weights_0, [0, 1, 2]) conv_0 = tf.nn.conv3d(x, weights_0, strides=[1, 1, 1, 1, 1], padding="VALID") - log.info('conv_0 shape: %s' %conv_0.shape) + log.info('conv_0 shape: %s' % conv_0.shape) conv_0 = conv_0 + bias_0 ####### ''' @@ -486,49 +494,50 @@ def CNN_3d_change(x, out_channels_0, out_channels_1, add_relu=True): conv_0 = tf.nn.elu(conv_0) pooling_0 = tf.nn.max_pool3d( - conv_0, + conv_0, ksize=[1, 2, 3, 3, 1], - strides=[1, 2, 3, 3, 1], + strides=[1, 2, 3, 3, 1], padding="VALID") - log.info('pooling_0 shape: %s' %pooling_0.shape) + log.info('pooling_0 shape: %s' % pooling_0.shape) - #layer_1 + # layer_1 weights_1 = tf.get_variable( name='filter_1', shape=[2, 2, 2, out_channels_0, out_channels_1], dtype=tf.float32, initializer=tf.random_uniform_initializer(-0.01, 0.01)) - + bias_1 = tf.get_variable( name='bias_1', shape=[out_channels_1], dtype=tf.float32, initializer=tf.zeros_initializer()) - + g_1 = tf.get_variable(name='scale_1', - shape = [out_channels_1], - dtype=tf.float32, - initializer=tf.ones_initializer()) + shape=[out_channels_1], + dtype=tf.float32, + initializer=tf.ones_initializer()) weights_1 = tf.reshape(g_1, [1, 1, 1, out_channels_1]) * tf.nn.l2_normalize(weights_1, [0, 1, 2]) conv_1 = tf.nn.conv3d(pooling_0, weights_1, strides=[1, 1, 1, 1, 1], padding="VALID") - log.info('conv_1 shape: %s' %conv_1.shape) + log.info('conv_1 shape: %s' % conv_1.shape) conv_1 = conv_1 + bias_1 - #with tf.variable_scope('layer_1'): + # with tf.variable_scope('layer_1'): # conv_1 = op.layer_norm(conv_1, axis=[1, 2, 3, 4]) if add_relu: conv_1 = tf.nn.elu(conv_1) pooling_1 = tf.nn.max_pool3d( - conv_1, + conv_1, ksize=[1, 3, 3, 3, 1], - strides=[1, 3, 3, 3, 1], + strides=[1, 3, 3, 3, 1], padding="VALID") - log.info('pooling_1 shape: %s' %pooling_1.shape) + log.info('pooling_1 shape: %s' % pooling_1.shape) return tf.contrib.layers.flatten(pooling_1) + def RNN_last_state(x, lengths, hidden_size): '''encode x with a gru cell and return the last state. 
@@ -544,5 +553,3 @@ def RNN_last_state(x, lengths, hidden_size): cell = tf.nn.rnn_cell.GRUCell(hidden_size) outputs, last_states = tf.nn.dynamic_rnn(cell, x, lengths, dtype=tf.float32) return outputs, last_states - - diff --git a/deeppavlov/models/ranking/matching_models/dam_utils/operations.py b/deeppavlov/models/ranking/matching_models/dam_utils/operations.py index 402f427083..a6bd6a5fee 100644 --- a/deeppavlov/models/ranking/matching_models/dam_utils/operations.py +++ b/deeppavlov/models/ranking/matching_models/dam_utils/operations.py @@ -25,27 +25,28 @@ # # Based on authors' Tensorflow code: https://github.com/baidu/Dialogue/tree/master/DAM -from logging import getLogger import math +from logging import getLogger import numpy as np import tensorflow as tf - from scipy.stats import multivariate_normal log = getLogger(__name__) def learning_rate(step_num, d_model=512, warmup_steps=4000): - a = step_num**(-0.5) - b = step_num*warmup_steps**(-1.5) - return a, b, d_model**(-0.5) * min(step_num**(-0.5), step_num*(warmup_steps**(-1.5))) + a = step_num ** (-0.5) + b = step_num * warmup_steps ** (-1.5) + return a, b, d_model ** (-0.5) * min(step_num ** (-0.5), step_num * (warmup_steps ** (-1.5))) + def selu(x): alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 log.info('use selu') - return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x)) + return scale * tf.where(x >= 0.0, x, alpha * tf.nn.elu(x)) + def bilinear_sim_4d(x, y, is_nor=True): '''calulate bilinear similarity with two 4d tensor. @@ -63,7 +64,7 @@ def bilinear_sim_4d(x, y, is_nor=True): bilinear matrix reuse error. ''' M = tf.get_variable( - name="bilinear_matrix", + name="bilinear_matrix", shape=[x.shape[2], y.shape[2], x.shape[3]], dtype=tf.float32, initializer=tf.orthogonal_initializer()) @@ -91,7 +92,7 @@ def bilinear_sim(x, y, is_nor=True): bilinear matrix reuse error. ''' M = tf.get_variable( - name="bilinear_matrix", + name="bilinear_matrix", shape=[x.shape[-1], y.shape[-1]], dtype=tf.float32, # initializer=tf.orthogonal_initializer()) @@ -105,6 +106,7 @@ def bilinear_sim(x, y, is_nor=True): else: return sim + def dot_sim(x, y, is_nor=True): '''calculate dot similarity with two tensor. @@ -129,6 +131,7 @@ def dot_sim(x, y, is_nor=True): else: return sim + def layer_norm(x, axis=None, epsilon=1e-6): '''Add layer normalization. @@ -158,10 +161,11 @@ def layer_norm(x, axis=None, epsilon=1e-6): mean = tf.reduce_mean(x, axis=axis, keepdims=True) variance = tf.reduce_mean(tf.square(x - mean), axis=axis, keepdims=True) - norm = (x-mean) * tf.rsqrt(variance + epsilon) + norm = (x - mean) * tf.rsqrt(variance + epsilon) return scale * norm + bias -def layer_norm_debug(x, axis = None, epsilon=1e-6): + +def layer_norm_debug(x, axis=None, epsilon=1e-6): '''Add layer normalization. Args: @@ -190,9 +194,10 @@ def layer_norm_debug(x, axis = None, epsilon=1e-6): mean = tf.reduce_mean(x, axis=axis, keepdims=True) variance = tf.reduce_mean(tf.square(x - mean), axis=axis, keepdims=True) - norm = (x-mean) * tf.rsqrt(variance + epsilon) + norm = (x - mean) * tf.rsqrt(variance + epsilon) return scale * norm + bias + def dense(x, out_dimension=None, add_bias=True, initializer=tf.orthogonal_initializer()): '''Add dense connected layer, Wx + b. @@ -223,6 +228,7 @@ def dense(x, out_dimension=None, add_bias=True, initializer=tf.orthogonal_initia else: return tf.einsum('bik,kj->bij', x, W) + def matmul_2d(x, out_dimension, drop_prob=None): '''Multiplies 2-d tensor by weights. 
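The `learning_rate` helper at the top of `operations.py` is the Transformer warm-up schedule: lr(step) = d_model^(-0.5) * min(step^(-0.5), step * warmup_steps^(-1.5)), i.e. a linear warm-up for `warmup_steps` steps followed by inverse-square-root decay, with the two branches meeting exactly at step == warmup_steps. A small sketch of just the schedule value (the function in the diff also returns the two intermediate terms):

    def noam_lr(step, d_model=512, warmup_steps=4000):
        # linear warm-up, then ~1/sqrt(step) decay; both terms are equal at step == warmup_steps
        return d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

    assert abs(noam_lr(4000) - 512 ** -0.5 * 4000 ** -0.5) < 1e-12
    print(noam_lr(1), noam_lr(4000), noam_lr(40000))  # rises, peaks, then decays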
@@ -246,11 +252,12 @@ def matmul_2d(x, out_dimension, drop_prob=None): return tf.matmul(x, W) + def gauss_positional_encoding_vector(x, role=0, value=0): position = int(x.shape[1]) dimension = int(x.shape[2]) - log.info('position: %s' %position) - log.info('dimension: %s' %dimension) + log.info('position: %s' % position) + log.info('dimension: %s' % dimension) _lambda = tf.get_variable( name='lambda', @@ -259,20 +266,19 @@ def gauss_positional_encoding_vector(x, role=0, value=0): initializer=tf.constant_initializer(value)) _lambda = tf.expand_dims(_lambda, axis=-1) - mean = [position/2.0, dimension/2.0] + mean = [position / 2.0, dimension / 2.0] - #cov = [[position/3.0, 0], [0, dimension/3.0]] - sigma_x = position/math.sqrt(4.0*dimension) - sigma_y = math.sqrt(dimension/4.0) - cov = [[sigma_x*sigma_x, role*sigma_x*sigma_y], - [role*sigma_x*sigma_y, sigma_y*sigma_y]] + # cov = [[position/3.0, 0], [0, dimension/3.0]] + sigma_x = position / math.sqrt(4.0 * dimension) + sigma_y = math.sqrt(dimension / 4.0) + cov = [[sigma_x * sigma_x, role * sigma_x * sigma_y], + [role * sigma_x * sigma_y, sigma_y * sigma_y]] pos = np.dstack(np.mgrid[0:position, 0:dimension]) - rv = multivariate_normal(mean, cov) - signal = rv.pdf(pos) - signal = signal - np.max(signal)/2.0 + signal = rv.pdf(pos) + signal = signal - np.max(signal) / 2.0 signal = tf.multiply(_lambda, signal) signal = tf.expand_dims(signal, axis=0) @@ -281,6 +287,7 @@ def gauss_positional_encoding_vector(x, role=0, value=0): return x + _lambda * signal + def positional_encoding(x, min_timescale=1.0, max_timescale=1.0e4, value=0): '''Adds a bunch of sinusoids of different frequencies to a tensor. @@ -305,14 +312,14 @@ def positional_encoding(x, min_timescale=1.0, max_timescale=1.0e4, value=0): position = tf.to_float(tf.range(length)) num_timescales = channels // 2 log_timescale_increment = ( - math.log(float(max_timescale) / float(min_timescale)) / - (tf.to_float(num_timescales) - 1)) + math.log(float(max_timescale) / float(min_timescale)) / + (tf.to_float(num_timescales) - 1)) inv_timescales = min_timescale * tf.exp( tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0) signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]]) - #signal = tf.reshape(signal, [1, length, channels]) + # signal = tf.reshape(signal, [1, length, channels]) signal = tf.expand_dims(signal, axis=0) return x + _lambda * signal @@ -343,8 +350,8 @@ def positional_encoding_vector(x, min_timescale=1.0, max_timescale=1.0e4, value= position = tf.to_float(tf.range(length)) num_timescales = channels // 2 log_timescale_increment = ( - math.log(float(max_timescale) / float(min_timescale)) / - (tf.to_float(num_timescales) - 1)) + math.log(float(max_timescale) / float(min_timescale)) / + (tf.to_float(num_timescales) - 1)) inv_timescales = min_timescale * tf.exp( tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0) @@ -356,6 +363,7 @@ def positional_encoding_vector(x, min_timescale=1.0, max_timescale=1.0e4, value= return x + signal + def mask(row_lengths, col_lengths, max_row_length, max_col_length): '''Return a mask tensor representing the first N positions of each row and each column. 
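positional_encoding and positional_encoding_vector, reformatted above, both build the same sinusoidal signal before adding it to the input tensor. A NumPy sketch of just the signal construction, mirroring the timescale arithmetic in the diff (the length and channel count are chosen arbitrarily):

import numpy as np

def sinusoid_signal(length, channels, min_timescale=1.0, max_timescale=1.0e4):
    position = np.arange(length, dtype=np.float32)
    num_timescales = channels // 2
    log_timescale_increment = (np.log(max_timescale / min_timescale)
                               / (num_timescales - 1))
    inv_timescales = min_timescale * np.exp(
        np.arange(num_timescales, dtype=np.float32) * -log_timescale_increment)
    scaled_time = position[:, None] * inv_timescales[None, :]
    # one sin and one cos channel per timescale, then pad one zero column if channels is odd,
    # matching tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]])
    signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)
    return np.pad(signal, [(0, 0), (0, channels % 2)], mode='constant')

print(sinusoid_signal(length=50, channels=9).shape)   # (50, 9)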
@@ -368,14 +376,15 @@ def mask(row_lengths, col_lengths, max_row_length, max_col_length): Raises: ''' - row_mask = tf.sequence_mask(row_lengths, max_row_length) #bool, [batch, max_row_len] - col_mask = tf.sequence_mask(col_lengths, max_col_length) #bool, [batch, max_col_len] + row_mask = tf.sequence_mask(row_lengths, max_row_length) # bool, [batch, max_row_len] + col_mask = tf.sequence_mask(col_lengths, max_col_length) # bool, [batch, max_col_len] row_mask = tf.cast(tf.expand_dims(row_mask, -1), tf.float32) col_mask = tf.cast(tf.expand_dims(col_mask, -1), tf.float32) return tf.einsum('bik,bjk->bij', row_mask, col_mask) + def weighted_sum(weight, values): '''Calcualte the weighted sum. @@ -389,7 +398,3 @@ def weighted_sum(weight, values): Raises: ''' return tf.einsum('bij,bjk->bik', weight, values) - - - - diff --git a/deeppavlov/models/ranking/metrics.py b/deeppavlov/models/ranking/metrics.py index bb899e9425..98629c61de 100644 --- a/deeppavlov/models/ranking/metrics.py +++ b/deeppavlov/models/ranking/metrics.py @@ -28,12 +28,14 @@ def rank_response(y_true, y_pred): if x == 0: rank_tot += i break - return float(rank_tot)/num_examples + return float(rank_tot) / num_examples + @register_metric('r@1_insQA') def r_at_1_insQA(y_true, y_pred): return recall_at_k_insQA(y_true, y_pred, k=1) + def recall_at_k_insQA(y_true, y_pred, k): labels = np.repeat(np.expand_dims(np.asarray(y_true), axis=1), k, axis=1) predictions = np.array(y_pred) @@ -44,4 +46,3 @@ def recall_at_k_insQA(y_true, y_pred, k): if predictions[i][j] in np.arange(labels[i][j]): flags[i][j] = 1. return np.mean((np.sum(flags, -1) >= 1.).astype(float)) - diff --git a/deeppavlov/models/ranking/mpm_siamese_network.py b/deeppavlov/models/ranking/mpm_siamese_network.py index 61e685d433..e629372e03 100644 --- a/deeppavlov/models/ranking/mpm_siamese_network.py +++ b/deeppavlov/models/ranking/mpm_siamese_network.py @@ -31,7 +31,6 @@ @register('mpm_nn') class MPMSiameseNetwork(BiLSTMSiameseNetwork): - """The class implementing a siamese neural network with bilateral multi-Perspective matching. The network architecture is based on https://arxiv.org/abs/1702.03814. 
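The mask helper above combines two tf.sequence_mask results with einsum('bik,bjk->bij', ...) to obtain a joint row/column validity mask. A NumPy sketch with invented lengths, showing the resulting [batch, max_row_len, max_col_len] tensor:

import numpy as np

def cross_mask(row_lengths, col_lengths, max_row_length, max_col_length):
    # boolean per-position masks, equivalent to tf.sequence_mask
    row_mask = np.arange(max_row_length)[None, :] < np.asarray(row_lengths)[:, None]
    col_mask = np.arange(max_col_length)[None, :] < np.asarray(col_lengths)[:, None]
    row_mask = row_mask.astype(np.float32)[:, :, None]   # [batch, max_row_len, 1]
    col_mask = col_mask.astype(np.float32)[:, :, None]   # [batch, max_col_len, 1]
    # same contraction as tf.einsum('bik,bjk->bij', row_mask, col_mask)
    return np.einsum('bik,bjk->bij', row_mask, col_mask)

m = cross_mask(row_lengths=[2, 3], col_lengths=[1, 4], max_row_length=3, max_col_length=4)
print(m[0])   # only the top-left 2x1 block is 1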
@@ -89,7 +88,7 @@ def create_lstm_layer_2(self): ker_in = glorot_uniform(seed=self.seed) rec_in = Orthogonal(seed=self.seed) bioutp = Bidirectional(LSTM(self.aggregation_dim, - input_shape=(self.max_sequence_length, 8*self.perspective_num,), + input_shape=(self.max_sequence_length, 8 * self.perspective_num,), kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, @@ -123,11 +122,11 @@ def create_model(self) -> Model: f_layer_b = FullMatchingLayer(self.perspective_num) f_a_forw = f_layer_f([lstm_a[0], lstm_b[0]])[0] f_a_back = f_layer_b([Lambda(lambda x: K.reverse(x, 1))(lstm_a[1]), - Lambda(lambda x: K.reverse(x, 1))(lstm_b[1])])[0] + Lambda(lambda x: K.reverse(x, 1))(lstm_b[1])])[0] f_a_back = Lambda(lambda x: K.reverse(x, 1))(f_a_back) f_b_forw = f_layer_f([lstm_b[0], lstm_a[0]])[0] f_b_back = f_layer_b([Lambda(lambda x: K.reverse(x, 1))(lstm_b[1]), - Lambda(lambda x: K.reverse(x, 1))(lstm_a[1])])[0] + Lambda(lambda x: K.reverse(x, 1))(lstm_a[1])])[0] f_b_back = Lambda(lambda x: K.reverse(x, 1))(f_b_back) mp_layer_f = MaxpoolingMatchingLayer(self.perspective_num) @@ -179,4 +178,4 @@ def create_model(self) -> Model: dense = Dense(self.dense_dim, kernel_initializer=ker_in)(reduced) dist = Dense(1, activation='sigmoid', name="score_model")(dense) model = Model([context, response], dist) - return model \ No newline at end of file + return model diff --git a/deeppavlov/models/ranking/sequential_matching_network.py b/deeppavlov/models/ranking/sequential_matching_network.py index 4b006caf7b..a9222897af 100644 --- a/deeppavlov/models/ranking/sequential_matching_network.py +++ b/deeppavlov/models/ranking/sequential_matching_network.py @@ -54,7 +54,6 @@ def __init__(self, *args, **kwargs): - self.max_sentence_len = max_sequence_length self.word_embedding_size = embedding_dim self.trainable = trainable_embeddings diff --git a/deeppavlov/models/ranking/siamese_model.py b/deeppavlov/models/ranking/siamese_model.py index b32558c705..64eb8b2d7f 100644 --- a/deeppavlov/models/ranking/siamese_model.py +++ b/deeppavlov/models/ranking/siamese_model.py @@ -87,7 +87,7 @@ def __call__(self, samples_generator: Iterable[List[np.ndarray]]) -> Union[np.nd n_responses = self._append_sample_to_batch_buffer(sample, buf) if len(buf) >= self.batch_size: for i in range(len(buf) // self.batch_size): - b = self._make_batch(buf[i*self.batch_size:(i+1)*self.batch_size]) + b = self._make_batch(buf[i * self.batch_size:(i + 1) * self.batch_size]) yp = self._predict_on_batch(b) y_pred += list(yp) lenb = len(buf) % self.batch_size @@ -133,4 +133,3 @@ def _make_batch(self, x: List[List[np.ndarray]]) -> List[np.ndarray]: z = [el[i] for el in x] b.append(np.asarray(z)) return b - diff --git a/deeppavlov/models/ranking/siamese_predictor.py b/deeppavlov/models/ranking/siamese_predictor.py index f09c7157ed..a42dccc22b 100644 --- a/deeppavlov/models/ranking/siamese_predictor.py +++ b/deeppavlov/models/ranking/siamese_predictor.py @@ -24,6 +24,7 @@ log = getLogger(__name__) + @register('siamese_predictor') class SiamesePredictor(Component): """The class for ranking or paraphrase identification using the trained siamese network in the ``interact`` mode. 
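SiameseModel.__call__ above (and SiamesePredictor just below) score samples by slicing an accumulated buffer into fixed-size chunks with buf[i * batch_size:(i + 1) * batch_size], which is what most of the spacing fixes in this area touch. A simplified plain-Python stand-in for that loop; the real code pads and post-processes the final partial batch differently, and the scoring function here is a dummy:

def predict_buffered(samples, batch_size, predict_on_batch):
    buf = list(samples)          # the real model fills this buffer incrementally
    y_pred = []
    for i in range(len(buf) // batch_size):
        y_pred += list(predict_on_batch(buf[i * batch_size:(i + 1) * batch_size]))
    tail = len(buf) % batch_size
    if tail:
        y_pred += list(predict_on_batch(buf[-tail:]))   # last, smaller batch
    return y_pred

print(predict_buffered(range(10), 4, lambda b: [x * 10 for x in b]))
# -> [0, 10, 20, ..., 90]: two full batches of 4 plus a tail of 2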
@@ -77,7 +78,7 @@ def __init__(self, if not self.attention: self._build_response_embeddings() - def __call__(self, batch: Iterable[List[np.ndarray]]) -> List[Union[List[str],str]]: + def __call__(self, batch: Iterable[List[np.ndarray]]) -> List[Union[List[str], str]]: context = next(batch) try: next(batch) @@ -85,13 +86,12 @@ def __call__(self, batch: Iterable[List[np.ndarray]]) -> List[Union[List[str],st except StopIteration: pass - if self.ranking: if len(context) == self.num_context_turns: scores = [] if self.attention: for i in range(len(self.preproc_responses) // self.batch_size + 1): - responses = self.preproc_responses[i*self.batch_size: (i+1)*self.batch_size] + responses = self.preproc_responses[i * self.batch_size: (i + 1) * self.batch_size] b = [context + el for el in responses] b = self.model._make_batch(b) sc = self.model._predict_on_batch(b) @@ -126,7 +126,7 @@ def process_event(self) -> None: def _build_response_embeddings(self) -> None: resp_vecs = [] for i in range(len(self.preproc_responses) // self.batch_size + 1): - resp_preproc = self.preproc_responses[i*self.batch_size: (i+1)*self.batch_size] + resp_preproc = self.preproc_responses[i * self.batch_size: (i + 1) * self.batch_size] resp_preproc = self.model._make_batch(resp_preproc) resp_preproc = resp_preproc resp_vecs.append(self.model._predict_response_on_batch(resp_preproc)) @@ -135,7 +135,7 @@ def _build_response_embeddings(self) -> None: def _build_preproc_responses(self) -> None: responses = list(self.responses.values()) for i in range(len(responses) // self.batch_size + 1): - el = self.preproc_func(responses[i*self.batch_size: (i+1)*self.batch_size]) + el = self.preproc_func(responses[i * self.batch_size: (i + 1) * self.batch_size]) self.preproc_responses += list(el) def rebuild_responses(self, candidates) -> None: @@ -144,6 +144,3 @@ def rebuild_responses(self, candidates) -> None: self.preproc_responses = list() self.responses = {idx: sentence for idx, sentence in enumerate(candidates)} self._build_preproc_responses() - - - diff --git a/deeppavlov/models/ranking/tf_base_matching_model.py b/deeppavlov/models/ranking/tf_base_matching_model.py index debf88c653..8255777143 100644 --- a/deeppavlov/models/ranking/tf_base_matching_model.py +++ b/deeppavlov/models/ranking/tf_base_matching_model.py @@ -52,7 +52,8 @@ def __init__(self, *args, **kwargs) self.use_logits = use_logits if mean_oov: - self.emb_matrix[1] = np.mean(self.emb_matrix[2:], axis=0) # set mean embedding for OOV token at the 2nd index + self.emb_matrix[1] = np.mean(self.emb_matrix[2:], + axis=0) # set mean embedding for OOV token at the 2nd index def _append_sample_to_batch_buffer(self, sample: List[np.ndarray], buf: List[Tuple]) -> int: """ @@ -65,9 +66,9 @@ def _append_sample_to_batch_buffer(self, sample: List[np.ndarray], buf: List[Tup a number of candidate responses """ # - batch_buffer_context = [] # [batch_size, 10, 50] - batch_buffer_context_len = [] # [batch_size, 10] - batch_buffer_response = [] # [batch_size, 50] + batch_buffer_context = [] # [batch_size, 10, 50] + batch_buffer_context_len = [] # [batch_size, 10] + batch_buffer_response = [] # [batch_size, 50] batch_buffer_response_len = [] # [batch_size] context_sentences = sample[:self.num_context_turns] @@ -160,4 +161,4 @@ def _train_on_batch(self, batch: Dict, y: List[int]) -> float: float: value of mean loss on the batch """ batch.update({self.y_true: np.array(y)}) - return self.sess.run([self.loss, self.train_op], feed_dict=batch)[0] # return the first item aka loss \ No newline at end 
of file + return self.sess.run([self.loss, self.train_op], feed_dict=batch)[0] # return the first item aka loss diff --git a/deeppavlov/models/seq2seq_go_bot/bot.py b/deeppavlov/models/seq2seq_go_bot/bot.py index 60bf6108b7..9a309c0dfd 100644 --- a/deeppavlov/models/seq2seq_go_bot/bot.py +++ b/deeppavlov/models/seq2seq_go_bot/bot.py @@ -46,6 +46,7 @@ class Seq2SeqGoalOrientedBot(NNModel): **kwargs: parameters passed to parent :class:`~deeppavlov.core.models.nn_model.NNModel` class. """ + def __init__(self, network_parameters: Dict, embedder: Component, @@ -95,7 +96,7 @@ def _init_network(self, params): return Seq2SeqGoalOrientedBotNetwork(**params) def _embed_kb_key(self, key): -# TODO: fasttext embedder to work with tokens + # TODO: fasttext embedder to work with tokens emb = np.array(self.embedder([key.split('_')], mean=True)[0]) if self.debug: log.debug("embedding key tokens='{}', embedding shape = {}" @@ -124,10 +125,10 @@ def train_on_batch(self, utters, history_list, kb_entry_list, responses): # np.ones((batch_size, max_src_len), dtype=np.float32) b_enc_ins_np = np.zeros((batch_size, max_src_len, self.embedding_size), dtype=np.float32) - b_dec_ins_np = self.tgt_vocab[self.eos_token] *\ - np.ones((batch_size, max_tgt_len), dtype=np.float32) - b_dec_outs_np = self.tgt_vocab[self.eos_token] *\ - np.ones((batch_size, max_tgt_len), dtype=np.float32) + b_dec_ins_np = self.tgt_vocab[self.eos_token] * \ + np.ones((batch_size, max_tgt_len), dtype=np.float32) + b_dec_outs_np = self.tgt_vocab[self.eos_token] * \ + np.ones((batch_size, max_tgt_len), dtype=np.float32) b_tgt_weights_np = np.zeros((batch_size, max_tgt_len), dtype=np.float32) b_kb_masks_np = np.zeros((batch_size, self.kb_size), np.float32) for i, (src_len, tgt_len, kb_entries) in \ @@ -184,6 +185,7 @@ def _idx2token(idxs): yield token else: yield self.kb_keys[idx - self.tgt_vocab_size] + return [list(_idx2token(utter_idxs)) for utter_idxs in token_idxs] def __call__(self, *batch): @@ -225,4 +227,3 @@ def save(self): def load(self): pass - diff --git a/deeppavlov/models/seq2seq_go_bot/dialog_state.py b/deeppavlov/models/seq2seq_go_bot/dialog_state.py index 49f8f0e463..48e1f6b4c7 100644 --- a/deeppavlov/models/seq2seq_go_bot/dialog_state.py +++ b/deeppavlov/models/seq2seq_go_bot/dialog_state.py @@ -30,4 +30,3 @@ def __call__(self, user_ids, utterances=None, *args, **kwargs): for user, utter in zip(user_ids, utterances): self.states[user] = self.states.get(user, []) + utter return - diff --git a/deeppavlov/models/seq2seq_go_bot/kb.py b/deeppavlov/models/seq2seq_go_bot/kb.py index 5b2cc252c1..aecde5626b 100644 --- a/deeppavlov/models/seq2seq_go_bot/kb.py +++ b/deeppavlov/models/seq2seq_go_bot/kb.py @@ -53,6 +53,7 @@ class KnowledgeBase(Estimator): **kwargs: parameters passed to parent :class:`~deeppavlov.core.models.estimator.Estimator`. 
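The mean_oov branch in tf_base_matching_model.py, rewrapped a few hunks above, assigns the out-of-vocabulary row of the embedding matrix the mean of all real word vectors. A self-contained NumPy illustration with a random matrix (row 0 as padding and row 1 as OOV, as the comment in the diff states; the sizes are made up):

import numpy as np

rng = np.random.default_rng(42)
emb_matrix = rng.normal(size=(1000, 200)).astype(np.float32)
emb_matrix[0] = 0.0                                   # PAD row
emb_matrix[1] = np.mean(emb_matrix[2:], axis=0)       # OOV <- mean of known embeddings

print(np.allclose(emb_matrix[1], emb_matrix[2:].mean(axis=0)))   # True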
""" + def __init__(self, save_path: str, load_path: str = None, @@ -82,6 +83,7 @@ def _update(self, keys, kb_columns_list, kb_items_list, update_primary_keys=True def _key_value_entries(self, kb_item, kb_columns, update=True): def _format(s): return re.sub('\s+', '_', s.lower().strip()) + first_key = _format(kb_item[kb_columns[0]]) for col in kb_columns: key = first_key + '_' + _format(col) @@ -182,18 +184,18 @@ def normalize(self, tokens, entries): ent_num_tokens = len(ent_tokens) if ' '.join(ent_tokens).strip(): for i in range(len(tokens)): - if tokens[i:i+ent_num_tokens] == ent_tokens: + if tokens[i:i + ent_num_tokens] == ent_tokens: if self.remove: - tokens = tokens[:i] + tokens[i+ent_num_tokens:] + tokens = tokens[:i] + tokens[i + ent_num_tokens:] else: - tokens = tokens[:i] + [entity] + tokens[i+ent_num_tokens:] + tokens = tokens[:i] + [entity] + tokens[i + ent_num_tokens:] return tokens def denormalize(self, tokens, entries): for entity, ent_tokens in entries: while (entity in tokens): ent_pos = tokens.index(entity) - tokens = tokens[:ent_pos] + ent_tokens + tokens[ent_pos+1:] + tokens = tokens[:ent_pos] + ent_tokens + tokens[ent_pos + 1:] return tokens def __call__(self, diff --git a/deeppavlov/models/seq2seq_go_bot/kb_attn_layer.py b/deeppavlov/models/seq2seq_go_bot/kb_attn_layer.py index c93465be84..3f9c6be5d1 100644 --- a/deeppavlov/models/seq2seq_go_bot/kb_attn_layer.py +++ b/deeppavlov/models/seq2seq_go_bot/kb_attn_layer.py @@ -21,7 +21,7 @@ class KBAttention(base.Layer): -# TODO: update class doc + # TODO: update class doc """Densely-connected layer class. Arguments: units: Integer or Long, dimensionality of the output space. @@ -104,10 +104,10 @@ def __init__(self, units, hidden_sizes, "_reuse": reuse } # print("KB shape =", self.kb_input_shape) - + def build(self, input_shape): # if in_shape[:-1] != self.kb_inputs.shape -# TODO: check input shape + # TODO: check input shape # print("in build") in_shape = input_shape[:1].concatenate(self.kb_input_shape) in_shape = in_shape[:-1].concatenate(in_shape[-1] + input_shape[-1]) @@ -120,7 +120,7 @@ def build(self, input_shape): layer = tf.layers.Dense(size, name=name, _scope=name, **self.dense_params) layer.build(in_shape) in_shape = layer.compute_output_shape(in_shape) - + self.layers.append(layer) # print("input_shape =", input_shape) @@ -131,10 +131,10 @@ def build(self, input_shape): self.output_layer.build(input_shape) # print("build = True") self.built = True - + def call(self, inputs): # print("in call") -# TODO: check input dtype + # TODO: check input dtype # Tile kb_inputs kb_inputs = self.kb_inputs diff --git a/deeppavlov/models/seq2seq_go_bot/network.py b/deeppavlov/models/seq2seq_go_bot/network.py index fecb982f5f..758006e804 100644 --- a/deeppavlov/models/seq2seq_go_bot/network.py +++ b/deeppavlov/models/seq2seq_go_bot/network.py @@ -179,7 +179,7 @@ def _build_graph(self): tf.verify_tensor_all_finite(_loss_tensor, "Non finite values in loss tensor.") self._loss = tf.reduce_sum(_loss_tensor) / tf.cast(self._batch_size, tf.float32) # self._loss = tf.reduce_mean(_loss_tensor, name='loss') -# TODO: tune clip_norm + # TODO: tune clip_norm self._train_op = \ self.get_train_op(self._loss, learning_rate=self._learning_rate, @@ -221,7 +221,7 @@ def _add_placeholders(self): [None, None], name='decoder_outputs') # _kb_embedding: [kb_size, embedding_size] -# TODO: try training embeddings + # TODO: try training embeddings kb_W = np.array(self.kb_embedding)[:, :self.embedding_size] self._kb_embedding = tf.get_variable("kb_embedding", 
shape=(kb_W.shape[0], kb_W.shape[1]), @@ -231,7 +231,7 @@ def _add_placeholders(self): # _kb_mask: [batch_size, kb_size] self._kb_mask = tf.placeholder(tf.float32, [None, None], name='kb_mask') -# TODO: compute sequence lengths on the go + # TODO: compute sequence lengths on the go # _src_sequence_lengths, _tgt_sequence_lengths: [batch_size] self._src_sequence_lengths = tf.placeholder(tf.int32, [None], @@ -272,7 +272,7 @@ def _build_encoder(self): # Run Dynamic RNN # _encoder_outputs: [max_time, batch_size, hidden_size] # _encoder_state: [batch_size, hidden_size] -# input_states? + # input_states? _encoder_outputs, _encoder_state = tf.nn.dynamic_rnn( _encoder_cell, _encoder_emb_inp, dtype=tf.float32, sequence_length=self._src_sequence_lengths, time_major=False) @@ -346,8 +346,8 @@ def build_dec_cell(enc_out, enc_seq_len, reuse=None): _decoder_emb_inp, self._tgt_sequence_lengths, time_major=False) # Copy encoder hidden state to decoder inital state _decoder_init_state = \ - _decoder_cell_tr.zero_state(self._batch_size, dtype=tf.float32)\ - .clone(cell_state=self._encoder_state) + _decoder_cell_tr.zero_state(self._batch_size, dtype=tf.float32) \ + .clone(cell_state=self._encoder_state) _decoder_tr = \ tf.contrib.seq2seq.BasicDecoder(_decoder_cell_tr, _helper_tr, initial_state=_decoder_init_state, @@ -377,21 +377,21 @@ def build_dec_cell(enc_out, enc_seq_len, reuse=None): # Decoder Init State _decoder_init_state = \ _decoder_cell_inf.zero_state(tf.shape(_tiled_encoder_outputs)[0], - dtype=tf.float32)\ - .clone(cell_state=_tiled_encoder_state) + dtype=tf.float32) \ + .clone(cell_state=_tiled_encoder_state) # Define a beam-search decoder _start_tokens = tf.tile(tf.constant([self.tgt_sos_id], tf.int32), [self._batch_size]) # _start_tokens = tf.fill([self._batch_size], self.tgt_sos_id) _decoder_inf = tf.contrib.seq2seq.BeamSearchDecoder( - cell=_decoder_cell_inf, - embedding=self._decoder_embedding, - start_tokens=_start_tokens, - end_token=self.tgt_eos_id, - initial_state=_decoder_init_state, - beam_width=self.beam_width, - output_layer=_kb_attn_layer, - length_penalty_weight=0.0) + cell=_decoder_cell_inf, + embedding=self._decoder_embedding, + start_tokens=_start_tokens, + end_token=self.tgt_eos_id, + initial_state=_decoder_init_state, + beam_width=self.beam_width, + output_layer=_kb_attn_layer, + length_penalty_weight=0.0) # Wrap into variable scope to share attention parameters # Required! @@ -421,7 +421,7 @@ def __call__(self, enc_inputs, src_seq_lengths, kb_masks, prob=False): self._kb_mask: kb_masks } ) -# TODO: implement infer probabilities + # TODO: implement infer probabilities if prob: raise NotImplementedError("Probs not available for now.") return predictions @@ -449,8 +449,8 @@ def get_learning_rate(self): # polynomial decay global_step = min(self.global_step, self.decay_steps) decayed_learning_rate = \ - (self.learning_rate - self.end_learning_rate) *\ - (1 - global_step / self.decay_steps) ** self.decay_power +\ + (self.learning_rate - self.end_learning_rate) * \ + (1 - global_step / self.decay_steps) ** self.decay_power + \ self.end_learning_rate return decayed_learning_rate @@ -465,9 +465,9 @@ def load_params(self): params = json.load(fp) for p in self.GRAPH_PARAMS: if self.opt.get(p) != params.get(p): - if p in ('kb_embedding_control_sum') and\ + if p in ('kb_embedding_control_sum') and \ (math.abs(self.opt.get(p, 0.) 
- params.get(p, 0.)) < 1e-3): - continue + continue raise ConfigError("`{}` parameter must be equal to saved model" " parameter value `{}`, but is equal to `{}`" .format(p, params.get(p), self.opt.get(p))) diff --git a/deeppavlov/models/sklearn/sklearn_component.py b/deeppavlov/models/sklearn/sklearn_component.py index ce81175602..3bca87c18e 100644 --- a/deeppavlov/models/sklearn/sklearn_component.py +++ b/deeppavlov/models/sklearn/sklearn_component.py @@ -60,6 +60,7 @@ class SklearnComponent(Estimator): e.g. ``predict``, ``predict_proba``, ``predict_log_proba``, ``transform`` ensure_list_output: whether to ensure that output for each sample is iterable (but not string) """ + def __init__(self, model_class: str, save_path: Union[str, Path] = None, load_path: Union[str, Path] = None, diff --git a/deeppavlov/models/slotfill/slotfill.py b/deeppavlov/models/slotfill/slotfill.py index 87e8ce6d74..b977225e87 100644 --- a/deeppavlov/models/slotfill/slotfill.py +++ b/deeppavlov/models/slotfill/slotfill.py @@ -29,6 +29,7 @@ @register('dstc_slotfilling') class DstcSlotFillingNetwork(Component, Serializable): """Slot filling for DSTC2 task with neural network""" + def __init__(self, threshold: float = 0.8, **kwargs): super().__init__(**kwargs) self.threshold = threshold diff --git a/deeppavlov/models/slotfill/slotfill_raw.py b/deeppavlov/models/slotfill/slotfill_raw.py index 2689eb2349..9320cb7ff0 100644 --- a/deeppavlov/models/slotfill/slotfill_raw.py +++ b/deeppavlov/models/slotfill/slotfill_raw.py @@ -29,6 +29,7 @@ @register('slotfill_raw') class SlotFillingComponent(Component, Serializable): """Slot filling using Fuzzy search""" + def __init__(self, threshold: float = 0.7, return_all: bool = False, **kwargs): super().__init__(**kwargs) self.threshold = threshold diff --git a/deeppavlov/models/spelling_correction/brillmoore/error_model.py b/deeppavlov/models/spelling_correction/brillmoore/error_model.py index fae86445ca..9d533e9556 100644 --- a/deeppavlov/models/spelling_correction/brillmoore/error_model.py +++ b/deeppavlov/models/spelling_correction/brillmoore/error_model.py @@ -47,7 +47,7 @@ class ErrorModel(Estimator): candidates_count: maximum number of replacement candidates to return for every token in the input """ - def __init__(self, dictionary: StaticDictionary, window: int=1, candidates_count: int=1, *args, **kwargs): + def __init__(self, dictionary: StaticDictionary, window: int = 1, candidates_count: int = 1, *args, **kwargs): super().__init__(*args, **kwargs) self.costs = defaultdict(itertools.repeat(float('-inf')).__next__) self.dictionary = dictionary diff --git a/deeppavlov/models/spelling_correction/electors/kenlm_elector.py b/deeppavlov/models/spelling_correction/electors/kenlm_elector.py index deb3107ad7..4eb7978be6 100644 --- a/deeppavlov/models/spelling_correction/electors/kenlm_elector.py +++ b/deeppavlov/models/spelling_correction/electors/kenlm_elector.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
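A little earlier in deeppavlov/models/seq2seq_go_bot/network.py, whose hunks end just above, get_learning_rate applies a polynomial decay between the initial and final learning rates; this diff only normalises its line continuations. A plain-Python restatement with invented hyper-parameters:

def poly_decay(step, lr=1e-3, end_lr=1e-5, decay_steps=10000, power=1.0):
    step = min(step, decay_steps)                     # freeze the decay after decay_steps
    return (lr - end_lr) * (1 - step / decay_steps) ** power + end_lr

for s in (0, 5000, 10000, 20000):
    print(s, poly_decay(s))
# 0 -> 0.001, 5000 -> ~0.000505, 10000 and beyond -> 0.00001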
-import kenlm from logging import getLogger from pathlib import Path from typing import List, Tuple +import kenlm + from deeppavlov.core.commands.utils import expand_path from deeppavlov.core.common.registry import register from deeppavlov.core.models.component import Component @@ -36,7 +37,8 @@ class KenlmElector(Component): lm: kenlm object beam_size: beam size for highest probability search """ - def __init__(self, load_path: Path, beam_size: int=4, *args, **kwargs): + + def __init__(self, load_path: Path, beam_size: int = 4, *args, **kwargs): self.lm = kenlm.Model(str(expand_path(load_path))) self.beam_size = beam_size diff --git a/deeppavlov/models/spelling_correction/levenshtein/levenshtein_searcher.py b/deeppavlov/models/spelling_correction/levenshtein/levenshtein_searcher.py index 3799ec16e0..887ac927f0 100644 --- a/deeppavlov/models/spelling_correction/levenshtein/levenshtein_searcher.py +++ b/deeppavlov/models/spelling_correction/levenshtein/levenshtein_searcher.py @@ -13,6 +13,7 @@ class LevenshteinSearcher: в соответствии с расстоянием Левенштейна """ + def __init__(self, alphabet, dictionary, operation_costs=None, allow_spaces=False, euristics='none'): self.alphabet = alphabet @@ -65,7 +66,7 @@ def _trie_search(self, word, d, transducer=None, trie = self.dictionary # инициализация переменных used_agenda_keys = set() - agenda = SortedListWithKey(key=(lambda x:x[1])) + agenda = SortedListWithKey(key=(lambda x: x[1])) h = self.h_func(word, trie.root) # agenda[self.agenda_key("", 0, trie.root)] = (0.0, 0.0, h) key, value = ("", 0, trie.root), (0.0, 0.0, h) @@ -91,7 +92,7 @@ def _trie_search(self, word, d, transducer=None, continue for curr_low, curr_cost in transducer.operation_costs[curr_up].items(): new_g = g + curr_cost - if new_g > d: #если g > d, то h можно не вычислять + if new_g > d: # если g > d, то h можно не вычислять continue if curr_low == " ": if allow_spaces and trie.is_final(index): @@ -103,7 +104,7 @@ def _trie_search(self, word, d, transducer=None, if new_index is Trie.NO_NODE: continue new_low = low + curr_low - new_h = self.h_func(word[new_pos: ], new_index) + new_h = self.h_func(word[new_pos:], new_index) new_cost = new_g + new_h if new_cost > d: continue @@ -129,8 +130,8 @@ def _precompute_euristics(self): return # вычисление минимальной стоимости операции, # приводящей к появлению ('+') или исчезновению ('-') данного символа - removal_costs = {a : np.inf for a in self.alphabet} - insertion_costs = {a : np.inf for a in self.alphabet} + removal_costs = {a: np.inf for a in self.alphabet} + insertion_costs = {a: np.inf for a in self.alphabet} if self.allow_spaces: removal_costs[' '] = np.inf insertion_costs[' '] = np.inf @@ -250,10 +251,10 @@ def _precompute_absense_costs(dictionary, removal_costs, insertion_costs, n, curr_node_removal_costs[0] = min(removal_costs[symbol] for symbol in node[0]) for j, symbols in enumerate(node[1:], 1): if len(symbols) == 0: - curr_node_removal_costs[j:] = curr_node_removal_costs[j-1] + curr_node_removal_costs[j:] = curr_node_removal_costs[j - 1] break curr_cost = min(removal_costs[symbol] for symbol in symbols) - curr_node_removal_costs[j] = min(curr_node_removal_costs[j-1], curr_cost) + curr_node_removal_costs[j] = min(curr_node_removal_costs[j - 1], curr_cost) else: curr_node_removal_costs[:] = np.inf # определение минимальной стоимости вставки @@ -288,6 +289,7 @@ class SegmentTransducer: и они равны значению по умолчанию) """ + def __init__(self, alphabet, operation_costs=None, allow_spaces=False): self.alphabet = alphabet if 
operation_costs is None: @@ -300,10 +302,10 @@ def __init__(self, alphabet, operation_costs=None, allow_spaces=False): self._make_maximal_key_lengths() # self.maximal_value_lengths = {} # for up, probs in self.operation_costs.items(): - # СЛИШКОМ МНОГО ВЫЗОВОВ, НАДО КАК-ТО ЗАПОМНИТЬ - # МАКСИМАЛЬНЫЕ ДЛИНЫ КЛЮЧЕЙ ПРИ ОБРАЩЕНИИ - # max_low_length = max(len(low) for low in probs) if (len(probs) > 0) else -1 - # self.maximal_value_lengths[up] = self.maximal_key_length + # СЛИШКОМ МНОГО ВЫЗОВОВ, НАДО КАК-ТО ЗАПОМНИТЬ + # МАКСИМАЛЬНЫЕ ДЛИНЫ КЛЮЧЕЙ ПРИ ОБРАЩЕНИИ + # max_low_length = max(len(low) for low in probs) if (len(probs) > 0) else -1 + # self.maximal_value_lengths[up] = self.maximal_key_length def get_operation_cost(self, up, low): """ @@ -341,7 +343,7 @@ def inverse(self): inversed_transducer.max_up_lengths_by_low = self.max_low_lengths_by_up return inversed_transducer - def distance(self, first, second, return_transduction = False): + def distance(self, first, second, return_transduction=False): """ Вычисляет трансдукцию минимальной стоимости, отображающую first в second @@ -374,7 +376,7 @@ def distance(self, first, second, return_transduction = False): clear_pred = (lambda x, y: x < y < np.inf) update_func = lambda x, y: min(x, y) costs, backtraces = self._fill_levenshtein_table(first, second, - update_func, add_pred, clear_pred) + update_func, add_pred, clear_pred) final_cost = costs[-1][-1] if final_cost == np.inf: transductions = [None] @@ -397,11 +399,11 @@ def transduce(self, first, second, threshold): список вида [(трансдукция, стоимость)] """ add_pred = (lambda x, y: x <= threshold) - clear_pred =(lambda x, y: False) + clear_pred = (lambda x, y: False) update_func = (lambda x, y: min(x, y)) costs, backtraces = self._fill_levenshtein_table(first, second, - update_func, add_pred, clear_pred, - threshold=threshold) + update_func, add_pred, clear_pred, + threshold=threshold) result = self._backtraces_to_transductions(first, second, backtraces, threshold, return_cost=True) return result @@ -430,7 +432,7 @@ def lower_transductions(self, word, max_cost, return_cost=True): for transduction, cost in prefixes[pos]: new_cost = cost + low_cost if new_cost <= max_cost: - new_transduction = transduction +(up, low) + new_transduction = transduction + (up, low) prefixes[pos + upperside_length].append((new_transduction, new_cost)) answer = sorted(prefixes[-1], key=(lambda x: x[0])) if return_cost: @@ -461,7 +463,7 @@ def upper_transductions(self, word, max_cost, return_cost=True): return inversed_transducer.lower_transductions(word, max_cost, return_cost) def _fill_levenshtein_table(self, first, second, update_func, add_pred, clear_pred, - threshold=None): + threshold=None): """ Функция, динамически заполняющая таблицу costs стоимости трансдукций, costs[i][j] --- минимальная стоимость трансдукции, @@ -502,10 +504,10 @@ def _fill_levenshtein_table(self, first, second, update_func, add_pred, clear_pr for a, b in zip(first, second): threshold += self.get_operation_cost(a, b) if m > n: - for a in first[n: ]: + for a in first[n:]: threshold += self.get_operation_cost(a, '') elif m < n: - for b in second[m: ]: + for b in second[m:]: threshold += self.get_operation_cost('', b) threshold *= 2 # инициализация возвращаемых массивов @@ -519,14 +521,14 @@ def _fill_levenshtein_table(self, first, second, update_func, add_pred, clear_pr for i_right in range(i, min(i + self.max_up_length, m) + 1): up = first[i: i_right] max_low_length = self.max_low_lengths_by_up.get(up, -1) - if max_low_length == -1: # no up 
key in transduction + if max_low_length == -1: # no up key in transduction continue up_costs = self.operation_costs[up] for j in range(n + 1): if costs[i][j] > threshold: continue if len(backtraces[i][j]) == 0 and i + j > 0: - continue # не нашлось обратных ссылок + continue # не нашлось обратных ссылок for j_right in range((j if i_right > i else j + 1), min(j + max_low_length, n) + 1): low = second[j: j_right] @@ -562,18 +564,18 @@ def _make_maximal_key_lengths(self): и максимальную длину элемента up в элементарной трансдукции (up, low) для каждого low """ - self.max_up_length =\ + self.max_up_length = \ (max(len(up) for up in self.operation_costs) if len(self.operation_costs) > 0 else -1) - self.max_low_length =\ + self.max_low_length = \ (max(len(low) for low in self._reversed_operation_costs) if len(self._reversed_operation_costs) > 0 else -1) self.max_low_lengths_by_up, self.max_up_lengths_by_low = dict(), dict() for up, costs in self.operation_costs.items(): - self.max_low_lengths_by_up[up] =\ + self.max_low_lengths_by_up[up] = \ max(len(low) for low in costs) if len(costs) > 0 else -1 for low, costs in self._reversed_operation_costs.items(): - self.max_up_lengths_by_low[low] =\ + self.max_up_lengths_by_low[low] = \ max(len(up) for up in costs) if len(costs) > 0 else -1 def _backtraces_to_transductions(self, first, second, backtraces, threshold, return_cost=False): @@ -603,7 +605,7 @@ def _backtraces_to_transductions(self, first, second, backtraces, threshold, ret m, n = len(first), len(second) agenda = [None] * (m + 1) for i in range(m + 1): - agenda[i] = [[] for j in range(n+1)] + agenda[i] = [[] for j in range(n + 1)] agenda[m][n] = [((), 0.0)] for i_right in range(m, -1, -1): for j_right in range(n, -1, -1): @@ -615,7 +617,7 @@ def _backtraces_to_transductions(self, first, second, backtraces, threshold, ret add_cost = self.operation_costs[up][low] for elem, cost in current_agenda: new_cost = cost + add_cost - if new_cost <= threshold: # удаление трансдукций большой стоимости + if new_cost <= threshold: # удаление трансдукций большой стоимости agenda[i][j].append((((up, low),) + elem, new_cost)) if return_cost: return agenda[0][0] diff --git a/deeppavlov/models/spelling_correction/levenshtein/tabled_trie.py b/deeppavlov/models/spelling_correction/levenshtein/tabled_trie.py index c6d91ce439..5aace19731 100644 --- a/deeppavlov/models/spelling_correction/levenshtein/tabled_trie.py +++ b/deeppavlov/models/spelling_correction/levenshtein/tabled_trie.py @@ -159,7 +159,6 @@ def words(self): branch.append(next_child) letters_with_children.append(self._get_children_and_letters(branch[-1])) - def is_final(self, index): """ Аргументы @@ -189,13 +188,13 @@ def find_partitions(self, s, max_count=1): continue next_agenda.append((child, borders, cost)) if self.is_final(child): - next_agenda.append((self.root, borders + [i+1], cost+1)) + next_agenda.append((self.root, borders + [i + 1], cost + 1)) curr_agenda = next_agenda answer = [] for curr, borders, cost in curr_agenda: if curr == self.root: borders = [0] + borders - answer.append([s[left:borders[i+1]] for i, left in enumerate(borders[:-1])]) + answer.append([s[left:borders[i + 1]] for i, left in enumerate(borders[:-1])]) return answer def __len__(self): @@ -275,8 +274,8 @@ def _get_letters(self, index, return_indexes=False): if self.dict_storage: answer = list(self.graph[index].keys()) else: - answer = [i for i, elem in enumerate(self.graph[index]) - if elem != Trie.NO_NODE] + answer = [i for i, elem in enumerate(self.graph[index]) + if elem 
!= Trie.NO_NODE] if not return_indexes: answer = [(self.alphabet[i] if i >= 0 else " ") for i in answer] return answer @@ -285,8 +284,8 @@ def _get_children_and_letters(self, index, return_indexes=False): if self.dict_storage: answer = list(self.graph[index].items()) else: - answer = [elem for elem in enumerate(self.graph[index]) - if elem[1] != Trie.NO_NODE] + answer = [elem for elem in enumerate(self.graph[index]) + if elem[1] != Trie.NO_NODE] if not return_indexes: for i, (letter_index, child) in enumerate(answer): answer[i] = (self.alphabet[letter_index], child) @@ -318,7 +317,7 @@ def minimize(self, trie, dict_storage=False, make_cashed=False, make_numpied=Fal node_classes[index] = 0 class_representatives = [index] node_key = ((), (), trie.is_final(index)) - classes, class_keys = {node_key : 0}, [node_key] + classes, class_keys = {node_key: 0}, [node_key] curr_index = 1 for index in order[1:]: letter_indexes = tuple(trie._get_letters(index, return_indexes=True)) @@ -348,9 +347,9 @@ def minimize(self, trie, dict_storage=False, make_cashed=False, make_numpied=Fal new_final = np.array(new_final, dtype=bool) else: new_graph = [[Trie.NO_NODE for a in trie.alphabet] for i in range(L)] - for (indexes, children, final), class_index in\ + for (indexes, children, final), class_index in \ sorted(classes.items(), key=(lambda x: x[1])): - row = new_graph[L-class_index-1] + row = new_graph[L - class_index - 1] for i, child_index in zip(indexes, children): row[i] = L - child_index - 1 compressed.graph = new_graph @@ -386,7 +385,7 @@ def generate_postorder(self, trie): while len(stack) > 0: index = stack[-1] color = colors[index] - if color == 'white': # вершина ещё не обрабатывалась + if color == 'white': # вершина ещё не обрабатывалась colors[index] = 'grey' for child in trie._get_children(index): # проверяем, посещали ли мы ребёнка раньше @@ -403,7 +402,7 @@ def generate_postorder(self, trie): def load_trie(infile): with open(infile, "r", encoding="utf8") as fin: line = fin.readline().strip() - flags = [x=='T' for x in line.split()] + flags = [x == 'T' for x in line.split()] if len(flags) != len(Trie.ATTRS) + 1: raise ValueError("Wrong file format") nodes_number, root = map(int, fin.readline().strip().split()) @@ -413,7 +412,7 @@ def load_trie(infile): setattr(trie, attr, flags[i]) read_data = flags[-1] final = [False] * nodes_number - #print(len(alphabet), nodes_number) + # print(len(alphabet), nodes_number) if trie.dict_storage: graph = [defaultdict(lambda: -1) for _ in range(nodes_number)] elif trie.is_numpied: diff --git a/deeppavlov/models/squad/squad.py b/deeppavlov/models/squad/squad.py index 3a50eb430a..55c548cbf1 100644 --- a/deeppavlov/models/squad/squad.py +++ b/deeppavlov/models/squad/squad.py @@ -51,6 +51,7 @@ class SquadModel(LRScheduledTFModel): min_learning_rate: minimal learning rate, is used in learning rate decay noans_token: boolean, flags whether to use special no_ans token to make model able not to answer on question """ + def __init__(self, word_emb: np.ndarray, char_emb: np.ndarray, context_limit: int = 450, question_limit: int = 150, char_limit: int = 16, train_char_emb: bool = True, char_hidden_size: int = 100, encoder_hidden_size: int = 75, attention_hidden_size: int = 75, keep_prob: float = 0.7, @@ -217,8 +218,8 @@ def _init_placeholders(self): self.cc_ph = tf.placeholder(shape=(None, None, self.char_limit), dtype=tf.int32, name='cc_ph') self.q_ph = tf.placeholder(shape=(None, None), dtype=tf.int32, name='q_ph') self.qc_ph = tf.placeholder(shape=(None, None, 
self.char_limit), dtype=tf.int32, name='qc_ph') - self.y1_ph = tf.placeholder(shape=(None, ), dtype=tf.int32, name='y1_ph') - self.y2_ph = tf.placeholder(shape=(None, ), dtype=tf.int32, name='y2_ph') + self.y1_ph = tf.placeholder(shape=(None,), dtype=tf.int32, name='y1_ph') + self.y2_ph = tf.placeholder(shape=(None,), dtype=tf.int32, name='y2_ph') self.lear_rate_ph = tf.placeholder_with_default(0.0, shape=[], name='learning_rate') self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=[], name='keep_prob_ph') diff --git a/deeppavlov/models/squad/utils.py b/deeppavlov/models/squad/utils.py index f04ab2b88a..d9ac2e4d92 100644 --- a/deeppavlov/models/squad/utils.py +++ b/deeppavlov/models/squad/utils.py @@ -48,11 +48,11 @@ def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers= init_fw, init_bw = self.inits[layer] mask_fw, mask_bw = self.dropout_mask[layer] with tf.variable_scope('fw_{}'.format(layer), reuse=tf.AUTO_REUSE): - out_fw, _ = gru_fw(outputs[-1] * mask_fw, (init_fw, )) + out_fw, _ = gru_fw(outputs[-1] * mask_fw, (init_fw,)) with tf.variable_scope('bw_{}'.format(layer), reuse=tf.AUTO_REUSE): inputs_bw = tf.reverse_sequence( outputs[-1] * mask_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1) - out_bw, _ = gru_bw(inputs_bw, (init_bw, )) + out_bw, _ = gru_bw(inputs_bw, (init_bw,)) out_bw = tf.reverse_sequence( out_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1) outputs.append(tf.concat([out_fw, out_bw], axis=2)) diff --git a/deeppavlov/models/tokenizers/lazy_tokenizer.py b/deeppavlov/models/tokenizers/lazy_tokenizer.py index 21af61a65f..f437bcbfb8 100644 --- a/deeppavlov/models/tokenizers/lazy_tokenizer.py +++ b/deeppavlov/models/tokenizers/lazy_tokenizer.py @@ -26,6 +26,7 @@ @register('lazy_tokenizer') class LazyTokenizer(Component): """Tokenizes if there is something to tokenize.""" + def __init__(self, **kwargs): pass diff --git a/deeppavlov/models/tokenizers/nltk_moses_tokenizer.py b/deeppavlov/models/tokenizers/nltk_moses_tokenizer.py index c42fb723c9..64c34b7fcf 100644 --- a/deeppavlov/models/tokenizers/nltk_moses_tokenizer.py +++ b/deeppavlov/models/tokenizers/nltk_moses_tokenizer.py @@ -32,7 +32,7 @@ class NLTKMosesTokenizer(Component): escape: whether escape characters for use in html markup """ - def __init__(self, escape: bool=False, *args, **kwargs): + def __init__(self, escape: bool = False, *args, **kwargs): self.escape = escape self.tokenizer = MosesTokenizer() self.detokenizer = MosesDetokenizer() diff --git a/deeppavlov/models/tokenizers/nltk_tokenizer.py b/deeppavlov/models/tokenizers/nltk_tokenizer.py index 12be4a5444..08a2072ee5 100644 --- a/deeppavlov/models/tokenizers/nltk_tokenizer.py +++ b/deeppavlov/models/tokenizers/nltk_tokenizer.py @@ -31,6 +31,7 @@ class NLTKTokenizer(Component): Attributes: tokenizer: tokenizer instance from nltk.tokenizers """ + def __init__(self, tokenizer: str = "wordpunct_tokenize", download: bool = False, *args, **kwargs): if download: diff --git a/deeppavlov/models/tokenizers/ru_sent_tokenizer.py b/deeppavlov/models/tokenizers/ru_sent_tokenizer.py index f8e38d470b..15055d5c37 100644 --- a/deeppavlov/models/tokenizers/ru_sent_tokenizer.py +++ b/deeppavlov/models/tokenizers/ru_sent_tokenizer.py @@ -34,11 +34,11 @@ class RuSentTokenizer(Component): Use default value if working on news or fiction texts """ + def __init__(self, shortenings: Set[str] = SHORTENINGS, joining_shortenings: Set[str] = JOINING_SHORTENINGS, paired_shortenings: Set[Tuple[str, str]] = PAIRED_SHORTENINGS, **kwargs): - self.shortenings = 
shortenings self.joining_shortenings = joining_shortenings self.paired_shortenings = paired_shortenings diff --git a/deeppavlov/models/tokenizers/ru_tokenizer.py b/deeppavlov/models/tokenizers/ru_tokenizer.py index 3017dce007..e51478a079 100644 --- a/deeppavlov/models/tokenizers/ru_tokenizer.py +++ b/deeppavlov/models/tokenizers/ru_tokenizer.py @@ -99,7 +99,7 @@ def __call__(self, batch: Union[List[str], List[List[str]]]) -> \ raise TypeError( "StreamSpacyTokenizer.__call__() is not implemented for `{}`".format(type(batch[0]))) - def _tokenize(self, data: List[str], ngram_range: Tuple[int, int]=(1, 1), lowercase: bool=True)\ + def _tokenize(self, data: List[str], ngram_range: Tuple[int, int] = (1, 1), lowercase: bool = True) \ -> Generator[List[str], Any, None]: """Tokenize a list of documents. @@ -135,7 +135,7 @@ def _tokenize(self, data: List[str], ngram_range: Tuple[int, int]=(1, 1), lowerc processed_doc = ngramize(filtered, ngram_range=_ngram_range) yield from processed_doc - def _lemmatize(self, data: List[str], ngram_range: Tuple[int, int]=(1, 1)) -> \ + def _lemmatize(self, data: List[str], ngram_range: Tuple[int, int] = (1, 1)) -> \ Generator[List[str], Any, None]: """Lemmatize a list of documents. @@ -171,7 +171,7 @@ def _lemmatize(self, data: List[str], ngram_range: Tuple[int, int]=(1, 1)) -> \ processed_doc = ngramize(filtered, ngram_range=_ngram_range) yield from processed_doc - def _filter(self, items: List[str], alphas_only: bool=True) -> List[str]: + def _filter(self, items: List[str], alphas_only: bool = True) -> List[str]: """Filter a list of tokens/lemmas. Args: @@ -205,5 +205,3 @@ def set_stopwords(self, stopwords: List[str]) -> None: """ self.stopwords = stopwords - - diff --git a/deeppavlov/models/tokenizers/spacy_tokenizer.py b/deeppavlov/models/tokenizers/spacy_tokenizer.py index 567d8204af..11521a4f4f 100644 --- a/deeppavlov/models/tokenizers/spacy_tokenizer.py +++ b/deeppavlov/models/tokenizers/spacy_tokenizer.py @@ -178,7 +178,7 @@ def _lemmatize(self, data: List[str], ngram_range: Optional[Tuple[int, int]] = N processed_doc = ngramize(filtered, ngram_range=_ngram_range) yield from processed_doc - def _filter(self, items: List[str], alphas_only: bool=True) -> List[str]: + def _filter(self, items: List[str], alphas_only: bool = True) -> List[str]: """Filter a list of tokens/lemmas. Args: diff --git a/deeppavlov/models/tokenizers/split_tokenizer.py b/deeppavlov/models/tokenizers/split_tokenizer.py index 4e543784ab..79a15a677a 100644 --- a/deeppavlov/models/tokenizers/split_tokenizer.py +++ b/deeppavlov/models/tokenizers/split_tokenizer.py @@ -25,6 +25,7 @@ class SplitTokenizer(Component): Doesn't have any parameters. 
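The tokenizers reformatted here (ru_tokenizer, spacy_tokenizer) expose an ngram_range argument and pass the filtered tokens through an ngramize helper. That helper is not shown in this diff, so the following is only a simplified stand-in to illustrate what an (1, 2) n-gram range produces:

def ngrams(tokens, ngram_range=(1, 1)):
    # emit all n-grams for n between ngram_range[0] and ngram_range[1], inclusive
    lo, hi = ngram_range
    out = []
    for n in range(lo, hi + 1):
        out += [' '.join(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
    return out

print(ngrams(['deep', 'pavlov', 'rocks'], (1, 2)))
# ['deep', 'pavlov', 'rocks', 'deep pavlov', 'pavlov rocks']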
""" + def __init__(self, **kwargs) -> None: pass diff --git a/deeppavlov/models/vectorizers/word_vectorizer.py b/deeppavlov/models/vectorizers/word_vectorizer.py index 7d04ee8b92..7f93c94556 100644 --- a/deeppavlov/models/vectorizers/word_vectorizer.py +++ b/deeppavlov/models/vectorizers/word_vectorizer.py @@ -79,6 +79,7 @@ class DictionaryVectorizer(WordIndexVectorizer): min_freq: minimal frequency of tag to memorize this tag, unk_token: unknown token to be yielded for unknown words """ + def __init__(self, save_path: str, load_path: Union[str, List[str]], min_freq: int = 1, unk_token: str = None, **kwargs) -> None: super().__init__(save_path, load_path, **kwargs) @@ -116,7 +117,7 @@ def load(self) -> None: labels_by_words[word].update(labels.split()) self._initialize(labels_by_words) - def _initialize(self, labels_by_words : Dict): + def _initialize(self, labels_by_words: Dict): self._i2t = [self.unk_token] if self.unk_token is not None else [] self._t2i = defaultdict(lambda: self.unk_token) freq = defaultdict(int) @@ -286,4 +287,3 @@ def _get_tag_indexes(self, pymorphy_tag): tag = self.converter(str(pymorphy_tag)) answer = self.memorized_tag_indexes[pymorphy_tag] = self.find_compatible(tag) return answer - diff --git a/deeppavlov/skills/aiml_skill/aiml_skill.py b/deeppavlov/skills/aiml_skill/aiml_skill.py index 384730ddff..72c69113ca 100644 --- a/deeppavlov/skills/aiml_skill/aiml_skill.py +++ b/deeppavlov/skills/aiml_skill/aiml_skill.py @@ -16,6 +16,8 @@ from pathlib import Path from typing import Tuple, Optional, List from logging import getLogger +from pathlib import Path +from typing import Tuple, Optional, List import aiml diff --git a/deeppavlov/skills/dsl_skill/context.py b/deeppavlov/skills/dsl_skill/context.py index b1dc1b3cf4..acbfc6c5b9 100644 --- a/deeppavlov/skills/dsl_skill/context.py +++ b/deeppavlov/skills/dsl_skill/context.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional, Union, Dict - import json +from typing import Optional, Union, Dict from deeppavlov.skills.dsl_skill.utils import UserId diff --git a/deeppavlov/utils/pip_wrapper/__init__.py b/deeppavlov/utils/pip_wrapper/__init__.py index 24cb413c4d..e2d482331e 100644 --- a/deeppavlov/utils/pip_wrapper/__init__.py +++ b/deeppavlov/utils/pip_wrapper/__init__.py @@ -1 +1 @@ -from .pip_wrapper import * \ No newline at end of file +from .pip_wrapper import * diff --git a/deeppavlov/utils/pip_wrapper/pip_wrapper.py b/deeppavlov/utils/pip_wrapper/pip_wrapper.py index 175abfdc46..a829af0cc6 100644 --- a/deeppavlov/utils/pip_wrapper/pip_wrapper.py +++ b/deeppavlov/utils/pip_wrapper/pip_wrapper.py @@ -13,13 +13,13 @@ def install(*packages): - if any(_tf_re.match(package) for package in packages)\ + if any(_tf_re.match(package) for package in packages) \ and b'tensorflow-gpu' in subprocess.check_output([sys.executable, '-m', 'pip', 'freeze'], env=os.environ.copy()): log.warn('found tensorflow-gpu installed, so upgrading it instead of tensorflow') packages = [_tf_re.sub(r'tensorflow-gpu\1', package) for package in packages] result = subprocess.check_call([sys.executable, '-m', 'pip', 'install', - *[re.sub(r'\s', '', package) for package in packages]], + *[re.sub(r'\s', '', package) for package in packages]], env=os.environ.copy()) return result diff --git a/deeppavlov/utils/server/server.py b/deeppavlov/utils/server/server.py index fb8e12333f..c47710c0b2 100644 --- a/deeppavlov/utils/server/server.py +++ b/deeppavlov/utils/server/server.py @@ -25,8 +25,8 @@ from pydantic import BaseConfig, BaseModel, Schema from pydantic.fields import Field from pydantic.main import MetaModel -from starlette.responses import RedirectResponse from starlette.middleware.cors import CORSMiddleware +from starlette.responses import RedirectResponse from deeppavlov.core.commands.infer import build_model from deeppavlov.core.commands.utils import parse_config @@ -42,6 +42,7 @@ class ProbeFilter(logging.Filter): """ProbeFilter class is used to filter POST requests to /probe endpoint from logs.""" + def filter(self, record: logging.LogRecord) -> bool: """To log the record method should return True.""" return 'POST /probe HTTP' not in record.getMessage() @@ -116,6 +117,7 @@ def get_ssl_params(server_params: dict, def redirect_root_to_docs(fast_app: FastAPI, func_name: str, endpoint: str, method: str) -> None: """Adds api route to server that redirects user from root to docs with opened `endpoint` description.""" + @fast_app.get('/', include_in_schema=False) async def redirect_to_docs() -> RedirectResponse: operation_id = generate_operation_id_for_path(name=func_name, path=endpoint, method=method) @@ -192,6 +194,7 @@ class Batch(BaseModel): redirect_root_to_docs(app, 'answer', model_endpoint, 'post') model_endpoint_post_example = {arg_name: ['string'] for arg_name in model_args_names} + @app.post(model_endpoint, summary='A model endpoint') async def answer(item: Batch = Body(..., example=model_endpoint_post_example)) -> List: loop = asyncio.get_event_loop() diff --git a/deeppavlov/utils/settings/log_config.json b/deeppavlov/utils/settings/log_config.json index 0f25e18038..0b5ed38b0e 100644 --- a/deeppavlov/utils/settings/log_config.json +++ b/deeppavlov/utils/settings/log_config.json @@ -23,7 +23,7 @@ "datefmt": "%Y-%m-%d %H:%M:%S" }, "uvicorn_fmt": { - "format":"%(asctime)s %(message)s", + "format": "%(asctime)s %(message)s", "datefmt": "%Y-%m-%d %H:%M:%S" } }, diff --git 
a/deeppavlov/utils/settings/socket_config.json b/deeppavlov/utils/settings/socket_config.json index 53c6b638dd..17d32dde47 100644 --- a/deeppavlov/utils/settings/socket_config.json +++ b/deeppavlov/utils/settings/socket_config.json @@ -1,5 +1,5 @@ { - "common_defaults":{ + "common_defaults": { "host": "0.0.0.0", "port": 5001, "unix_socket_file": "/tmp/deeppavlov_socket.s", diff --git a/deeppavlov/vocabs/typos.py b/deeppavlov/vocabs/typos.py index a1fb1b98b0..c6266f655f 100644 --- a/deeppavlov/vocabs/typos.py +++ b/deeppavlov/vocabs/typos.py @@ -45,7 +45,7 @@ class StaticDictionary: words_trie: trie structure of all the words """ - def __init__(self, data_dir: [Path, str]='', *args, dictionary_name: str='dictionary', **kwargs): + def __init__(self, data_dir: [Path, str] = '', *args, dictionary_name: str = 'dictionary', **kwargs): data_dir = expand_path(data_dir) / dictionary_name alphabet_path = data_dir / 'alphabet.pkl' @@ -71,7 +71,7 @@ def __init__(self, data_dir: [Path, str]='', *args, dictionary_name: str='dictio words_trie = defaultdict(set) for word in words: for i in range(len(word)): - words_trie[word[:i]].add(word[:i+1]) + words_trie[word[:i]].add(word[:i + 1]) words_trie[word] = set() words_trie = {k: sorted(v) for k, v in words_trie.items()} @@ -113,7 +113,7 @@ class RussianWordsVocab(StaticDictionary): words_trie: trie structure of all the words """ - def __init__(self, data_dir: [Path, str]='', *args, **kwargs): + def __init__(self, data_dir: [Path, str] = '', *args, **kwargs): kwargs['dictionary_name'] = 'russian_words_vocab' super().__init__(data_dir, *args, **kwargs) @@ -140,7 +140,8 @@ class Wiki100KDictionary(StaticDictionary): words_set: set of all the words words_trie: trie structure of all the words """ - def __init__(self, data_dir: [Path, str]='', *args, **kwargs): + + def __init__(self, data_dir: [Path, str] = '', *args, **kwargs): kwargs['dictionary_name'] = 'wikipedia_100K_vocab' super().__init__(data_dir, *args, **kwargs) diff --git a/setup.py b/setup.py index d7a13015d8..2f19c3de5b 100644 --- a/setup.py +++ b/setup.py @@ -58,15 +58,15 @@ def readme(): keywords=deeppavlov.__keywords__, include_package_data=True, extras_require={ - 'tests': [ - 'flake8', - 'pytest', - 'pexpect'], - 'docs': [ - 'sphinx>=1.7.9', - 'sphinx_rtd_theme>=0.4.0', - 'nbsphinx>=0.3.4', - 'ipykernel>=4.8.0' - ]}, + 'tests': [ + 'flake8', + 'pytest', + 'pexpect'], + 'docs': [ + 'sphinx>=1.7.9', + 'sphinx_rtd_theme>=0.4.0', + 'nbsphinx>=0.3.4', + 'ipykernel>=4.8.0' + ]}, **read_requirements() ) diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index 59cc944b39..2b131b6b57 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -214,20 +214,20 @@ }, "seq2seq_go_bot": { ("seq2seq_go_bot/bot_kvret_train.json", "seq2seq_go_bot", ('TI',)): - [ - ("will it snow on tuesday?", - "f78cf0f9-7d1e-47e9-aa45-33f9942c94be", - "", - "", - "", - None) - ], + [ + ("will it snow on tuesday?", + "f78cf0f9-7d1e-47e9-aa45-33f9942c94be", + "", + "", + "", + None) + ], ("seq2seq_go_bot/bot_kvret.json", "seq2seq_go_bot", ('IP',)): - [ - ("will it snow on tuesday?", - "f78cf0f9-7d1e-47e9-aa45-33f9942c94be", - None) - ] + [ + ("will it snow on tuesday?", + "f78cf0f9-7d1e-47e9-aa45-33f9942c94be", + None) + ] }, "odqa": { ("odqa/en_odqa_infer_wiki_test.json", "odqa", ('IP',)): [ONE_ARGUMENT_INFER_CHECK], @@ -373,7 +373,7 @@ def interact(config_path, model_directory, qr_list=None): p.expect(">> ") if expected_response is not None: actual_response = p.readline().decode().strip() - assert 
expected_response == actual_response,\ + assert expected_response == actual_response, \ f"Error in interacting with {model_directory} ({config_path}): {query}" p.expect("::") @@ -474,7 +474,7 @@ def interact_socket(config_path, socket_type): resp = json.loads(data) except json.decoder.JSONDecodeError: raise ValueError(f"Can't decode model response {data}") - assert resp['status'] == 'OK', f"{socket_type} socket request returned status: {resp['status']}"\ + assert resp['status'] == 'OK', f"{socket_type} socket request returned status: {resp['status']}" \ f" with {config_path}\n{logfile.getvalue().decode()}" except pexpect.exceptions.EOF: @@ -544,7 +544,7 @@ def test_consecutive_training_and_interacting(self, model, conf_file, model_dir, config_path = str(test_configs_path.joinpath(conf_file)) install_config(config_path) deep_download(config_path) - shutil.rmtree(str(model_path), ignore_errors=True) + shutil.rmtree(str(model_path), ignore_errors=True) logfile = io.BytesIO(b'') p = pexpect.popen_spawn.PopenSpawn(sys.executable + " -m deeppavlov train " + str(c), timeout=None, @@ -571,7 +571,7 @@ def test_crossvalidation(): install_config(c) deep_download(c) - shutil.rmtree(str(model_path), ignore_errors=True) + shutil.rmtree(str(model_path), ignore_errors=True) logfile = io.BytesIO(b'') p = pexpect.popen_spawn.PopenSpawn(sys.executable + f" -m deeppavlov crossval {c} --folds 2", @@ -596,7 +596,7 @@ def test_param_search(): install_config(c) deep_download(c) - shutil.rmtree(str(model_path), ignore_errors=True) + shutil.rmtree(str(model_path), ignore_errors=True) logfile = io.BytesIO(b'') p = pexpect.popen_spawn.PopenSpawn(sys.executable + f" -m deeppavlov.paramsearch {c} --folds 2", diff --git a/tests/test_tf_layers.py b/tests/test_tf_layers.py index e67ff61c8d..7ce26e31ca 100644 --- a/tests/test_tf_layers.py +++ b/tests/test_tf_layers.py @@ -146,7 +146,6 @@ def load(self, path): class TestTFLayers: - allowed_error_lvl = 0.01 * 2 ** 0.5 @staticmethod diff --git a/utils/prepare/hashes.py b/utils/prepare/hashes.py index dbcf149fda..e021beb6a3 100644 --- a/utils/prepare/hashes.py +++ b/utils/prepare/hashes.py @@ -24,7 +24,7 @@ from deeppavlov.core.data.utils import file_md5 -def tar_md5(fpath: Union[str, Path], chunk_size: int = 2**16) -> Dict[str, str]: +def tar_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> Dict[str, str]: tar = tarfile.open(fpath) res = {} while True: @@ -41,7 +41,7 @@ def tar_md5(fpath: Union[str, Path], chunk_size: int = 2**16) -> Dict[str, str]: return res -def gzip_md5(fpath: Union[str, Path], chunk_size: int = 2**16) -> str: +def gzip_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> str: file_hash = md5() with gzip.open(fpath, 'rb') as f: for chunk in iter(lambda: f.read(chunk_size), b""): @@ -49,7 +49,7 @@ def gzip_md5(fpath: Union[str, Path], chunk_size: int = 2**16) -> str: return file_hash.hexdigest() -def zip_md5(fpath: Union[str, Path], chunk_size: int = 2**16) -> Dict[str, str]: +def zip_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> Dict[str, str]: res = {} with ZipFile(fpath) as zip_f: for item in zip_f.infolist(): diff --git a/utils/prepare/registry.py b/utils/prepare/registry.py index ee1f8eb8d6..2571ede8d3 100644 --- a/utils/prepare/registry.py +++ b/utils/prepare/registry.py @@ -24,7 +24,7 @@ C_REGISTRY.clear() M_REGISTRY.clear() - for _, pkg_name, _ in pkgutil.walk_packages(deeppavlov.__path__, deeppavlov.__name__+'.'): + for _, pkg_name, _ in pkgutil.walk_packages(deeppavlov.__path__, deeppavlov.__name__ + '.'): if pkg_name 
not in ('deeppavlov.core.common.registry', 'deeppavlov.core.common.metrics_registry'): reload(import_module(pkg_name))
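The registry rebuild at the end of this diff walks every submodule of deeppavlov with pkgutil.walk_packages and re-imports it so that registration decorators run again. The same walk over the standard-library json package, used here only so the snippet runs without DeepPavlov installed:

import json
import pkgutil
from importlib import import_module

# enumerate every submodule of a package and import it, which is what makes
# registration decorators fire when the target package is deeppavlov
for _, pkg_name, _ in pkgutil.walk_packages(json.__path__, json.__name__ + '.'):
    import_module(pkg_name)
    print(pkg_name)        # typically json.decoder, json.encoder, json.scanner, json.tool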