style: code style fixes (deeppavlov#1046)
* fix: automatic code style correction

* fix: squad_iterator

* fix: delete document_bert_ner_iterator

* fix: revert json files to dev version

* fix: removed from registry

* refactor: fix merge mistakes
dilyararimovna authored and yoptar committed Oct 31, 2019
1 parent 2420556 · commit 2e6db6f
Showing 171 changed files with 815 additions and 953 deletions.
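
Most of the hunks below are whitespace corrections of the kind a PEP 8 checker or autoformatter flags: spaces around binary operators (in_x + in_y, 2 ** 30, hidden_size // 2), a blank line between a class docstring and __init__, and blank lines around nested function definitions. A minimal sketch of the recurring before/after pattern, using hypothetical names rather than code from this commit:

class ToyVocabulary:
    """Toy class illustrating the whitespace conventions applied throughout this commit."""

    # A blank line now separates the class docstring from __init__.
    def __init__(self, max_tokens: int = 2 ** 30) -> None:  # was: 2**30
        self.max_tokens = max_tokens

    def merge(self, in_x: list, in_y: list) -> list:
        # Binary operators now have spaces around them (was: in_x+in_y).
        return in_x + in_y


def register_toy(name: str = None):
    """Decorators keep a blank line around the nested function they define."""

    def decorate(fn):
        return fn

    return decorate
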
2 changes: 2 additions & 0 deletions deeppavlov/__init__.py
@@ -26,11 +26,13 @@
from .download import deep_download
from .core.common.chainer import Chainer


# TODO: make better
def train_model(config: [str, Path, dict], download: bool = False, recursive: bool = False) -> Chainer:
train_evaluate_model_from_config(config, download=download, recursive=recursive)
return build_model(config, load_trained=True)


def evaluate_model(config: [str, Path, dict], download: bool = False, recursive: bool = False) -> dict:
return train_evaluate_model_from_config(config, to_train=False, download=download, recursive=recursive)

1 change: 1 addition & 0 deletions deeppavlov/__main__.py
@@ -1,3 +1,4 @@
if __name__ == '__main__':
from .deep import main

main()
9 changes: 5 additions & 4 deletions deeppavlov/core/common/chainer.py
@@ -46,6 +46,7 @@ class Chainer(Component):
out_params: names of pipeline inference outputs
in_y: names of additional inputs for pipeline training and evaluation modes
"""

def __init__(self, in_x: Union[str, list] = None, out_params: Union[str, list] = None,
in_y: Union[str, list] = None, *args, **kwargs) -> None:
self.pipe: List[Tuple[Tuple[List[str], List[str]], List[str], Component]] = []
@@ -150,17 +151,17 @@ def append(self, component: Union[Component, FunctionType], in_x: [str, list, di

component: NNModel
main = True
assert self.train_map.issuperset(in_x+in_y), ('Arguments {} are expected but only {} are set'
.format(in_x+in_y, self.train_map))
preprocessor = Chainer(self.in_x, in_x+in_y, self.in_y)
assert self.train_map.issuperset(in_x + in_y), ('Arguments {} are expected but only {} are set'
.format(in_x + in_y, self.train_map))
preprocessor = Chainer(self.in_x, in_x + in_y, self.in_y)
for (t_in_x_keys, t_in_x), t_out, t_component in self.train_pipe:
if t_in_x_keys:
t_in_x = dict(zip(t_in_x_keys, t_in_x))
preprocessor.append(t_component, t_in_x, t_out)

def train_on_batch(*args, **kwargs):
preprocessed = preprocessor.compute(*args, **kwargs)
if len(in_x+in_y) == 1:
if len(in_x + in_y) == 1:
preprocessed = [preprocessed]
if keys:
return component.train_on_batch(**dict(zip(keys, preprocessed)))
1 change: 0 additions & 1 deletion deeppavlov/core/common/check_gpu.py
@@ -19,7 +19,6 @@

log = getLogger(__name__)


_gpu_available = None


1 change: 1 addition & 0 deletions deeppavlov/core/common/errors.py
@@ -19,6 +19,7 @@

class ConfigError(Exception):
"""Any configuration error."""

def __init__(self, message):
super(ConfigError, self).__init__()
self.message = message
2 changes: 2 additions & 0 deletions deeppavlov/core/common/metrics_registry.py
@@ -29,13 +29,15 @@ def fn_from_str(name: str) -> Callable[..., Any]:

def register_metric(metric_name: str) -> Callable[..., Any]:
"""Decorator for metric registration."""

def decorate(fn):
fn_name = fn.__module__ + ':' + fn.__name__
if metric_name in _REGISTRY and _REGISTRY[metric_name] != fn_name:
log.warning('"{}" is already registered as a metric name, the old function will be ignored'
.format(metric_name))
_REGISTRY[metric_name] = fn_name
return fn

return decorate


4 changes: 2 additions & 2 deletions deeppavlov/core/common/params.py
@@ -82,7 +82,7 @@ def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwa
_refs.clear()
_refs.update(refs)
try:
_refs[config_params['id']] = model
_refs[config_params['id']] = model
except KeyError:
pass
return model
@@ -100,7 +100,7 @@ def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwa

try:
spec = inspect.getfullargspec(obj)
if 'mode' in spec.args+spec.kwonlyargs or spec.varkw is not None:
if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None:
kwargs['mode'] = mode

component = obj(**dict(config_params, **kwargs))
1 change: 1 addition & 0 deletions deeppavlov/core/common/prints.py
@@ -18,5 +18,6 @@

class RedirectedPrints(redirect_stdout):
"""Context manager for temporarily redirecting stdout to another stream """

def __init__(self, new_target=sys.stderr):
super().__init__(new_target=new_target)
1 change: 0 additions & 1 deletion deeppavlov/core/common/registry.json
@@ -33,7 +33,6 @@
"dialog_state": "deeppavlov.models.seq2seq_go_bot.dialog_state:DialogState",
"dictionary_vectorizer": "deeppavlov.models.vectorizers.word_vectorizer:DictionaryVectorizer",
"dirty_comments_preprocessor": "deeppavlov.models.preprocessors.dirty_comments_preprocessor:DirtyCommentsPreprocessor",
"document_bert_ner_iterator": "deeppavlov.dataset_iterators.document_bert_ner_iterator:DocumentBertNerIterator",
"document_chunker": "deeppavlov.models.preprocessors.odqa_preprocessors:DocumentChunker",
"dstc2_intents_iterator": "deeppavlov.dataset_iterators.dstc2_intents_iterator:Dstc2IntentsDatasetIterator",
"dstc2_ner_iterator": "deeppavlov.dataset_iterators.dstc2_ner_iterator:Dstc2NerDatasetIterator",
1 change: 1 addition & 0 deletions deeppavlov/core/common/registry.py
@@ -45,6 +45,7 @@ def register(name: str = None) -> type:
Register classes that could be initialized from JSON configuration file.
If name is not passed, the class name is converted to snake-case.
"""

def decorate(model_cls: type, reg_name: str = None) -> type:
model_name = reg_name or short_name(model_cls)
global _REGISTRY
1 change: 1 addition & 0 deletions deeppavlov/core/data/data_learning_iterator.py
@@ -31,6 +31,7 @@ class DataLearningIterator:
shuffle: whether to shuffle data during batching
random: instance of ``Random`` initialized with a seed
"""

def split(self, *args, **kwargs):
""" Manipulate self.train, self.valid, and self.test into their final form. """
pass
7 changes: 4 additions & 3 deletions deeppavlov/core/data/simple_vocab.py
@@ -40,9 +40,10 @@ class SimpleVocabulary(Estimator):
unk_token: label assigned to unknown tokens.
freq_drop_load: if True, then frequencies of tokens are set to min_freq on the model load.
"""

def __init__(self,
special_tokens: Tuple[str, ...] = tuple(),
max_tokens: int = 2**30,
max_tokens: int = 2 ** 30,
min_freq: int = 0,
pad_with_zeros: bool = False,
unk_token: Optional[str] = None,
@@ -118,7 +119,7 @@ def load(self):
self._add_tokens_with_freqs(tokens, counts)
elif not self.load_path.parent.is_dir():
raise ConfigError("Provided `load_path` for {} doesn't exist!".format(
self.__class__.__name__))
self.__class__.__name__))
else:
raise ConfigError("`load_path` for {} is not provided!".format(self))

@@ -135,7 +136,7 @@ def load_line(self, ln):
else:
token, cnt = ln.split('\t', 1)
return token, cnt

@property
def len(self):
return len(self)
2 changes: 1 addition & 1 deletion deeppavlov/core/data/utils.py
@@ -279,7 +279,7 @@ def _copytree(src: Path, dest: Path) -> None:
shutil.copy(str(f), str(f_dest))


def file_md5(fpath: Union[str, Path], chunk_size: int = 2**16) -> Optional[str]:
def file_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> Optional[str]:
"""Return md5 hash value for file contents.
Args:
26 changes: 13 additions & 13 deletions deeppavlov/core/layers/keras_layers.py
@@ -33,7 +33,7 @@ def expand_tile(units, axis):
repetitions = [1, 1, 1, 1]
repetitions[axis] = n_time_steps
if axis == 1:
expanded = Reshape(target_shape=( (1,) + K.int_shape(units)[1:] ))(units)
expanded = Reshape(target_shape=((1,) + K.int_shape(units)[1:]))(units)
else:
expanded = Reshape(target_shape=(K.int_shape(units)[1:2] + (1,) + K.int_shape(units)[2:]))(units)
return K.tile(expanded, repetitions)
@@ -113,9 +113,9 @@ def build(self, input_shape):
self.W = []
for i in range(self.output_dim):
self.W.append(self.add_weight(name='kernel',
shape=(1, input_shape[0][-1]),
initializer='uniform',
trainable=True))
shape=(1, input_shape[0][-1]),
initializer='uniform',
trainable=True))
super(FullMatchingLayer, self).build(input_shape) # Be sure to call this at the end

def call(self, x):
@@ -153,9 +153,9 @@ def build(self, input_shape):
self.W = []
for i in range(self.output_dim):
self.W.append(self.add_weight(name='kernel',
shape=(1, input_shape[0][-1]),
initializer='uniform',
trainable=True))
shape=(1, input_shape[0][-1]),
initializer='uniform',
trainable=True))
super(MaxpoolingMatchingLayer, self).build(input_shape) # Be sure to call this at the end

def call(self, x):
@@ -193,9 +193,9 @@ def build(self, input_shape):
self.W = []
for i in range(self.output_dim):
self.W.append(self.add_weight(name='kernel',
shape=(1, input_shape[0][-1]),
initializer='uniform',
trainable=True))
shape=(1, input_shape[0][-1]),
initializer='uniform',
trainable=True))
super(AttentiveMatchingLayer, self).build(input_shape) # Be sure to call this at the end

def call(self, x):
@@ -241,9 +241,9 @@ def build(self, input_shape):
self.W = []
for i in range(self.output_dim):
self.W.append(self.add_weight(name='kernel',
shape=(1, input_shape[0][-1]),
initializer='uniform',
trainable=True))
shape=(1, input_shape[0][-1]),
initializer='uniform',
trainable=True))
super(MaxattentiveMatchingLayer, self).build(input_shape) # Be sure to call this at the end

def call(self, x):
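
The keras_layers.py hunks above mostly realign continuation lines so that wrapped keyword arguments sit directly under the first argument of the call. A small sketch of that hanging-indent convention, with a stand-in add_weight rather than the real Keras API:

class ToyMatchingLayer:
    """Minimal stand-in that records weight specs, used only to show the call formatting."""

    def __init__(self):
        self.weights = []

    def add_weight(self, name, shape, initializer, trainable):
        # Records the arguments instead of creating a real Keras variable.
        spec = (name, shape, initializer, trainable)
        self.weights.append(spec)
        return spec

    def build(self, input_shape):
        # Continuation lines are aligned with the opening parenthesis,
        # matching the corrected indentation in the diff above.
        self.W = self.add_weight(name='kernel',
                                 shape=(1, input_shape[-1]),
                                 initializer='uniform',
                                 trainable=True)

For example, ToyMatchingLayer().build((4, 8)) records a single ('kernel', (1, 8), 'uniform', True) entry; only the argument alignment is the point here.
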
16 changes: 8 additions & 8 deletions deeppavlov/core/layers/tf_attention_mechanisms.py
@@ -47,8 +47,8 @@ def general_attention(key, context, hidden_size, projected_align=False):
tf.layers.dense(key, hidden_size, kernel_initializer=xav())
r_projected_key = tf.reshape(projected_key, shape=[-1, hidden_size, 1])

lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2)
lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2)
(output_fw, output_bw), states = \
tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell,
cell_bw=lstm_bw_cell,
@@ -139,8 +139,8 @@ def cs_general_attention(key, context, hidden_size, depth, projected_align=False
kernel_initializer=xav(),
name='projected_context')

lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2)
lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2)
(output_fw, output_bw), states = \
tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell,
cell_bw=lstm_bw_cell,
@@ -192,8 +192,8 @@ def bahdanau_attention(key, context, hidden_size, projected_align=False):
tf.tile(tf.reshape(projected_key, shape=[-1, 1, hidden_size]),
[1, max_num_tokens, 1])

lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2)
lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2)
(output_fw, output_bw), states = \
tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell,
cell_bw=lstm_bw_cell,
@@ -308,8 +308,8 @@ def cs_bahdanau_attention(key, context, hidden_size, depth, projected_align=Fals
tf.tile(tf.reshape(projected_key, shape=[-1, 1, hidden_size]),
[1, max_num_tokens, 1])

lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size//2)
lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2)
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size // 2)
(output_fw, output_bw), states = \
tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell,
cell_bw=lstm_bw_cell,
56 changes: 29 additions & 27 deletions deeppavlov/core/layers/tf_csoftmax_attention.py
@@ -114,28 +114,27 @@ def attention_gen_step(hidden_for_sketch, hidden_for_attn_alignment, sketch, key
attn_alignment_dims = hidden_for_attn_alignment.get_shape().as_list()
attn_alignment_hidden_size = attn_alignment_dims[2]

repeated_sketch = tf.tile(tf.reshape(sketch, [-1, 1, hidden_size]), (1,num_tokens, 1))
concat_mem = tf.concat([hidden_for_sketch, repeated_sketch],-1)
repeated_sketch = tf.tile(tf.reshape(sketch, [-1, 1, hidden_size]), (1, num_tokens, 1))
concat_mem = tf.concat([hidden_for_sketch, repeated_sketch], -1)


concat_mem = tf.reshape(concat_mem, [-1, num_tokens, 2*hidden_size]) # dirty trick
concat_mem = tf.reshape(concat_mem, [-1, num_tokens, 2 * hidden_size]) # dirty trick
reduce_mem = tf.layers.dense(concat_mem, hidden_size)

projected_key = tf.layers.dense(key, hidden_size)
t_key = tf.reshape(projected_key,[-1, hidden_size, 1])
t_key = tf.reshape(projected_key, [-1, hidden_size, 1])

score = tf.reshape(tf.matmul(reduce_mem, t_key), [-1, num_tokens])

inv_cum_att = tf.reshape(tf.ones_like(cum_att) - cum_att, [-1, num_tokens])
att = csoftmax(score, inv_cum_att)

t_reduce_mem = tf.transpose(reduce_mem, [0,2,1])
t_hidden_for_attn_alignment = tf.transpose(hidden_for_attn_alignment, [0,2,1])
t_reduce_mem = tf.transpose(reduce_mem, [0, 2, 1])
t_hidden_for_attn_alignment = tf.transpose(hidden_for_attn_alignment, [0, 2, 1])

r_att = tf.reshape(att, [-1, num_tokens, 1])

next_sketch = tf.squeeze(tf.matmul(t_reduce_mem,r_att),-1)
aligned_hidden_sketch = tf.squeeze(tf.matmul(t_hidden_for_attn_alignment,r_att),-1)
next_sketch = tf.squeeze(tf.matmul(t_reduce_mem, r_att), -1)
aligned_hidden_sketch = tf.squeeze(tf.matmul(t_hidden_for_attn_alignment, r_att), -1)
return next_sketch, att, aligned_hidden_sketch


@@ -165,11 +164,13 @@ def attention_gen_block(hidden_for_sketch, hidden_for_attn_alignment, key, atten
aligned_hiddens = []
cum_att = tf.zeros(shape=[batch_size, num_tokens]) # cumulative attention
for i in range(attention_depth):
sketch, cum_att_, aligned_hidden = attention_gen_step(hidden_for_sketch, hidden_for_attn_alignment, sketches[-1], key, cum_att)
sketches.append(sketch) #sketch
aligned_hiddens.append(aligned_hidden) #sketch
sketch, cum_att_, aligned_hidden = attention_gen_step(hidden_for_sketch, hidden_for_attn_alignment,
sketches[-1], key, cum_att)
sketches.append(sketch) # sketch
aligned_hiddens.append(aligned_hidden) # sketch
cum_att += cum_att_
final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]),[1, attention_depth, attn_alignment_hidden_size])
final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]),
[1, attention_depth, attn_alignment_hidden_size])
return final_aligned_hiddens


@@ -197,25 +198,24 @@ def attention_bah_step(hidden_for_sketch, hidden_for_attn_alignment, sketch, cum
attn_alignment_dims = hidden_for_attn_alignment.get_shape().as_list()
attn_alignment_hidden_size = attn_alignment_dims[2]

repeated_sketch = tf.tile(tf.reshape(sketch, [-1, 1, hidden_size]), (1,num_tokens, 1))
concat_mem = tf.concat([hidden_for_sketch, repeated_sketch],-1)

repeated_sketch = tf.tile(tf.reshape(sketch, [-1, 1, hidden_size]), (1, num_tokens, 1))
concat_mem = tf.concat([hidden_for_sketch, repeated_sketch], -1)

concat_mem = tf.reshape(concat_mem, [-1, num_tokens, 2*hidden_size]) # dirty trick
concat_mem = tf.reshape(concat_mem, [-1, num_tokens, 2 * hidden_size]) # dirty trick
reduce_mem = tf.layers.dense(concat_mem, hidden_size)

score = tf.squeeze(tf.layers.dense(reduce_mem, units = 1,
use_bias=False),-1)
score = tf.squeeze(tf.layers.dense(reduce_mem, units=1,
use_bias=False), -1)
inv_cum_att = tf.reshape(tf.ones_like(cum_att) - cum_att, [-1, num_tokens])
att = csoftmax(score, inv_cum_att)

t_reduce_mem = tf.transpose(reduce_mem, [0,2,1])
t_hidden_for_attn_alignment = tf.transpose(hidden_for_attn_alignment, [0,2,1])
t_reduce_mem = tf.transpose(reduce_mem, [0, 2, 1])
t_hidden_for_attn_alignment = tf.transpose(hidden_for_attn_alignment, [0, 2, 1])

r_att = tf.reshape(att, [-1, num_tokens, 1])

next_sketch = tf.squeeze(tf.matmul(t_reduce_mem,r_att),-1)
aligned_hidden_sketch = tf.squeeze(tf.matmul(t_hidden_for_attn_alignment,r_att),-1)
next_sketch = tf.squeeze(tf.matmul(t_reduce_mem, r_att), -1)
aligned_hidden_sketch = tf.squeeze(tf.matmul(t_hidden_for_attn_alignment, r_att), -1)
return next_sketch, att, aligned_hidden_sketch


@@ -245,9 +245,11 @@ def attention_bah_block(hidden_for_sketch, hidden_for_attn_alignment, attention_
aligned_hiddens = []
cum_att = tf.zeros(shape=[batch_size, num_tokens]) # cumulative attention
for i in range(attention_depth):
sketch, cum_att_, aligned_hidden = attention_bah_step(hidden_for_sketch, hidden_for_attn_alignment, sketches[-1], cum_att)
sketches.append(sketch) #sketch
aligned_hiddens.append(aligned_hidden) #sketch
sketch, cum_att_, aligned_hidden = attention_bah_step(hidden_for_sketch, hidden_for_attn_alignment,
sketches[-1], cum_att)
sketches.append(sketch) # sketch
aligned_hiddens.append(aligned_hidden) # sketch
cum_att += cum_att_
final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]),[1, attention_depth, attn_alignment_hidden_size])
final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]),
[1, attention_depth, attn_alignment_hidden_size])
return final_aligned_hiddens
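
The tf_csoftmax_attention.py changes split calls that ran past the line-length limit, such as attention_gen_step(...) and the final tf.reshape(...), across two lines with the continuation aligned under the first argument. A plain-Python sketch of the same wrapping style, with a dummy step function so no TensorFlow is required:

def attention_step(hidden_for_sketch, hidden_for_attn_alignment,
                   previous_sketch, cumulative_attention):
    # Dummy stand-in for the real step: returns values with the same arity as attention_gen_step.
    return previous_sketch, cumulative_attention, hidden_for_attn_alignment


def attention_block(hidden_for_sketch, hidden_for_attn_alignment, attention_depth):
    sketches = [0.0]
    aligned_hiddens = []
    cum_att = 0.0
    for _ in range(attention_depth):
        # The long call is wrapped so the second line starts under the first argument,
        # mirroring the reformatted calls in the hunks above.
        sketch, cum_att_, aligned_hidden = attention_step(hidden_for_sketch, hidden_for_attn_alignment,
                                                          sketches[-1], cum_att)
        sketches.append(sketch)
        aligned_hiddens.append(aligned_hidden)
        cum_att += cum_att_
    return aligned_hiddens
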