diff --git a/deeppavlov/configs/nemo/asr.json b/deeppavlov/configs/nemo/asr.json new file mode 100644 index 0000000000..402d258337 --- /dev/null +++ b/deeppavlov/configs/nemo/asr.json @@ -0,0 +1,29 @@ +{ + "chainer": { + "in": "speech", + "pipe": [ + { + "class_name": "nemo_asr", + "nemo_params_path": "{NEMO_PATH}/quartznet15x5/quartznet15x5.yaml", + "load_path": "{NEMO_PATH}/quartznet15x5", + "in": ["speech"], + "out": ["text"] + } + ], + "out": ["text"] + }, + "metadata": { + "variables": { + "NEMO_PATH": "~/.deeppavlov/models/nemo" + }, + "requirements": [ + "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt" + ], + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/quartznet15x5.tar.gz", + "subdir": "{NEMO_PATH}" + } + ] + } +} \ No newline at end of file diff --git a/deeppavlov/configs/nemo/asr_tts.json b/deeppavlov/configs/nemo/asr_tts.json new file mode 100644 index 0000000000..3cb01515cf --- /dev/null +++ b/deeppavlov/configs/nemo/asr_tts.json @@ -0,0 +1,52 @@ +{ + "chainer": { + "in": "speech_in_encoded", + "pipe": [ + { + "class_name": "base64_encode_bytesIO", + "in": ["speech_in_encoded"], + "out": ["speech_in"] + }, + { + "class_name": "nemo_asr", + "nemo_params_path": "{NEMO_PATH}/quartznet15x5/quartznet15x5.yaml", + "load_path": "{NEMO_PATH}/quartznet15x5", + "in": ["speech_in"], + "out": ["text"] + }, + { + "class_name": "nemo_tts", + "nemo_params_path": "{TTS_PATH}/tacotron2_waveglow.yaml", + "load_path": "{TTS_PATH}", + "in": ["text"], + "out": ["speech_out"] + }, + { + "class_name": "bytesIO_decode_base64", + "in": ["speech_out"], + "out": ["speech_out_encoded"] + } + ], + "out": ["text", "speech_out_encoded"] + }, + "metadata": { + "variables": { + "NEMO_PATH": "~/.deeppavlov/models/nemo", + "TTS_PATH": "{NEMO_PATH}/tacotron2_waveglow" + }, + "requirements": [ + "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt", + "{DEEPPAVLOV_PATH}/requirements/nemo-tts.txt" + ], + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/quartznet15x5.tar.gz", + "subdir": "{NEMO_PATH}" + }, + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/tacotron2_waveglow.tar.gz", + "subdir": "{NEMO_PATH}" + } + ] + } +} \ No newline at end of file diff --git a/deeppavlov/configs/nemo/tts.json b/deeppavlov/configs/nemo/tts.json new file mode 100644 index 0000000000..8973604539 --- /dev/null +++ b/deeppavlov/configs/nemo/tts.json @@ -0,0 +1,31 @@ +{ + "chainer": { + "in": ["text", "filepath"], + "pipe": [ + { + "class_name": "nemo_tts", + "nemo_params_path": "{TTS_PATH}/tacotron2_waveglow.yaml", + "load_path": "{TTS_PATH}", + "in": ["text", "filepath"], + "out": ["saved_path"] + } + ], + "out": ["saved_path"] + }, + "metadata": { + "variables": { + "NEMO_PATH": "~/.deeppavlov/models/nemo", + "TTS_PATH": "{NEMO_PATH}/tacotron2_waveglow" + }, + "requirements": [ + "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt", + "{DEEPPAVLOV_PATH}/requirements/nemo-tts.txt" + ], + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/tacotron2_waveglow.tar.gz", + "subdir": "{NEMO_PATH}" + } + ] + } +} \ No newline at end of file diff --git a/deeppavlov/core/common/file.py b/deeppavlov/core/common/file.py index e502b6b139..6079c68203 100644 --- a/deeppavlov/core/common/file.py +++ b/deeppavlov/core/common/file.py @@ -19,6 +19,8 @@ from pathlib import Path from typing import Union, Any +from ruamel.yaml import YAML + log = getLogger(__name__) @@ -50,3 +52,9 @@ def save_pickle(data: dict, fpath: Union[str, Path]) -> None: def load_pickle(fpath: Union[str, 
Path]) -> Any: with open(fpath, 'rb') as fin: return pickle.load(fin) + + +def read_yaml(fpath: Union[str, Path]) -> dict: + yaml = YAML(typ="safe") + with open(fpath, encoding='utf8') as fin: + return yaml.load(fin) diff --git a/deeppavlov/core/common/registry.json b/deeppavlov/core/common/registry.json index 042dc5c411..d2c1b14d1b 100644 --- a/deeppavlov/core/common/registry.json +++ b/deeppavlov/core/common/registry.json @@ -6,6 +6,7 @@ "api_router": "deeppavlov.models.api_requester.api_router:ApiRouter", "basic_classification_iterator": "deeppavlov.dataset_iterators.basic_classification_iterator:BasicClassificationDatasetIterator", "basic_classification_reader": "deeppavlov.dataset_readers.basic_classification_reader:BasicClassificationDatasetReader", + "base64_encode_bytesIO": "deeppavlov.models.nemo.common:ascii_to_bytes_io", "bert_as_summarizer": "deeppavlov.models.bert.bert_as_summarizer:BertAsSummarizer", "bert_classifier": "deeppavlov.models.bert.bert_classifier:BertClassifierModel", "bert_ner_preprocessor": "deeppavlov.models.preprocessors.bert_preprocessor:BertNerPreprocessor", @@ -22,6 +23,7 @@ "bilstm_gru_nn": "deeppavlov.models.ranking.bilstm_gru_siamese_network:BiLSTMGRUSiameseNetwork", "bilstm_nn": "deeppavlov.models.ranking.bilstm_siamese_network:BiLSTMSiameseNetwork", "bow": "deeppavlov.models.embedders.bow_embedder:BoWEmbedder", + "bytesIO_decode_base64": "deeppavlov.models.nemo.common:bytes_io_to_ascii", "capitalization_featurizer": "deeppavlov.models.preprocessors.capitalization:CapitalizationPreprocessor", "char_splitter": "deeppavlov.models.preprocessors.char_splitter:CharSplitter", "char_splitting_lowercase_preprocessor": "deeppavlov.models.preprocessors.capitalization:CharSplittingLowercasePreprocessor", @@ -85,6 +87,8 @@ "multi_squad_dataset_reader": "deeppavlov.dataset_readers.squad_dataset_reader:MultiSquadDatasetReader", "multi_squad_iterator": "deeppavlov.dataset_iterators.squad_iterator:MultiSquadIterator", "multi_squad_retr_iterator": "deeppavlov.dataset_iterators.squad_iterator:MultiSquadRetrIterator", + "nemo_asr": "deeppavlov.models.nemo.asr:NeMoASR", + "nemo_tts": "deeppavlov.models.nemo.tts:NeMoTTS", "ner": "deeppavlov.models.ner.network:NerNetwork", "ner_bio_converter": "deeppavlov.models.ner.bio:BIOMarkupRestorer", "ner_few_shot_iterator": "deeppavlov.dataset_iterators.ner_few_shot_iterator:NERFewShotIterator", diff --git a/deeppavlov/models/nemo/__init__.py b/deeppavlov/models/nemo/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/deeppavlov/models/nemo/asr.py b/deeppavlov/models/nemo/asr.py new file mode 100644 index 0000000000..70527adea3 --- /dev/null +++ b/deeppavlov/models/nemo/asr.py @@ -0,0 +1,193 @@ +# Copyright 2020 Neural Networks and Deep Learning lab, MIPT +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
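+ +"""ASR components built on NeMo: a data layer that feeds audio batches to the pipeline and the NeMoASR component that runs Jasper-based speech recognition."""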
+ +import logging +from io import BytesIO +from pathlib import Path +from typing import List, Optional, Tuple, Union, Dict + +import torch +from nemo.collections.asr import AudioToMelSpectrogramPreprocessor, JasperEncoder, JasperDecoderForCTC, GreedyCTCDecoder +from nemo.collections.asr.helpers import post_process_predictions +from nemo.collections.asr.parts.features import WaveformFeaturizer +from nemo.core.neural_types import AudioSignal, NeuralType, LengthsType +from nemo.utils.decorators import add_port_docs +from torch import Tensor +from torch.utils.data import Dataset, DataLoader + +from deeppavlov.core.common.registry import register +from deeppavlov.models.nemo.common import CustomDataLayerBase, NeMoBase + +log = logging.getLogger(__name__) + + +class AudioInferDataset(Dataset): + def __init__(self, audio_batch: List[Union[str, BytesIO]], sample_rate: int, int_values: bool, trim=False) -> None: + """Dataset reader for AudioInferDataLayer. + + Args: + audio_batch: Batch to be read. Elements could be either paths to audio files or Binary I/O objects. + sample_rate: Audio files sample rate. + int_values: If true, load samples as 32-bit integers. + trim: Trim leading and trailing silence from an audio signal if True. + + """ + self.audio_batch = audio_batch + self.featurizer = WaveformFeaturizer(sample_rate=sample_rate, int_values=int_values) + self.trim = trim + + def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: + """Processes audio batch item and extracts features. + + Args: + index: Audio batch item index. + + Returns: + features: Audio file's extracted features tensor. + features_length: Features length tensor. + + """ + sample = self.audio_batch[index] + features = self.featurizer.process(sample, trim=self.trim) + features_length = torch.tensor(features.shape[0]).long() + + return features, features_length + + def __len__(self) -> int: + return len(self.audio_batch) + + +class AudioInferDataLayer(CustomDataLayerBase): + """Data Layer for ASR pipeline inference.""" + + @property + @add_port_docs() + def output_ports(self) -> Dict[str, NeuralType]: + return { + "audio_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + "a_sig_length": NeuralType(tuple('B'), LengthsType()) + } + + def __init__(self, *, + audio_batch: List[Union[str, BytesIO]], + batch_size: int = 32, + sample_rate: int = 16000, + int_values: bool = False, + trim_silence: bool = False, + **kwargs) -> None: + """Initializes Data Loader. + + Args: + audio_batch: Batch to be read. Elements could be either paths to audio files or Binary I/O objects. + batch_size: How many samples per batch to load. + sample_rate: Target sampling rate for data. Audio files will be resampled to sample_rate if + it is not already. + int_values: If true, load data as 32-bit integers. + trim_silence: Trim leading and trailing silence from an audio signal if True. + + """ + self._sample_rate = sample_rate + + dataset = AudioInferDataset(audio_batch=audio_batch, sample_rate=sample_rate, int_values=int_values, + trim=trim_silence) + + dataloader = DataLoader(dataset=dataset, batch_size=batch_size, collate_fn=self.seq_collate_fn) + super(AudioInferDataLayer, self).__init__(dataset, dataloader, **kwargs) + + @staticmethod + def seq_collate_fn(batch: Tuple[Tuple[Tensor], Tuple[Tensor]]) -> Tuple[Optional[Tensor], Optional[Tensor]]: + """Collates batch of audio signal and audio length, zero pads audio signal. + + Args: + batch: A tuple of tuples of audio signals and signal lengths. 
This collate function assumes the signals + are 1d torch tensors (i.e. mono audio). + + Returns: + audio_signal: Zero padded audio signal tensor. + audio_length: Audio signal length tensor. + + """ + _, audio_lengths = zip(*batch) + max_audio_len = 0 + has_audio = audio_lengths[0] is not None + if has_audio: + max_audio_len = max(audio_lengths).item() + + audio_signal = [] + for sig, sig_len in batch: + if has_audio: + sig_len = sig_len.item() + if sig_len < max_audio_len: + pad = (0, max_audio_len - sig_len) + sig = torch.nn.functional.pad(sig, pad) + audio_signal.append(sig) + + if has_audio: + audio_signal = torch.stack(audio_signal) + audio_lengths = torch.stack(audio_lengths) + else: + audio_signal, audio_lengths = None, None + + return audio_signal, audio_lengths + + +@register('nemo_asr') +class NeMoASR(NeMoBase): + """ASR model on NeMo modules.""" + + def __init__(self, load_path: Union[str, Path], nemo_params_path: Union[str, Path], **kwargs) -> None: + """Initializes NeuralModules for ASR. + + Args: + load_path: Path to a directory with pretrained checkpoints for JasperEncoder and JasperDecoderForCTC. + nemo_params_path: Path to a file containing labels and params for AudioToMelSpectrogramPreprocessor, + JasperEncoder, JasperDecoderForCTC and AudioInferDataLayer. + + """ + super(NeMoASR, self).__init__(load_path=load_path, nemo_params_path=nemo_params_path, **kwargs) + + self.labels = self.nemo_params['labels'] + + self.data_preprocessor = AudioToMelSpectrogramPreprocessor( + **self.nemo_params['AudioToMelSpectrogramPreprocessor'] + ) + self.jasper_encoder = JasperEncoder(**self.nemo_params['JasperEncoder']) + self.jasper_decoder = JasperDecoderForCTC(num_classes=len(self.labels), **self.nemo_params['JasperDecoder']) + self.greedy_decoder = GreedyCTCDecoder() + self.modules_to_restore = [self.jasper_encoder, self.jasper_decoder] + + self.load() + + def __call__(self, audio_batch: List[Union[str, BytesIO]]) -> List[str]: + """Transcribes audio batch to text. + + Args: + audio_batch: Batch to be transcribed. Elements could be either paths to audio files or Binary I/O objects. + + Returns: + text_batch: Batch of transcripts. + + """ + data_layer = AudioInferDataLayer(audio_batch=audio_batch, **self.nemo_params['AudioToTextDataLayer']) + audio_signal, audio_signal_len = data_layer() + processed_signal, processed_signal_len = self.data_preprocessor(input_signal=audio_signal, + length=audio_signal_len) + encoded, encoded_len = self.jasper_encoder(audio_signal=processed_signal, length=processed_signal_len) + log_probs = self.jasper_decoder(encoder_output=encoded) + predictions = self.greedy_decoder(log_probs=log_probs) + eval_tensors = [predictions] + tensors = self.neural_factory.infer(tensors=eval_tensors) + text_batch = post_process_predictions(tensors[0], self.labels) + + return text_batch diff --git a/deeppavlov/models/nemo/common.py b/deeppavlov/models/nemo/common.py new file mode 100644 index 0000000000..4fe2898195 --- /dev/null +++ b/deeppavlov/models/nemo/common.py @@ -0,0 +1,117 @@ +# Copyright 2020 Neural Networks and Deep Learning lab, MIPT +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +from io import BytesIO +from logging import getLogger +from pathlib import Path +from typing import Union + +import nemo +import torch +from nemo.backends.pytorch import DataLayerNM +from torch.utils.data import Dataset, DataLoader + +from deeppavlov.core.commands.utils import expand_path +from deeppavlov.core.common.file import read_yaml +from deeppavlov.core.common.registry import register +from deeppavlov.core.models.component import Component +from deeppavlov.core.models.serializable import Serializable + +log = getLogger(__name__) + + +@register('base64_encode_bytesIO') +def ascii_to_bytes_io(batch: Union[str, list]) -> Union[BytesIO, list]: + """Recursively searches for strings in the input batch and decodes them from base64 into bytes wrapped in + Binary I/O objects. + + Args: + batch: A string or an iterable container with strings at some level of nesting. + + Returns: + The same structure where all strings are decoded from base64 into bytes wrapped in Binary I/O objects. + + """ + if isinstance(batch, str): + return BytesIO(base64.decodebytes(batch.encode())) + + return list(map(ascii_to_bytes_io, batch)) + + +@register('bytesIO_decode_base64') +def bytes_io_to_ascii(batch: Union[BytesIO, list]) -> Union[str, list]: + """Recursively searches for Binary I/O objects in the input batch and converts them into base64-encoded ASCII + strings. + + Args: + batch: A BinaryIO object or an iterable container with BinaryIO objects at some level of nesting. + + Returns: + The same structure where all BinaryIO objects are converted into base64-encoded ASCII strings. + + """ + if isinstance(batch, BytesIO): + return base64.encodebytes(batch.read()).decode('ascii') + + return list(map(bytes_io_to_ascii, batch)) + + +class NeMoBase(Component, Serializable): + """Base class for NeMo Chainer's pipeline components.""" + + def __init__(self, load_path: Union[str, Path], nemo_params_path: Union[str, Path], **kwargs) -> None: + """Initializes NeuralModuleFactory on CPU or GPU and reads NeMo modules params from a YAML file. + + Args: + load_path: Path to a directory with pretrained checkpoints for NeMo modules. + nemo_params_path: Path to a file containing NeMo modules params.
+ + """ + super(NeMoBase, self).__init__(save_path=None, load_path=load_path, **kwargs) + placement = nemo.core.DeviceType.GPU if torch.cuda.is_available() else nemo.core.DeviceType.CPU + self.neural_factory = nemo.core.NeuralModuleFactory(placement=placement) + self.modules_to_restore = [] + self.nemo_params = read_yaml(expand_path(nemo_params_path)) + + def __call__(self, *args, **kwargs): + raise NotImplementedError + + def load(self) -> None: + """Loads pretrained checkpoints for modules from self.modules_to_restore list.""" + module_names = [str(module) for module in self.modules_to_restore] + checkpoints = nemo.utils.get_checkpoint_from_dir(module_names, self.load_path) + for module, checkpoint in zip(self.modules_to_restore, checkpoints): + log.info(f'Restoring {module} from {checkpoint}') + module.restore_from(checkpoint) + + def save(self, *args, **kwargs) -> None: + pass + + +class CustomDataLayerBase(DataLayerNM): + def __init__(self, dataset: Dataset, dataloader: DataLoader, **kwargs) -> None: + super(CustomDataLayerBase, self).__init__() + self._dataset = dataset + self._dataloader = dataloader + + def __len__(self) -> int: + return len(self._dataset) + + @property + def dataset(self) -> None: + return None + + @property + def data_iterator(self) -> torch.utils.data.DataLoader: + return self._dataloader diff --git a/deeppavlov/models/nemo/tts.py b/deeppavlov/models/nemo/tts.py new file mode 100644 index 0000000000..d31fa0bcfb --- /dev/null +++ b/deeppavlov/models/nemo/tts.py @@ -0,0 +1,210 @@ +# Copyright 2020 Neural Networks and Deep Learning lab, MIPT +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import partial +from io import BytesIO +from logging import getLogger +from pathlib import Path +from typing import List, Optional, Tuple, Union, Dict + +import torch +from nemo.collections.asr.parts import collections, parsers +from nemo.collections.asr.parts.dataset import TranscriptDataset +from nemo.collections.tts import TextEmbedding, Tacotron2Encoder, Tacotron2DecoderInfer, Tacotron2Postnet +from nemo.core.neural_types import NeuralType, LabelsType, LengthsType +from nemo.utils.decorators import add_port_docs +from nemo.utils.misc import pad_to +from scipy.io import wavfile +from torch import Tensor + +from deeppavlov.core.commands.utils import expand_path +from deeppavlov.core.common.registry import register +from deeppavlov.models.nemo.common import CustomDataLayerBase, NeMoBase +from deeppavlov.models.nemo.vocoder import WaveGlow, GriffinLim + +log = getLogger(__name__) + + +class TextDataset(TranscriptDataset): + def __init__(self, + text_batch: List[str], + labels: List[str], + bos_id: Optional[int] = None, + eos_id: Optional[int] = None, + lowercase: bool = True) -> None: + """Text dataset reader for TextDataLayer. + + Args: + text_batch: Texts to be used for speech synthesis. + labels: List of string labels to use for str2int translation. + bos_id: Label position of the beginning-of-string symbol. + eos_id: Label position of the end-of-string symbol.
lowercase: Whether to convert all uppercase characters in a text batch into lowercase characters. + + """ + parser = parsers.make_parser(labels, do_lowercase=lowercase) + self.texts = collections.Text(text_batch, parser) + self.bos_id = bos_id + self.eos_id = eos_id + + +class TextDataLayer(CustomDataLayerBase): + @property + @add_port_docs() + def output_ports(self) -> Dict[str, NeuralType]: + return { + 'texts': NeuralType(('B', 'T'), LabelsType()), + "texts_length": NeuralType(tuple('B'), LengthsType()) + } + + def __init__(self, *, + text_batch: List[str], + labels: List[str], + batch_size: int = 32, + bos_id: Optional[int] = None, + eos_id: Optional[int] = None, + pad_id: Optional[int] = None, + **kwargs) -> None: + """A simple Neural Module for loading text data. + + Args: + text_batch: Texts to be used for speech synthesis. + labels: List of string labels to use for str2int translation. + batch_size: How many strings per batch to load. + bos_id: Label position of the beginning-of-string symbol. If None, initialized as `len(labels)`. + eos_id: Label position of the end-of-string symbol. If None, initialized as `len(labels) + 1`. + pad_id: Label position of the pad symbol. If None, initialized as `len(labels) + 2`. + + """ + len_labels = len(labels) + if bos_id is None: + bos_id = len_labels + if eos_id is None: + eos_id = len_labels + 1 + if pad_id is None: + pad_id = len_labels + 2 + + dataset = TextDataset(text_batch=text_batch, labels=labels, bos_id=bos_id, eos_id=eos_id) + + dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, + collate_fn=partial(self._collate_fn, pad_id=pad_id)) + super(TextDataLayer, self).__init__(dataset, dataloader, **kwargs) + + @staticmethod + def _collate_fn(batch: Tuple[Tuple[Tensor], Tuple[Tensor]], pad_id: int) -> Tuple[Tensor, Tensor]: + """Collates batch of texts. + + Args: + batch: A tuple of tuples of texts and text lengths. + pad_id: Label position of the pad symbol. + + Returns: + texts: Padded texts tensor. + texts_len: Text lengths tensor. + + """ + texts_list, texts_len = zip(*batch) + max_len = max(texts_len) + max_len = pad_to(max_len, 8) + + texts = torch.empty(len(texts_list), max_len, dtype=torch.long) + texts.fill_(pad_id) + + for i, text in enumerate(texts_list): + texts[i].narrow(0, 0, text.size(0)).copy_(text) + + if len(texts.shape) != 2: + raise ValueError(f'Texts in collate function have shape {texts.shape}, should have 2 dimensions.') + + return texts, torch.stack(texts_len) + + +@register('nemo_tts') +class NeMoTTS(NeMoBase): + """TTS model on NeMo modules.""" + def __init__(self, + load_path: Union[str, Path], + nemo_params_path: Union[str, Path], + vocoder: str = 'waveglow', + **kwargs) -> None: + """Initializes NeuralModules for TTS. + + Args: + load_path: Path to a directory with pretrained checkpoints for TextEmbedding, Tacotron2Encoder, + Tacotron2DecoderInfer, Tacotron2Postnet and, if the WaveGlow vocoder is selected, WaveGlowInferNM. + nemo_params_path: Path to a file containing sample_rate, labels and params for TextEmbedding, + Tacotron2Encoder, Tacotron2Decoder, Tacotron2Postnet and TranscriptDataLayer. + vocoder: Vocoder used to convert from spectrograms to audio. Available options: `waveglow` (needs pretrained + checkpoint) and `griffin-lim`.
+ + """ + super(NeMoTTS, self).__init__(load_path=load_path, nemo_params_path=nemo_params_path, **kwargs) + + self.sample_rate = self.nemo_params['sample_rate'] + self.text_embedding = TextEmbedding( + len(self.nemo_params['labels']) + 3, # + 3 special chars + **self.nemo_params['TextEmbedding'] + ) + self.t2_enc = Tacotron2Encoder(**self.nemo_params['Tacotron2Encoder']) + self.t2_dec = Tacotron2DecoderInfer(**self.nemo_params['Tacotron2Decoder']) + self.t2_postnet = Tacotron2Postnet(**self.nemo_params['Tacotron2Postnet']) + self.modules_to_restore = [self.text_embedding, self.t2_enc, self.t2_dec, self.t2_postnet] + + if vocoder == 'waveglow': + self.vocoder = WaveGlow(**self.nemo_params['WaveGlowNM']) + self.modules_to_restore.append(self.vocoder) + elif vocoder == 'griffin-lim': + self.vocoder = GriffinLim(**self.nemo_params['GriffinLim']) + else: + raise ValueError(f'{vocoder} vocoder is not supported.') + + self.load() + + def __call__(self, + text_batch: List[str], + path_batch: Optional[List[str]] = None) -> Union[List[BytesIO], List[str]]: + """Creates WAV files or file objects with synthesized speech. + + Args: + text_batch: Text from which human-audible speech should be generated. + path_batch: i-th element of `path_batch` is the path to save the i-th generated speech file. If the argument + isn't specified, the synthesized speech will be stored in Binary I/O objects. + + Returns: + List of Binary I/O objects with generated speech if `path_batch` was not specified, list of paths to files + with synthesized speech otherwise. + + """ + if path_batch is None: + path_batch = [BytesIO() for _ in text_batch] + elif len(text_batch) != len(path_batch): + raise ValueError('Text batch length differs from path batch length.') + else: + path_batch = [expand_path(path) for path in path_batch] + + data_layer = TextDataLayer(text_batch=text_batch, **self.nemo_params['TranscriptDataLayer']) + transcript, transcript_len = data_layer() + transcript_embedded = self.text_embedding(char_phone=transcript) + transcript_encoded = self.t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len) + mel_decoder, gate, alignments, mel_len = self.t2_dec(char_phone_encoded=transcript_encoded, + encoded_length=transcript_len) + mel_postnet = self.t2_postnet(mel_input=mel_decoder) + infer_tensors = [self.vocoder(mel_postnet), mel_len] + evaluated_tensors = self.neural_factory.infer(tensors=infer_tensors) + synthesized_batch = self.vocoder.get_audio(*evaluated_tensors) + + for fout, synthesized_audio in zip(path_batch, synthesized_batch): + wavfile.write(fout, self.sample_rate, synthesized_audio) + + return path_batch diff --git a/deeppavlov/models/nemo/vocoder.py b/deeppavlov/models/nemo/vocoder.py new file mode 100644 index 0000000000..3ec918d266 --- /dev/null +++ b/deeppavlov/models/nemo/vocoder.py @@ -0,0 +1,131 @@ +# Copyright 2020 Neural Networks and Deep Learning lab, MIPT +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
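+ +"""Vocoder wrappers that turn mel spectrograms predicted by Tacotron 2 into audio: WaveGlow (neural, needs a pretrained checkpoint) and Griffin-Lim (iterative, checkpoint-free)."""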
+ +from logging import getLogger +from typing import List + +import librosa +import numpy as np +from nemo.core.neural_types import NmTensor +from nemo.collections.tts import WaveGlowInferNM +from numpy import ndarray + +log = getLogger(__name__) + + +class BaseVocoder: + """This class is used to maintain consistency in the construction of the TTS pipeline based on NeMo modules.""" + + def __call__(self, tensor: NmTensor) -> NmTensor: + """Should return the tensor whose evaluation result is used by the `get_audio` method to synthesize speech.""" + raise NotImplementedError + + def get_audio(self, evaluated_tensor: list, mel_len: list): + """Synthesizes audio from the evaluated tensor constructed by the `__call__` method.""" + raise NotImplementedError + + +class WaveGlow(BaseVocoder): + def __init__(self, *, denoiser_strength: float = 0.0, n_window_stride: int = 160, **kwargs) -> None: + """Wraps WaveGlowInferNM module. + + Args: + denoiser_strength: Denoiser strength for WaveGlow. + n_window_stride: Stride of window for FFT in samples used in model training. + kwargs: Named arguments for WaveGlowInferNM constructor. + + """ + self.waveglow = WaveGlowInferNM(**kwargs) + self.denoiser_strength = denoiser_strength + self.n_window_stride = n_window_stride + + def __call__(self, mel_postnet: NmTensor) -> NmTensor: + return self.waveglow(mel_spectrogram=mel_postnet) + + def __str__(self): + return str(self.waveglow) + + def restore_from(self, path: str) -> None: + """Wraps WaveGlowInferNM restore_from method.""" + self.waveglow.restore_from(path) + if self.denoiser_strength > 0: + log.info('Setup denoiser for WaveGlow') + self.waveglow.setup_denoiser() + + def get_audio(self, evaluated_audio: list, mel_len: list) -> List[ndarray]: + """Unpacks audio data from the evaluated tensor and denoises it if `denoiser_strength` > 0.""" + audios = [] + for i, batch in enumerate(evaluated_audio): + audio = batch.cpu().numpy() + for j, sample in enumerate(audio): + sample_len = mel_len[i][j] * self.n_window_stride + sample = sample[:sample_len] + if self.denoiser_strength > 0: + sample, _ = self.waveglow.denoise(sample, strength=self.denoiser_strength) + audios.append(sample) + return audios + + +class GriffinLim(BaseVocoder): + def __init__(self, *, + sample_rate: float = 16000.0, + n_fft: int = 1024, + mag_scale: float = 2048.0, + power: float = 1.2, + n_iters: int = 50, + **kwargs) -> None: + """Uses the Griffin-Lim algorithm to generate speech from spectrograms. + + Args: + sample_rate: Generated audio data sample rate. + n_fft: The number of points to use for the FFT. + mag_scale: Multiplied with the linear spectrogram to avoid audio sounding muted due to mel filter + normalization. + power: The linear spectrogram is raised to this power prior to running the Griffin-Lim algorithm. A power + greater than 1 has been shown to improve audio quality. + n_iters: Number of iterations for converting magnitude spectrograms to an audio signal.
+ + """ + self.mag_scale = mag_scale + self.power = power + self.n_iters = n_iters + self.n_fft = n_fft + self.filterbank = librosa.filters.mel(sr=sample_rate, n_fft=n_fft, **kwargs) + + def __call__(self, mel_postnet: NmTensor) -> NmTensor: + return mel_postnet + + def get_audio(self, mel_spec: list, mel_len: list) -> List[ndarray]: + audios = [] + for i, batch in enumerate(mel_spec): + log_mel = batch.cpu().numpy().transpose(0, 2, 1) + mel = np.exp(log_mel) + magnitudes = np.dot(mel, self.filterbank) * self.mag_scale + for j, sample in enumerate(magnitudes): + sample = sample[:mel_len[i][j], :] + audio = self.griffin_lim(sample.T ** self.power) + audios.append(audio) + return audios + + def griffin_lim(self, magnitudes): + """Griffin-Lim algorithm to convert magnitude spectrograms to audio signals.""" + phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape)) + complex_spec = magnitudes * phase + signal = librosa.istft(complex_spec) + + for _ in range(self.n_iters): + _, phase = librosa.magphase(librosa.stft(signal, n_fft=self.n_fft)) + complex_spec = magnitudes * phase + signal = librosa.istft(complex_spec) + return signal diff --git a/deeppavlov/requirements/nemo-asr.txt b/deeppavlov/requirements/nemo-asr.txt new file mode 100644 index 0000000000..136ecf09c9 --- /dev/null +++ b/deeppavlov/requirements/nemo-asr.txt @@ -0,0 +1,7 @@ +nemo-toolkit==0.10.0 +frozendict==1.2 +kaldi-io==0.9.4 +inflect==4.1.0 +unidecode==1.1.1 +librosa==0.7.2 +torch-stft==0.1.4 \ No newline at end of file diff --git a/deeppavlov/requirements/nemo-tts.txt b/deeppavlov/requirements/nemo-tts.txt new file mode 100644 index 0000000000..80f13f45dd --- /dev/null +++ b/deeppavlov/requirements/nemo-tts.txt @@ -0,0 +1,4 @@ +matplotlib==3.2.1 +sentencepiece==0.1.85 +transformers==2.8.0 +youtokentome==1.0.6 \ No newline at end of file diff --git a/deeppavlov/utils/server/server.py b/deeppavlov/utils/server/server.py index 751bda4814..faedec8b35 100644 --- a/deeppavlov/utils/server/server.py +++ b/deeppavlov/utils/server/server.py @@ -22,7 +22,7 @@ import uvicorn from fastapi import Body, FastAPI, HTTPException from fastapi.utils import generate_operation_id_for_path -from pydantic import BaseConfig, BaseModel, Schema +from pydantic import BaseConfig, BaseModel from pydantic.fields import Field, ModelField from pydantic.main import ModelMetaclass from starlette.middleware.cors import CORSMiddleware diff --git a/docs/apiref/models/nemo.rst b/docs/apiref/models/nemo.rst new file mode 100644 index 0000000000..27c2054336 --- /dev/null +++ b/docs/apiref/models/nemo.rst @@ -0,0 +1,32 @@ +deeppavlov.models.nemo +====================== + +.. autoclass:: deeppavlov.models.nemo.asr.NeMoASR + + .. automethod:: __init__ + .. automethod:: __call__ + +.. autoclass:: deeppavlov.models.nemo.tts.NeMoTTS + + .. automethod:: __init__ + .. automethod:: __call__ + +.. autofunction:: deeppavlov.models.nemo.common.ascii_to_bytes_io + +.. autofunction:: deeppavlov.models.nemo.common.bytes_io_to_ascii + +.. autoclass:: deeppavlov.models.nemo.asr.AudioInferDataLayer + + .. automethod:: __init__ + +.. autoclass:: deeppavlov.models.nemo.tts.TextDataLayer + + .. automethod:: __init__ + +.. autoclass:: deeppavlov.models.nemo.vocoder.WaveGlow + + .. automethod:: __init__ + +.. autoclass:: deeppavlov.models.nemo.vocoder.GriffinLim + + .. 
automethod:: __init__ diff --git a/docs/conf.py b/docs/conf.py index f867223fa3..79efc94ab8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -190,9 +190,9 @@ # -- Extension configuration ------------------------------------------------- -autodoc_mock_imports = ['tensorflow', 'tensorflow_hub', 'fastText', 'nltk', 'gensim', 'kenlm', 'spacy', 'lxml', 'torch', - 'sortedcontainers', 'russian_tagsets', 'bert_dp', 'aiml', 'rasa', 'fasttext', 'sacremoses', - 'transformers'] +autodoc_mock_imports = ['aiml', 'bert_dp', 'fastText', 'fasttext', 'gensim', 'kenlm', 'librosa', 'lxml', 'nemo', + 'nemo_asr', 'nemo_tts', 'nltk', 'rasa', 'russian_tagsets', 'sacremoses', 'sortedcontainers', + 'spacy', 'tensorflow', 'tensorflow_hub', 'torch', 'transformers'] extlinks = { 'config': (f'https://github.com/deepmipt/DeepPavlov/blob/{release}/deeppavlov/configs/%s', None) diff --git a/docs/features/models/nemo.rst b/docs/features/models/nemo.rst new file mode 100644 index 0000000000..2024ee56e5 --- /dev/null +++ b/docs/features/models/nemo.rst @@ -0,0 +1,164 @@ +Speech recognition and synthesis (ASR and TTS) +============================================== + +DeepPavlov contains models for automatic speech recognition (ASR) and speech synthesis (TTS) based on pre-built modules +from `NeMo `__ (v0.10.0), NVIDIA's toolkit for defining and building +conversational AI applications. Named arguments for module initialization are taken from the NeMo config file (not to +be confused with the DeepPavlov config file that defines the model pipeline). + +Speech recognition +------------------ + +The ASR pipeline is based on Jasper, a CTC-based end-to-end model. The model transcribes speech samples without +any additional alignment information. :class:`~deeppavlov.models.nemo.asr.NeMoASR` contains the following modules: + +- `AudioToMelSpectrogramPreprocessor `_ - uses arguments from ``AudioToMelSpectrogramPreprocessor`` section of the NeMo config file. +- `JasperEncoder `__ - uses arguments from ``JasperEncoder`` section of the NeMo config file. Needs pretrained checkpoint. +- `JasperDecoderForCTC `__ - uses arguments from ``JasperDecoder`` section of the NeMo config file. Needs pretrained checkpoint. +- `GreedyCTCDecoder `__ - doesn't use any arguments. +- :class:`~deeppavlov.models.nemo.asr.AudioInferDataLayer` - uses arguments from ``AudioToTextDataLayer`` section of the NeMo config file. + +The NeMo config file for ASR should contain a ``labels`` argument in addition to the named arguments for the modules +above. ``labels`` is a list of characters that can be output by the ASR model, as used in model training. + +Speech synthesis +---------------- + +The TTS pipeline that creates human-audible speech from text is based on the Tacotron 2 and WaveGlow models. +:class:`~deeppavlov.models.nemo.tts.NeMoTTS` contains the following modules: + +- `TextEmbedding `__ - uses arguments from ``TextEmbedding`` section of the NeMo config file. Needs pretrained checkpoint. +- `Tacotron2Encoder `__ - uses arguments from ``Tacotron2Encoder`` section of the NeMo config file. Needs pretrained checkpoint. +- `Tacotron2DecoderInfer `__ - uses arguments from ``Tacotron2Decoder`` section of the NeMo config file. Needs pretrained checkpoint. +- `Tacotron2Postnet `__ - uses arguments from ``Tacotron2Postnet`` section of the NeMo config file. Needs pretrained checkpoint. +- :class:`~deeppavlov.models.nemo.vocoder.WaveGlow` - uses arguments from ``WaveGlowNM`` section of the NeMo config file. Needs pretrained checkpoint.
+- :class:`~deeppavlov.models.nemo.vocoder.GriffinLim` - uses arguments from ``GriffinLim`` section of the NeMo config file. +- :class:`~deeppavlov.models.nemo.tts.TextDataLayer` - uses arguments from ``TranscriptDataLayer`` section of the NeMo config file. + +The NeMo config file for TTS should contain ``labels`` and ``sample_rate`` arguments in addition to the named arguments +for the modules above. ``labels`` is a list of characters used in TTS model training. + +Audio encoding and decoding +---------------------------- + +:func:`~deeppavlov.models.nemo.common.ascii_to_bytes_io` and :func:`~deeppavlov.models.nemo.common.bytes_io_to_ascii` +were added to the library to achieve uniformity when working with both text and audio data. These functions can be used +to encode binary data into an ASCII string and to decode it back. + +Quick Start +----------- + +Preparation +~~~~~~~~~~~ + +Install requirements and download model files. + +.. code:: bash + + python -m deeppavlov install asr_tts + python -m deeppavlov download asr_tts + +Examples below use the `sounddevice `_ library. Install +it with ``pip install sounddevice==0.3.15``. You may need to install the ``libportaudio2`` package with +``sudo apt-get install libportaudio2`` to make ``sounddevice`` work. + +.. note:: + ASR reads and TTS generates single-channel WAV files. Files transferred to ASR are resampled to the frequency + specified in the NeMo config file (16 kHz for models from DeepPavlov configs). + +Speech recognition +~~~~~~~~~~~~~~~~~~ + +The DeepPavlov :config:`asr ` config contains a minimal pipeline for English speech recognition using the +`QuartzNet15x5En `_ pretrained model. +To record speech on your computer and print its transcription, run the following script: + +.. code:: python + + from io import BytesIO + + import sounddevice as sd + from scipy.io.wavfile import write + + from deeppavlov import build_model, configs + + sr = 16000 + duration = 3 + + print('Recording...') + myrecording = sd.rec(duration*sr, samplerate=sr, channels=1) + sd.wait() + print('done') + + out = BytesIO() + write(out, sr, myrecording) + + model = build_model(configs.nemo.asr) + text_batch = model([out]) + + print(text_batch[0]) + +Speech synthesis +~~~~~~~~~~~~~~~~ + +The DeepPavlov :config:`tts ` config contains a minimal pipeline for speech synthesis using the +`Tacotron2 `_ and +`WaveGlow `_ pretrained models. +To generate an audio file and save it to disk, run the following script: + +.. code:: python + + from deeppavlov import build_model, configs + + model = build_model(configs.nemo.tts) + filepath_batch = model(['Hello world'], ['~/hello_world.wav']) + + print(f'Generated speech has been successfully saved at {filepath_batch[0]}') + +Speech to speech +~~~~~~~~~~~~~~~~ + +The previous examples assume that the files with speech to recognize and the generated audio files are on the same +system where DeepPavlov is running. The DeepPavlov :config:`asr_tts ` config allows sending files with +speech to recognize and receiving files with generated speech from another system. This config recognizes the received +speech and synthesizes it back. + +Run ``asr_tts`` in REST API mode: + +.. code:: bash + + python -m deeppavlov riseapi asr_tts + +The following Python script assumes that you already have a file with speech to recognize. You can use the code from +the speech recognition example to record speech on your system. ``127.0.0.1`` should be replaced with the address of +the system where DeepPavlov was started. + +.. 
code:: python + + from base64 import encodebytes, decodebytes + + from requests import post + + with open('/path/to/wav/file/with/speech', 'rb') as fin: + input_speech = fin.read() + + input_ascii = encodebytes(input_speech).decode('ascii') + + resp = post('http://127.0.0.1:5000/model', json={"speech_in_encoded": [input_ascii]}) + text, generated_speech_ascii = resp.json()[0] + generated_speech = decodebytes(generated_speech_ascii.encode()) + + with open('/path/where/to/save/generated/wav/file', 'wb') as fout: + fout.write(generated_speech) + + print(f'Speech transcription is: {text}') + +.. warning:: + NeMo library v0.10.0 doesn't allow inference with a batch size greater than one without a compatible NVIDIA GPU. + +Model training +--------------- + +To get your own pre-trained checkpoints for NeMo modules, see the `Speech recognition `_ +and `Speech Synthesis `_ tutorials. A list of pre-trained models can be found +`here `_. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 2f711a01a8..4b2edaec20 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,6 +33,7 @@ Welcome to DeepPavlov's documentation! Named Entity Recognition Neural Ranking Slot filling + Speech recognition and synthesis Spelling Correction Syntactic Parser TF-IDF Ranking diff --git a/requirements.txt b/requirements.txt index e291f03eb2..a2682276f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ pymorphy2-dicts-ru pyopenssl==19.1.0 pytelegrambotapi==3.6.7 requests==2.22.0 +ruamel.yaml==0.15.100 rusenttokenize==0.0.5 scikit-learn==0.21.2 scipy==1.4.1 diff --git a/tests/test_configs/nemo/tts2asr_test.json b/tests/test_configs/nemo/tts2asr_test.json new file mode 100644 index 0000000000..a054239e8d --- /dev/null +++ b/tests/test_configs/nemo/tts2asr_test.json @@ -0,0 +1,53 @@ +{ + "chainer": { + "in": ["text"], + "pipe": [ + { + "class_name": "nemo_tts", + "nemo_params_path": "{TTS_PATH}/tacotron2_waveglow.yaml", + "load_path": "{TTS_PATH}", + "in": ["text"], + "out": ["speech"] + }, + { + "class_name": "bytesIO_decode_base64", + "in": ["speech"], + "out": ["ascii"] + }, + { + "class_name": "base64_encode_bytesIO", + "in": ["ascii"], + "out": ["speech_restored"] + }, + { + "class_name": "nemo_asr", + "nemo_params_path": "{NEMO_PATH}/quartznet15x5/quartznet15x5.yaml", + "load_path": "{NEMO_PATH}/quartznet15x5", + "in": ["speech_restored"], + "out": ["transcription"] + } + ], + "out": ["transcription"] + }, + "metadata": { + "variables": { + "ROOT_PATH": "~/.deeppavlov", + "NEMO_PATH": "{ROOT_PATH}/models/nemo", + "TTS_PATH": "{NEMO_PATH}/tacotron2_waveglow" + }, + "requirements": [ + "{DEEPPAVLOV_PATH}/requirements/nemo-asr.txt", + "{DEEPPAVLOV_PATH}/requirements/nemo-tts.txt" + ], + "download": [ + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/tacotron2_waveglow.tar.gz", + "subdir": "{NEMO_PATH}" + }, + { + "url": "http://files.deeppavlov.ai/deeppavlov_data/nemo/quartznet15x5.tar.gz", + "subdir": "{NEMO_PATH}" + } + ] + } +} \ No newline at end of file diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py index ea87cce053..09cc86a2a2 100644 --- a/tests/test_quick_start.py +++ b/tests/test_quick_start.py @@ -257,6 +257,9 @@ "syntax_tagger": { ("syntax/syntax_ru_syntagrus_bert.json", "syntax_ru_bert", ('IP', 'TI')): [ONE_ARGUMENT_INFER_CHECK], ("syntax/ru_syntagrus_joint_parsing.json", "syntax_ru_bert", ('IP',)): [ONE_ARGUMENT_INFER_CHECK] + }, + "nemo": { + ("nemo/tts2asr_test.json", "nemo", ('IP',)): [ONE_ARGUMENT_INFER_CHECK] + } }
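A minimal round-trip sketch for the base64 helpers this PR registers as ``base64_encode_bytesIO`` and ``bytesIO_decode_base64`` (assuming the nemo-asr requirements are installed; the byte string here is an illustrative stand-in for real WAV data, not an actual audio file):

```python
import base64

from deeppavlov.models.nemo.common import ascii_to_bytes_io, bytes_io_to_ascii

raw = b'RIFF\x00\x00\x00\x00WAVE'  # illustrative stand-in for real WAV bytes

# Client side: binary audio is base64-encoded before being sent as "speech_in_encoded".
ascii_payload = base64.encodebytes(raw).decode('ascii')

# Server side: 'base64_encode_bytesIO' decodes the payload into a BytesIO for nemo_asr.
speech_in = ascii_to_bytes_io(ascii_payload)
assert speech_in.read() == raw

# 'bytesIO_decode_base64' converts BytesIO output from nemo_tts back into an ASCII string.
speech_in.seek(0)
assert bytes_io_to_ascii(speech_in) == ascii_payload

# Both helpers recurse through nested lists, so whole batches round-trip as well.
assert ascii_to_bytes_io([ascii_payload])[0].read() == raw
```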