Skip to content

Commit

Permalink
Change to file_reader_helper and file_write_helper
Browse files Browse the repository at this point in the history
  • Loading branch information
kamo-naoyuki committed Jul 1, 2019
1 parent 47e06e3 commit cd354de
Show file tree
Hide file tree
Showing 10 changed files with 71 additions and 64 deletions.
6 changes: 3 additions & 3 deletions espnet/asr/pytorch_backend/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from espnet.nets.pytorch_backend.streaming.window import WindowStreamingE2E
from espnet.transform.spectrogram import IStft
from espnet.transform.transformation import Transformation
from espnet.utils.cli_utils import FileWriterWrapper
from espnet.utils.cli_utils import file_writer_helper
from espnet.utils.deterministic_utils import set_deterministic_pytorch
from espnet.utils.dynamic_import import dynamic_import
from espnet.utils.io_utils import LoadInputsAndTargets
Expand Down Expand Up @@ -669,8 +669,8 @@ def enhance(args):

# Creates writers for outputs from the network
if args.enh_wspecifier is not None:
enh_writer = FileWriterWrapper(args.enh_wspecifier,
filetype=args.enh_filetype)
enh_writer = file_writer_helper(args.enh_wspecifier,
filetype=args.enh_filetype)
else:
enh_writer = None

Expand Down
28 changes: 16 additions & 12 deletions espnet/utils/cli_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,27 @@ def file_reader_helper(rspecifier: str, filetype: str = 'mat',
segments: str = None):
"""Read uttid and array in kaldi style
:param str rspecifier: Give as "ark:feats.ark" or "scp:feats.scp"
:param str filetype: "mat" is kaldi-matrix, "hdf5": HDF5
:param bool return_shape: Return the shape of the matrix,
instead of the matrix. This can reduce IO cost for HDF5.
:rtype: Generator[Tuple[str, np.ndarray], None, None]
This function might be a bit confusing as "ark" is used
for HDF5 to imitate "kaldi-rspecifier".
Read from kaldi-matrix ark file:
Args:
rspecifier: Give as "ark:feats.ark" or "scp:feats.scp"
filetype: "mat" is kaldi-matrix, "hdf5": HDF5
return_shape: Return the shape of the matrix,
instead of the matrix. This can reduce IO cost for HDF5.
Returns:
Generator[Tuple[str, np.ndarray], None, None]:
>>> for u, array in FileReaderWrapper('ark:feats.ark', 'mat'):
... array
Examples:
Read from kaldi-matrix ark file:
Read from HDF5 file:
>>> for u, array in file_reader_helper('ark:feats.ark', 'mat'):
... array
>>> for u, array in FileReaderWrapper('ark:feats.h5', 'hdf5'):
... array
Read from HDF5 file:
This might be a bit confusing as "ark" is used for HDF5 to imitate kaldi.
>>> for u, array in file_reader_helper('ark:feats.h5', 'hdf5'):
... array
"""
if filetype == 'mat':
Expand Down
13 changes: 7 additions & 6 deletions utils/apply-cmvn.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import numpy

from espnet.transform.cmvn import CMVN
from espnet.utils.cli_utils import FileReaderWrapper
from espnet.utils.cli_utils import FileWriterWrapper
from espnet.utils.cli_readers import file_reader_helper
from espnet.utils.cli_utils import get_commandline_args
from espnet.utils.cli_utils import is_scipy_wav_style
from espnet.utils.cli_writers import file_writer_helper


def get_parser():
Expand Down Expand Up @@ -52,7 +52,8 @@ def get_parser():
parser.add_argument('--compress', type=strtobool, default=False,
help='Save in compressed format')
parser.add_argument('--compression-method', type=int, default=2,
help='Specify the method(if mat) or gzip-level(if hdf5)')
help='Specify the method(if mat) or '
'gzip-level(if hdf5)')
parser.add_argument('stats_rspecifier_or_rxfilename',
help='Input stats. e.g. ark:stats.ark or stats.mat')
parser.add_argument('rspecifier', type=str,
Expand Down Expand Up @@ -80,7 +81,7 @@ def main():
else:
stats_filetype = args.stats_filetype

stats_dict = dict(FileReaderWrapper(
stats_dict = dict(file_reader_helper(
args.stats_rspecifier_or_rxfilename, stats_filetype))
else:
is_rspcifier = False
Expand All @@ -97,13 +98,13 @@ def main():
spk2utt=args.spk2utt,
reverse=args.reverse)

with FileWriterWrapper(
with file_writer_helper(
args.wspecifier,
filetype=args.out_filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method) as writer:
for utt, mat in FileReaderWrapper(args.rspecifier, args.in_filetype):
for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype):
if is_scipy_wav_style(mat):
# If the data is a sound file, it is given as Tuple[int, ndarray]
rate, mat = mat
Expand Down
10 changes: 5 additions & 5 deletions utils/compute-cmvn-stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import numpy as np

from espnet.transform.transformation import Transformation
from espnet.utils.cli_utils import FileReaderWrapper
from espnet.utils.cli_utils import FileWriterWrapper
from espnet.utils.cli_readers import file_reader_helper
from espnet.utils.cli_utils import get_commandline_args
from espnet.utils.cli_utils import is_scipy_wav_style
from espnet.utils.cli_writers import file_writer_helper


def get_parser():
Expand Down Expand Up @@ -103,7 +103,7 @@ def utt2spk(x):
square_sum_feats = {}

idx = 0
for idx, (utt, matrix) in enumerate(FileReaderWrapper(
for idx, (utt, matrix) in enumerate(file_reader_helper(
args.rspecifier, args.in_filetype), 1):
if is_scipy_wav_style(matrix):
# If the data is a sound file, it is given as Tuple[int, ndarray]
Expand Down Expand Up @@ -147,8 +147,8 @@ def utt2spk(x):

# Per utterance or speaker CMVN
if is_wspecifier:
with FileWriterWrapper(args.wspecifier_or_wxfilename,
filetype=args.out_filetype) as writer:
with file_writer_helper(args.wspecifier_or_wxfilename,
filetype=args.out_filetype) as writer:
for spk, mat in cmvn_stats.items():
writer[spk] = mat

Expand Down
17 changes: 9 additions & 8 deletions utils/compute-fbank-feats.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import numpy

from espnet.transform.spectrogram import logmelspectrogram
from espnet.utils.cli_utils import FileWriterWrapper
from espnet.utils.cli_utils import get_commandline_args
from espnet.utils.cli_writers import file_writer_helper


def get_parser():
Expand Down Expand Up @@ -45,7 +45,8 @@ def get_parser():
parser.add_argument('--compress', type=strtobool, default=False,
help='Save in compressed format')
parser.add_argument('--compression-method', type=int, default=2,
help='Specify the method(if mat) or gzip-level(if hdf5)')
help='Specify the method(if mat) or '
'gzip-level(if hdf5)')
parser.add_argument('--verbose', '-V', default=0, type=int,
help='Verbose option')
parser.add_argument('--normalize', choices=[1, 16, 24, 32], type=int,
Expand Down Expand Up @@ -75,12 +76,12 @@ def main():

with kaldiio.ReadHelper(args.rspecifier,
segments=args.segments) as reader, \
FileWriterWrapper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method
) as writer:
file_writer_helper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method
) as writer:
for utt_id, (rate, array) in reader:
assert rate == args.fs
array = array.astype(numpy.float32)
Expand Down
17 changes: 9 additions & 8 deletions utils/compute-stft-feats.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import numpy

from espnet.transform.spectrogram import spectrogram
from espnet.utils.cli_utils import FileWriterWrapper
from espnet.utils.cli_utils import get_commandline_args
from espnet.utils.cli_writers import file_writer_helper


def get_parser():
Expand All @@ -37,7 +37,8 @@ def get_parser():
parser.add_argument('--compress', type=strtobool, default=False,
help='Save in compressed format')
parser.add_argument('--compression-method', type=int, default=2,
help='Specify the method(if mat) or gzip-level(if hdf5)')
help='Specify the method(if mat) or '
'gzip-level(if hdf5)')
parser.add_argument('--verbose', '-V', default=0, type=int,
help='Verbose option')
parser.add_argument('--normalize', choices=[1, 16, 24, 32], type=int,
Expand Down Expand Up @@ -67,12 +68,12 @@ def main():

with kaldiio.ReadHelper(args.rspecifier,
segments=args.segments) as reader, \
FileWriterWrapper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method
) as writer:
file_writer_helper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method
) as writer:
for utt_id, (_, array) in reader:
array = array.astype(numpy.float32)
if args.normalize is not None and args.normalize != 1:
Expand Down
4 changes: 2 additions & 2 deletions utils/convert_fbank_to_wav.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np
from scipy.io.wavfile import write

from espnet.utils.cli_utils import FileReaderWrapper
from espnet.utils.cli_readers import file_reader_helper
from espnet.utils.cli_utils import get_commandline_args


Expand Down Expand Up @@ -91,7 +91,7 @@ def main():

# extract feature and then write as ark with scp format
for idx, (utt_id, lmspc) in enumerate(
FileReaderWrapper(args.rspecifier, args.filetype), 1):
file_reader_helper(args.rspecifier, args.filetype), 1):
if args.n_mels is not None:
spc = logmelspc_to_linearspc(
lmspc,
Expand Down
11 changes: 6 additions & 5 deletions utils/copy-feats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import logging

from espnet.transform.transformation import Transformation
from espnet.utils.cli_utils import FileReaderWrapper
from espnet.utils.cli_utils import FileWriterWrapper
from espnet.utils.cli_readers import file_reader_helper
from espnet.utils.cli_utils import get_commandline_args
from espnet.utils.cli_utils import is_scipy_wav_style
from espnet.utils.cli_writers import file_writer_helper


def get_parser():
Expand All @@ -30,7 +30,8 @@ def get_parser():
parser.add_argument('--compress', type=strtobool, default=False,
help='Save in compressed format')
parser.add_argument('--compression-method', type=int, default=2,
help='Specify the method(if mat) or gzip-level(if hdf5)')
help='Specify the method(if mat) or '
'gzip-level(if hdf5)')
parser.add_argument('--preprocess-conf', type=str, default=None,
help='The configuration file for the pre-processing')
parser.add_argument('rspecifier', type=str,
Expand Down Expand Up @@ -58,13 +59,13 @@ def main():
else:
preprocessing = None

with FileWriterWrapper(
with file_writer_helper(
args.wspecifier,
filetype=args.out_filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method) as writer:
for utt, mat in FileReaderWrapper(args.rspecifier, args.in_filetype):
for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype):
if is_scipy_wav_style(mat):
# If the data is a sound file, it is given as Tuple[int, ndarray]
rate, mat = mat
Expand Down
19 changes: 10 additions & 9 deletions utils/dump-pcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import numpy

from espnet.transform.transformation import Transformation
from espnet.utils.cli_utils import FileWriterWrapper
from espnet.utils.cli_utils import get_commandline_args
from espnet.utils.cli_writers import file_writer_helper


def get_parser():
Expand All @@ -28,7 +28,8 @@ def get_parser():
parser.add_argument('--compress', type=strtobool, default=False,
help='Save in compressed format')
parser.add_argument('--compression-method', type=int, default=2,
help='Specify the method(if mat) or gzip-level(if hdf5)')
help='Specify the method(if mat) or '
'gzip-level(if hdf5)')
parser.add_argument('--verbose', '-V', default=0, type=int,
help='Verbose option')
parser.add_argument('--normalize', choices=[1, 16, 24, 32], type=int,
Expand Down Expand Up @@ -67,13 +68,13 @@ def main():
else:
preprocessing = None

with FileWriterWrapper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method,
pcm_format=args.format
) as writer:
with file_writer_helper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method,
pcm_format=args.format
) as writer:
for utt_id, (rate, array) in kaldiio.ReadHelper(args.rspecifier,
args.segments):
if args.filetype == 'mat':
Expand Down
10 changes: 4 additions & 6 deletions utils/feat-to-shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
import sys

from espnet.transform.transformation import Transformation
from espnet.utils.cli_utils import FileReaderWrapper
from espnet.utils.cli_readers import file_reader_helper
from espnet.utils.cli_utils import get_commandline_args
from espnet.utils.cli_utils import is_scipy_wav_style

PY2 = sys.version_info[0] == 2


def get_parser():
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -51,10 +49,10 @@ def main():
preprocessing = None

# The matrix itself is not needed when there is no preprocessing,
# so change to FileReaderWrapper to return shape.
# so change to file_reader_helper to return shape.
# This makes sense only with filetype="hdf5".
for utt, mat in FileReaderWrapper(args.rspecifier, args.filetype,
return_shape=preprocessing is None):
for utt, mat in file_reader_helper(args.rspecifier, args.filetype,
return_shape=preprocessing is None):
if preprocessing is not None:
if is_scipy_wav_style(mat):
# If the data is a sound file, it is given as Tuple[int, ndarray]
Expand Down

0 comments on commit cd354de

Please sign in to comment.