
Commit f5537dc

Author: Eren G (committed)
Message: pep8 format all
1 parent 3238ffa · commit f5537dc

32 files changed: +770 additions, −603 deletions
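
Note: the new layout throughout the diff — one argument per line, continuation lines aligned to the opening parenthesis — matches what an automatic PEP 8 formatter such as yapf produces (e.g. `yapf --in-place --recursive .`). The commit message only says "pep8 format all", so the exact tool is an assumption.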

datasets/Kusal.py

Lines changed: 26 additions & 21 deletions
@@ -8,21 +8,28 @@
 from torch.utils.data import Dataset
 
 from utils.text import text_to_sequence
-from utils.data import (prepare_data, pad_per_step,
-                        prepare_tensor, prepare_stop_target)
+from utils.data import (prepare_data, pad_per_step, prepare_tensor,
+                        prepare_stop_target)
 
 
 class MyDataset(Dataset):
-
-    def __init__(self, root_dir, csv_file, outputs_per_step,
-                 text_cleaner, ap, min_seq_len=0):
+    def __init__(self,
+                 root_dir,
+                 csv_file,
+                 outputs_per_step,
+                 text_cleaner,
+                 ap,
+                 min_seq_len=0):
         self.root_dir = root_dir
         self.wav_dir = os.path.join(root_dir, 'wav')
         self.wav_files = glob.glob(os.path.join(self.wav_dir, '*.wav'))
         self._create_file_dict()
         self.csv_dir = os.path.join(root_dir, csv_file)
         with open(self.csv_dir, "r", encoding="utf8") as f:
-            self.frames = [line.split('\t') for line in f if line.split('\t')[0] in self.wav_files_dict.keys()]
+            self.frames = [
+                line.split('\t') for line in f
+                if line.split('\t')[0] in self.wav_files_dict.keys()
+            ]
         self.outputs_per_step = outputs_per_step
         self.sample_rate = ap.sample_rate
         self.cleaners = text_cleaner
@@ -43,10 +50,8 @@ def load_wav(self, filename):
         print(" !! Cannot read file : {}".format(filename))
 
     def _trim_silence(self, wav):
-        return librosa.effects.trim(
-            wav, top_db=40,
-            frame_length=1024,
-            hop_length=256)[0]
+        return librosa.effects.trim(
+            wav, top_db=40, frame_length=1024, hop_length=256)[0]
 
     def _create_file_dict(self):
         self.wav_files_dict = {}
@@ -87,11 +92,10 @@ def __getitem__(self, idx):
         sidx = self.frames[idx][0]
         sidx_files = self.wav_files_dict[sidx]
         file_name = random.choice(sidx_files)
-        wav_name = os.path.join(self.wav_dir,
-                                file_name)
+        wav_name = os.path.join(self.wav_dir, file_name)
         text = self.frames[idx][2]
-        text = np.asarray(text_to_sequence(
-            text, [self.cleaners]), dtype=np.int32)
+        text = np.asarray(
+            text_to_sequence(text, [self.cleaners]), dtype=np.int32)
         wav = np.asarray(self.load_wav(wav_name), dtype=np.float32)
         sample = {'text': text, 'wav': wav, 'item_idx': self.frames[idx][0]}
         return sample
@@ -121,12 +125,13 @@ def collate_fn(self, batch):
             mel_lengths = [m.shape[1] + 1 for m in mel]  # +1 for zero-frame
 
             # compute 'stop token' targets
-            stop_targets = [np.array([0.]*(mel_len-1))
-                            for mel_len in mel_lengths]
+            stop_targets = [
+                np.array([0.] * (mel_len - 1)) for mel_len in mel_lengths
+            ]
 
             # PAD stop targets
-            stop_targets = prepare_stop_target(
-                stop_targets, self.outputs_per_step)
+            stop_targets = prepare_stop_target(stop_targets,
+                                               self.outputs_per_step)
 
             # PAD sequences with largest length of the batch
             text = prepare_data(text).astype(np.int32)
@@ -150,8 +155,8 @@ def collate_fn(self, batch):
             mel_lengths = torch.LongTensor(mel_lengths)
             stop_targets = torch.FloatTensor(stop_targets)
 
-            return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[0]
+            return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[
+                0]
 
         raise TypeError(("batch must contain tensors, numbers, dicts or lists;\
-                         found {}"
-                         .format(type(batch[0]))))
+                         found {}".format(type(batch[0]))))
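
Note: the reformatted stop-target code above is behavior-preserving; each target is a run of 0.0 ("keep decoding") one frame shorter than its mel spectrogram. A minimal sketch of the construction and padding, with a hypothetical pad_stop_target standing in for utils.data.prepare_stop_target, whose implementation is not part of this diff (padding with 1.0 as the stop value is an assumption):

import numpy as np

def pad_stop_target(targets, outputs_per_step):
    # Hypothetical stand-in for utils.data.prepare_stop_target: right-pad every
    # target with 1.0 (assumed "stop" value) to the batch maximum, rounded up
    # to a multiple of outputs_per_step.
    max_len = max(len(t) for t in targets)
    pad_len = -(-max_len // outputs_per_step) * outputs_per_step  # ceiling
    return np.stack([np.pad(t, (0, pad_len - len(t)), constant_values=1.0)
                     for t in targets])

mel_lengths = [4, 6, 5]  # toy frame counts; each already includes the zero-frame
stop_targets = [np.array([0.] * (mel_len - 1)) for mel_len in mel_lengths]
print(pad_stop_target(stop_targets, outputs_per_step=2).shape)  # (3, 6)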

datasets/LJSpeech.py

Lines changed: 20 additions & 16 deletions
@@ -6,14 +6,18 @@
 from torch.utils.data import Dataset
 
 from utils.text import text_to_sequence
-from utils.data import (prepare_data, pad_per_step,
-                        prepare_tensor, prepare_stop_target)
+from utils.data import (prepare_data, pad_per_step, prepare_tensor,
+                        prepare_stop_target)
 
 
 class MyDataset(Dataset):
-
-    def __init__(self, root_dir, csv_file, outputs_per_step,
-                 text_cleaner, ap, min_seq_len=0):
+    def __init__(self,
+                 root_dir,
+                 csv_file,
+                 outputs_per_step,
+                 text_cleaner,
+                 ap,
+                 min_seq_len=0):
         self.root_dir = root_dir
         self.wav_dir = os.path.join(root_dir, 'wavs')
         self.csv_dir = os.path.join(root_dir, csv_file)
@@ -60,11 +64,10 @@ def __len__(self):
         return len(self.frames)
 
     def __getitem__(self, idx):
-        wav_name = os.path.join(self.wav_dir,
-                                self.frames[idx][0]) + '.wav'
+        wav_name = os.path.join(self.wav_dir, self.frames[idx][0]) + '.wav'
         text = self.frames[idx][1]
-        text = np.asarray(text_to_sequence(
-            text, [self.cleaners]), dtype=np.int32)
+        text = np.asarray(
+            text_to_sequence(text, [self.cleaners]), dtype=np.int32)
         wav = np.asarray(self.load_wav(wav_name), dtype=np.float32)
         sample = {'text': text, 'wav': wav, 'item_idx': self.frames[idx][0]}
         return sample
@@ -94,12 +97,13 @@ def collate_fn(self, batch):
             mel_lengths = [m.shape[1] + 1 for m in mel]  # +1 for zero-frame
 
             # compute 'stop token' targets
-            stop_targets = [np.array([0.]*(mel_len-1))
-                            for mel_len in mel_lengths]
+            stop_targets = [
+                np.array([0.] * (mel_len - 1)) for mel_len in mel_lengths
+            ]
 
             # PAD stop targets
-            stop_targets = prepare_stop_target(
-                stop_targets, self.outputs_per_step)
+            stop_targets = prepare_stop_target(stop_targets,
+                                               self.outputs_per_step)
 
             # PAD sequences with largest length of the batch
             text = prepare_data(text).astype(np.int32)
@@ -123,8 +127,8 @@ def collate_fn(self, batch):
             mel_lengths = torch.LongTensor(mel_lengths)
             stop_targets = torch.FloatTensor(stop_targets)
 
-            return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[0]
+            return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[
+                0]
 
         raise TypeError(("batch must contain tensors, numbers, dicts or lists;\
-                         found {}"
-                         .format(type(batch[0]))))
+                         found {}".format(type(batch[0]))))
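
Note: MyDataset ships its own collate_fn, so the expected wiring is the standard PyTorch pattern of handing it to DataLoader. A hedged sketch — the constructor values and the 'metadata.csv' name are illustrative, the AudioProcessor argument order is taken from TWEB.py's diff below, and r=5, "english_cleaners", and num_loader_workers=16 mirror debug_config.py:

from torch.utils.data import DataLoader
from datasets.LJSpeech import MyDataset
from TTS.utils.audio import AudioProcessor  # import path as used by TWEB.py

# Illustrative audio settings; only the positional order comes from this commit.
ap = AudioProcessor(22050, 80, -100, 12.5, 50, 0.97, 20, 1025, 1.5)
dataset = MyDataset(root_dir='/data/shared/KeithIto/LJSpeech-1.0',
                    csv_file='metadata.csv',  # assumed file name
                    outputs_per_step=5,
                    text_cleaner='english_cleaners',
                    ap=ap,
                    min_seq_len=0)
loader = DataLoader(dataset, batch_size=32, num_workers=16,
                    shuffle=True, collate_fn=dataset.collate_fn)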

datasets/LJSpeechCached.py

Lines changed: 28 additions & 19 deletions
@@ -6,14 +6,18 @@
 from torch.utils.data import Dataset
 
 from utils.text import text_to_sequence
-from utils.data import (prepare_data, pad_per_step,
-                        prepare_tensor, prepare_stop_target)
+from utils.data import (prepare_data, pad_per_step, prepare_tensor,
+                        prepare_stop_target)
 
 
 class MyDataset(Dataset):
-
-    def __init__(self, root_dir, csv_file, outputs_per_step,
-                 text_cleaner, ap, min_seq_len=0):
+    def __init__(self,
+                 root_dir,
+                 csv_file,
+                 outputs_per_step,
+                 text_cleaner,
+                 ap,
+                 min_seq_len=0):
         self.root_dir = root_dir
         self.wav_dir = os.path.join(root_dir, 'wavs')
         self.feat_dir = os.path.join(root_dir, 'loader_data')
@@ -35,7 +39,7 @@ def load_wav(self, filename):
             return audio
         except RuntimeError as e:
             print(" !! Cannot read file : {}".format(filename))
-
+
     def load_np(self, filename):
         data = np.load(filename).astype('float32')
         return data
@@ -66,20 +70,24 @@ def __len__(self):
 
     def __getitem__(self, idx):
         if self.items[idx] is None:
-            wav_name = os.path.join(self.wav_dir,
-                                    self.frames[idx][0]) + '.wav'
+            wav_name = os.path.join(self.wav_dir, self.frames[idx][0]) + '.wav'
             mel_name = os.path.join(self.feat_dir,
                                     self.frames[idx][0]) + '.mel.npy'
             linear_name = os.path.join(self.feat_dir,
                                        self.frames[idx][0]) + '.linear.npy'
             text = self.frames[idx][1]
-            text = np.asarray(text_to_sequence(
-                text, [self.cleaners]), dtype=np.int32)
+            text = np.asarray(
+                text_to_sequence(text, [self.cleaners]), dtype=np.int32)
             wav = np.asarray(self.load_wav(wav_name)[0], dtype=np.float32)
             mel = self.load_np(mel_name)
             linear = self.load_np(linear_name)
-            sample = {'text': text, 'wav': wav, 'item_idx': self.frames[idx][0],
-                      'mel':mel, 'linear': linear}
+            sample = {
+                'text': text,
+                'wav': wav,
+                'item_idx': self.frames[idx][0],
+                'mel': mel,
+                'linear': linear
+            }
             self.items[idx] = sample
         else:
             sample = self.items[idx]
@@ -109,12 +117,13 @@ def collate_fn(self, batch):
             mel_lengths = [m.shape[1] + 1 for m in mel]  # +1 for zero-frame
 
             # compute 'stop token' targets
-            stop_targets = [np.array([0.]*(mel_len-1))
-                            for mel_len in mel_lengths]
+            stop_targets = [
+                np.array([0.] * (mel_len - 1)) for mel_len in mel_lengths
+            ]
 
             # PAD stop targets
-            stop_targets = prepare_stop_target(
-                stop_targets, self.outputs_per_step)
+            stop_targets = prepare_stop_target(stop_targets,
+                                               self.outputs_per_step)
 
             # PAD sequences with largest length of the batch
             text = prepare_data(text).astype(np.int32)
@@ -138,8 +147,8 @@ def collate_fn(self, batch):
             mel_lengths = torch.LongTensor(mel_lengths)
             stop_targets = torch.FloatTensor(stop_targets)
 
-            return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[0]
+            return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[
+                0]
 
         raise TypeError(("batch must contain tensors, numbers, dicts or lists;\
-                         found {}"
-                         .format(type(batch[0]))))
+                         found {}".format(type(batch[0]))))
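
Note: LJSpeechCached.py differs from LJSpeech.py mainly in the self.items[idx] memoization visible above: wav/mel/linear features are read from disk on first access and served from memory afterwards. Stripped of the audio specifics, the pattern is:

class CachedDataset:
    """Minimal sketch of the memoization pattern in LJSpeechCached.MyDataset."""

    def __init__(self, n_items):
        self.items = [None] * n_items  # one slot per sample

    def __getitem__(self, idx):
        if self.items[idx] is None:           # first access: expensive disk read
            self.items[idx] = self._load(idx)
        return self.items[idx]                # later accesses hit the cache

    def _load(self, idx):
        return {'item_idx': idx}  # placeholder for the wav/mel/linear loading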

datasets/TWEB.py

Lines changed: 30 additions & 19 deletions
@@ -7,15 +7,25 @@
 
 from TTS.utils.text import text_to_sequence
 from TTS.utils.audio import AudioProcessor
-from TTS.utils.data import (prepare_data, pad_per_step,
-                            prepare_tensor, prepare_stop_target)
+from TTS.utils.data import (prepare_data, pad_per_step, prepare_tensor,
+                            prepare_stop_target)
 
 
 class TWEBDataset(Dataset):
-
-    def __init__(self, csv_file, root_dir, outputs_per_step, sample_rate,
-                 text_cleaner, num_mels, min_level_db, frame_shift_ms,
-                 frame_length_ms, preemphasis, ref_level_db, num_freq, power,
+    def __init__(self,
+                 csv_file,
+                 root_dir,
+                 outputs_per_step,
+                 sample_rate,
+                 text_cleaner,
+                 num_mels,
+                 min_level_db,
+                 frame_shift_ms,
+                 frame_length_ms,
+                 preemphasis,
+                 ref_level_db,
+                 num_freq,
+                 power,
                  min_seq_len=0):
 
         with open(csv_file, "r") as f:
@@ -25,8 +35,9 @@ def __init__(self, csv_file, root_dir, outputs_per_step, sample_rate,
         self.sample_rate = sample_rate
         self.cleaners = text_cleaner
         self.min_seq_len = min_seq_len
-        self.ap = AudioProcessor(sample_rate, num_mels, min_level_db, frame_shift_ms,
-                                 frame_length_ms, preemphasis, ref_level_db, num_freq, power)
+        self.ap = AudioProcessor(sample_rate, num_mels, min_level_db,
+                                 frame_shift_ms, frame_length_ms, preemphasis,
+                                 ref_level_db, num_freq, power)
         print(" > Reading TWEB from - {}".format(root_dir))
         print(" | > Number of instances : {}".format(len(self.frames)))
         self._sort_frames()
@@ -63,11 +74,10 @@ def __len__(self):
         return len(self.frames)
 
     def __getitem__(self, idx):
-        wav_name = os.path.join(self.root_dir,
-                                self.frames[idx][0]) + '.wav'
+        wav_name = os.path.join(self.root_dir, self.frames[idx][0]) + '.wav'
         text = self.frames[idx][1]
-        text = np.asarray(text_to_sequence(
-            text, [self.cleaners]), dtype=np.int32)
+        text = np.asarray(
+            text_to_sequence(text, [self.cleaners]), dtype=np.int32)
         wav = np.asarray(self.load_wav(wav_name)[0], dtype=np.float32)
         sample = {'text': text, 'wav': wav, 'item_idx': self.frames[idx][0]}
         return sample
@@ -97,12 +107,13 @@ def collate_fn(self, batch):
             mel_lengths = [m.shape[1] + 1 for m in mel]  # +1 for zero-frame
 
             # compute 'stop token' targets
-            stop_targets = [np.array([0.]*(mel_len-1))
-                            for mel_len in mel_lengths]
+            stop_targets = [
+                np.array([0.] * (mel_len - 1)) for mel_len in mel_lengths
+            ]
 
             # PAD stop targets
-            stop_targets = prepare_stop_target(
-                stop_targets, self.outputs_per_step)
+            stop_targets = prepare_stop_target(stop_targets,
+                                               self.outputs_per_step)
 
             # PAD sequences with largest length of the batch
             text = prepare_data(text).astype(np.int32)
@@ -126,8 +137,8 @@ def collate_fn(self, batch):
             mel_lengths = torch.LongTensor(mel_lengths)
             stop_targets = torch.FloatTensor(stop_targets)
 
-            return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[0]
+            return text, text_lenghts, linear, mel, mel_lengths, stop_targets, item_idxs[
+                0]
 
         raise TypeError(("batch must contain tensors, numbers, dicts or lists;\
-                         found {}"
-                         .format(type(batch[0]))))
+                         found {}".format(type(batch[0]))))
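
Note: every __getitem__ in this commit converts raw text with the same text_to_sequence(text, [cleaner_name]) call. A small usage sketch; "english_cleaners" comes from debug_config.py below, the import path matches TWEB.py (the other datasets import from utils.text), and the sample sentence is arbitrary:

import numpy as np
from TTS.utils.text import text_to_sequence

seq = text_to_sequence("Hello there, this is a test.", ["english_cleaners"])
text = np.asarray(seq, dtype=np.int32)  # integer symbol ids, ready for batching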

debug_config.py

Lines changed: 0 additions & 3 deletions
@@ -10,7 +10,6 @@
     "hidden_size": 128,
     "embedding_size": 256,
     "text_cleaner": "english_cleaners",
-
     "epochs": 200,
     "lr": 0.01,
     "lr_patience": 2,
@@ -19,9 +18,7 @@
     "griffinf_lim_iters": 60,
     "power": 1.5,
     "r": 5,
-
     "num_loader_workers": 16,
-
     "save_step": 1,
     "data_path": "/data/shared/KeithIto/LJSpeech-1.0",
     "output_path": "result",
