Skip to content

Commit b6ca39d

Browse files
committed
Updated database_stats.py
I just added more features; the updates are described in the README.
1 parent 3f5bff8 commit b6ca39d

File tree

2 files changed

+111
-41
lines changed

2 files changed

+111
-41
lines changed

README.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,25 @@ Update 06/15/2022: Added a feature that counts how many instances of each phonem
2525
This script collects statistics for your NNSVS/ENUNU database. It reads all the USTs and LABs in the database (by default, it only counts the LABs that already have USTs). You can drag and drop the database folder onto the script, as with lab2audacity, but this script also accepts additional arguments for settings. Here is the script's full help documentation, which can also be displayed with `database_stats.py -h`.
2626

2727
```
28-
usage: database_stats.py [-h] [--all-labs] [--include-pau] db
28+
usage: database_stats.py [-h] [--all-labs] [--include-pau] [--skip-diphone] [--write-diphone] db
2929
3030
Calculates some statistics for your existing NNSVS database
3131
3232
positional arguments:
33-
db The database's folder location
33+
db The database's folder location
3434
3535
optional arguments:
36-
-h, --help show this help message and exit
37-
--all-labs, -L Include all LABs in the LAB pass.
38-
--include-pau, -P Include pau phoneme in the phoneme tally passes.
36+
-h, --help show this help message and exit
37+
--all-labs, -l Include all LABs in the LAB pass.
38+
--include-pau, -p Include pau phoneme in the phoneme tally passes.
39+
--skip-diphone, -s Skip diphone density in calculations.
40+
--write-diphone, -w Include diphone density in the .csv version.
3941
```
4042

4143
This script will then save a `stats.txt` file inside the database folder you have passed to it.
4244

45+
**Update 09/27/2022:** This script now also generates a `stats.csv` file, so the statistics can be imported directly into a spreadsheet.
46+
4347
### lab2ust
4448
The whole lab2ust folder will be put in the UTAU plugins folder.
4549

database_stats/database_stats.py

Lines changed: 102 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import sys
55
import time
66
import traceback
7+
import csv
78
from argparse import ArgumentParser
89

910
notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
@@ -19,11 +20,14 @@ def midi_to_hz(x):
1920
try:
2021
parser = ArgumentParser(description='Calculates some statistics for your existing NNSVS database')
2122
parser.add_argument('db', help="The database's folder location")
22-
parser.add_argument('--all-labs', '-L', action='store_true', help="Include all LABs in the LAB pass.")
23-
parser.add_argument('--include-pau', '-P', action='store_true', help="Include pau phoneme in the phoneme tally passes.")
23+
parser.add_argument('--all-labs', '-l', action='store_true', help="Include all LABs in the LAB pass.")
24+
parser.add_argument('--include-pau', '-p', action='store_true', help="Include pau phoneme in the phoneme tally passes.")
25+
parser.add_argument('--skip-diphone', '-s', action='store_true', help="Skip diphone density in calculations.")
26+
parser.add_argument('--write-diphone', '-w', action='store_true', help="Include diphone density in the .csv version.")
2427

2528
args, _ = parser.parse_known_args()
2629
db = args.db
30+
calc_diphone = not args.skip_diphone
2731

2832
print('Finding all USTs . . . ')
2933
usts = glob.glob(db + '/**/*.ust', recursive=True)
@@ -38,7 +42,7 @@ def midi_to_hz(x):
3842
else:
3943
labs = glob.glob(db + '/**/*.lab', recursive=True)
4044

41-
print('Caching LABs . . . ')
45+
print('Reading LABs . . . ')
4246
phones = []
4347
for i in labs:
4448
temp = []
@@ -59,22 +63,24 @@ def midi_to_hz(x):
5963

6064
mono_dens = dict(sorted(mono_dens.items(), key=lambda x : x[1], reverse=True))
6165

62-
print('Tallying diphones . . . ')
63-
diph_dens = {}
64-
for i in phones:
65-
N = len(i)
66-
for j in range(0, N-2):
67-
k = i[j] + ' ' + i[j+1]
68-
if (i[j] != 'pau' and i[j+1] != 'pau') or args.include_pau:
69-
try:
70-
diph_dens[k] += 1
71-
except KeyError:
72-
diph_dens[k] = 1
73-
74-
diph_dens = dict(sorted(diph_dens.items(), key=lambda x : x[1], reverse=True))
66+
if calc_diphone:
67+
print('Tallying diphones . . . ')
68+
diph_dens = {}
69+
for i in phones:
70+
N = len(i)
71+
for j in range(0, N-2):
72+
k = i[j] + ' ' + i[j+1]
73+
if (i[j] != 'pau' and i[j+1] != 'pau') or args.include_pau:
74+
try:
75+
diph_dens[k] += 1
76+
except KeyError:
77+
diph_dens[k] = 1
78+
79+
diph_dens = dict(sorted(diph_dens.items(), key=lambda x : x[1], reverse=True))
7580

7681
print('Tallying notes . . . ')
77-
note_dens = {}
82+
note_dens_length = {}
83+
note_dens_presence = {}
7884
note_count = 0
7985
for i in usts:
8086
ust = pyutau.UtauPlugin(i)
@@ -83,44 +89,104 @@ def midi_to_hz(x):
8389
if note.lyric not in ['R', 'pau']:
8490
note_count += 1
8591
try:
86-
note_dens[note.note_num] += note.length
92+
note_dens_length[note.note_num] += note.length
8793
except KeyError:
88-
note_dens[note.note_num] = note.length
94+
note_dens_length[note.note_num] = note.length
95+
96+
try:
97+
note_dens_presence[note.note_num] += 1
98+
except KeyError:
99+
note_dens_presence[note.note_num] = 1
89100

90101
print('Calculating things idk . . . ')
91-
note_dens = dict(sorted(note_dens.items(), reverse=True))
92-
total_note = sum(note_dens.values())
102+
note_dens_length = dict(sorted(note_dens_length.items(), reverse=True))
103+
note_dens_presence = dict(sorted(note_dens_presence.items(), reverse=True))
104+
total_note_length = sum(note_dens_length.values())
93105
mean_note = 0
94-
for k, v in note_dens.items():
106+
for k, v in note_dens_length.items():
95107
mean_note += k * v
96-
mean_note /= total_note
108+
mean_note /= total_note_length
97109

98-
print('Writing out stats in a text file . . . ')
99-
note_range = list(note_dens.keys())
110+
note_range = list(note_dens_length.keys())
100111
lo_range = midi_to_note(note_range[-1])
101112
hi_range = midi_to_note(note_range[0])
102113
lo_hz = midi_to_hz(note_range[-1])
103114
hi_hz = midi_to_hz(note_range[0])
104115
closest_note = midi_to_note(int(round(mean_note)))
105116
mean_hz = midi_to_hz(mean_note)
106-
117+
118+
basic_info = {
119+
'Overall Range' : f'{lo_range} ~ {hi_range} ({lo_hz:.3f} ~ {hi_hz:.3f})',
120+
'Note Count' : note_count,
121+
'Total Note Length (UTAU length)' : total_note_length,
122+
'Average Pitch' : f'{mean_hz:.3f} Hz (~{closest_note})'
123+
}
124+
125+
print('Writing out stats in a text file . . . ')
107126
with open(db + '/stats.txt', 'w') as f:
108-
f.write(f'Overall Range: {lo_range} ~ {hi_range} ({lo_hz:.3f} ~ {hi_hz:.3f})\n')
109-
f.write(f'Note Count: {note_count}\n')
110-
f.write(f'Total Note Length (UTAU length): {total_note}\n')
111-
f.write(f'Average Pitch: {mean_hz:.3f} Hz (~{closest_note})\n\n')
112-
113-
f.write(f'Note Density (based on UTAU length now YAY)\n')
114-
for k, v in note_dens.items():
127+
for k, v in basic_info.items():
128+
f.write(f'{k}: {str(v)}\n')
129+
130+
f.write(f'\nNote Density (based on note length)\n')
131+
for k, v in note_dens_length.items():
132+
f.write(f'{midi_to_note(k)}: {v}\n')
133+
134+
f.write(f'\nNote Density (based on note presence)\n')
135+
for k, v in note_dens_presence.items():
115136
f.write(f'{midi_to_note(k)}: {v}\n')
116137

117138
f.write(f'\nMonophone Density\n')
118139
for k, v in mono_dens.items():
119140
f.write(f'{k}: {v}\n')
120141

121-
f.write(f'\nDiphone Density\n')
122-
for k, v in diph_dens.items():
123-
f.write(f'{k}: {v}\n')
142+
if calc_diphone:
143+
f.write(f'\nDiphone Density\n')
144+
for k, v in diph_dens.items():
145+
f.write(f'{k}: {v}\n')
146+
147+
print('Writing out stats in a .csv file . . . ')
148+
header = ['Note', 'Density (lengths)', 'Density (presence)', '', 'Phoneme', 'Density', '', '', '']
149+
150+
if calc_diphone and args.write_diphone:
151+
header = header[:7] + ['Diphone', 'Density', ''] + header[7:]
152+
153+
cols = len(header)
154+
rows = max(len(note_dens_length), len(mono_dens)) + 1
155+
156+
if calc_diphone and args.write_diphone:
157+
rows = max(len(note_dens_length), len(mono_dens), len(diph_dens)) + 1
158+
159+
sheet = [['' for c in range(cols)] for r in range(rows)]
160+
161+
sheet[0] = header
162+
for r in range(1, rows):
163+
i = r - 1
164+
if i < len(note_dens_length):
165+
dens_len = list(note_dens_length.items())[i]
166+
dens_pres = list(note_dens_presence.values())[i]
167+
sheet[r][0] = midi_to_note(dens_len[0])
168+
sheet[r][1] = dens_len[1]
169+
sheet[r][2] = dens_pres
170+
171+
if i < len(mono_dens):
172+
dens = list(mono_dens.items())[i]
173+
sheet[r][4] = dens[0]
174+
sheet[r][5] = dens[1]
175+
176+
if calc_diphone and args.write_diphone:
177+
if i < len(diph_dens):
178+
dens = list(diph_dens.items())[i]
179+
sheet[r][7] = dens[0]
180+
sheet[r][8] = dens[1]
181+
182+
if i < len(basic_info):
183+
info = list(basic_info.items())[i]
184+
sheet[r][-2] = info[0]
185+
sheet[r][-1] = info[1]
186+
187+
with open(db + '/stats.csv', 'w', newline='') as f:
188+
writer = csv.writer(f)
189+
writer.writerows(sheet)
124190

125191
except Exception as e:
126192
for i in traceback.format_exception(e.__class__, e, e.__traceback__):

0 commit comments

Comments
 (0)