Skip to content

Commit 723f920

Browse files
committed
Merge vtt files into one txt
1 parent fd2605f commit 723f920

1 file changed

Lines changed: 55 additions & 0 deletions

File tree

mergetxt.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env python3
"""Merge the caption text of several WebVTT files into one plain-text file.

Every file in the current folder that matches ``file_glob_pattern`` is read
in numeric order of the first number found in its name.  For each file a
``--- Source: <name> ---`` header is written to ``output_filename``,
followed by one line per caption cue (the text lines of a cue are joined
with single spaces).  Timestamps, cue identifiers and everything else that
is not cue text are dropped.
"""
import glob
import os
import re
import sys

# --- Configuration ---
output_filename = "merged_captions.txt"
file_glob_pattern = "*.vtt"  # Finds all .vtt files in the current folder
# ---------------------

# First run of digits in a file name; compiled once because it is used for
# every file being sorted.
_FIRST_NUMBER = re.compile(r"\d+")


def _numeric_sort_key(path):
    """Return a sort key that orders *path* by the first number in its name.

    Names without any digit sort after all numbered names (alphabetically)
    instead of raising, and names sharing the same number are ordered by
    name so the result does not depend on the order ``glob`` returns.
    """
    name = os.path.basename(path)
    match = _FIRST_NUMBER.search(name)
    if match is None:
        return (1, 0, name)
    return (0, int(match.group()), name)


def _extract_captions(lines):
    """Return the caption texts found in *lines*, one string per cue.

    *lines* is any iterable of text lines (for example an open file).  A
    line containing ``-->`` is treated as a cue timing line; every
    following line up to the next blank line (or the end of input) belongs
    to that cue and is joined with single spaces.  Cues without any text
    are skipped.
    """
    captions = []
    line_iter = iter(lines)
    for raw_line in line_iter:
        if "-->" not in raw_line:
            # Header, cue identifier, note or blank line: nothing to keep.
            continue

        # Consume the cue body from the same iterator so the outer loop
        # resumes right after the blank line that ends the cue.
        cue_text = []
        for body_line in line_iter:
            stripped = body_line.strip()
            if not stripped:
                break
            cue_text.append(stripped)

        if cue_text:
            captions.append(" ".join(cue_text))
    return captions


def main():
    """Merge all matching VTT files into ``output_filename``.

    Returns the process exit status: 0 both on success and when no input
    file matches (a message is printed and no output file is created in
    that case).
    """
    vtt_files = sorted(glob.glob(file_glob_pattern), key=_numeric_sort_key)

    if not vtt_files:
        print(f"No files found matching '{file_glob_pattern}'.")
        return 0

    print(f"Found {len(vtt_files)} files. Merging into '{output_filename}'...")

    # Open the single output file
    with open(output_filename, "w", encoding="utf-8") as outfile:
        for filename in vtt_files:
            print(f"Processing: {filename}")

            # Add a header to separate content from different files
            outfile.write(f"\n\n--- Source: {filename} ---\n\n")

            with open(filename, "r", encoding="utf-8") as infile:
                captions = _extract_captions(infile)

            outfile.writelines(f"{caption}\n" for caption in captions)

    print("\nDone! All VTT files have been merged.")
    return 0


if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)