Skip to content

Commit f79c75c

Browse files
committed
feat: Add support for video chapters
1 parent 2a823f2, commit f79c75c

File tree

9 files changed

+294
-45
lines changed

9 files changed

+294
-45
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- Add support for video chapters (#170)
13+
1014
## [3.3.0] - 2024-12-05
1115

1216
### Changed

scraper/src/youtube2zim/schemas.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,14 @@ class Subtitle(CamelModel):
2626
name: str
2727

2828

29+
class Chapter(CamelModel):
30+
"""Class to serialize data about YouTube Video chapter"""
31+
32+
start_time: float | int
33+
end_time: float | int
34+
title: str
35+
36+
2937
class Subtitles(CamelModel):
3038
"""Class to serialize data about a list of YouTube video subtitles."""
3139

@@ -44,6 +52,8 @@ class Video(CamelModel):
4452
thumbnail_path: str | None = None
4553
subtitle_path: str | None = None
4654
subtitle_list: list[Subtitle]
55+
chapters_path: str | None = None
56+
chapter_list: list[Chapter]
4757
duration: str
4858

4959

scraper/src/youtube2zim/scraper.py

Lines changed: 80 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@
5858
from youtube2zim.schemas import (
5959
Author,
6060
Channel,
61+
Chapter,
6162
Config,
6263
HomePlaylists,
6364
Playlist,
@@ -215,6 +216,10 @@ def cache_dir(self):
215216
def subtitles_cache_dir(self):
216217
return self.cache_dir.joinpath("subtitles")
217218

219+
@property
220+
def chapters_cache_dir(self):
221+
return self.cache_dir.joinpath("chapters")
222+
218223
@property
219224
def videos_dir(self):
220225
return self.build_dir.joinpath("videos")
@@ -455,6 +460,7 @@ def prepare_build_folder(self):
455460
# cache folder to store youtube-api results
456461
self.cache_dir.mkdir(exist_ok=True)
457462
self.subtitles_cache_dir.mkdir(exist_ok=True)
463+
self.chapters_cache_dir.mkdir(exist_ok=True)
458464

459465
# make videos placeholder
460466
self.videos_dir.mkdir(exist_ok=True)
@@ -805,14 +811,72 @@ def download_thumbnail(self, video_id, options):
805811
self.upload_to_cache(s3_key, thumbnail_path, preset.VERSION)
806812
return True
807813

814+
def add_chapters_to_zim(self, video_id: str):
815+
"""add chapters file to zim file"""
816+
817+
chapters_file = self.videos_dir.joinpath(video_id, "chapters.vtt")
818+
if chapters_file.exists():
819+
self.add_file_to_zim(
820+
f"videos/{video_id}/{chapters_file.name}",
821+
chapters_file,
822+
callback=(delete_callback, chapters_file),
823+
)
824+
825+
def generate_chapters_vtt(self, video_id):
826+
"""generate the chapters file of a video if chapters available"""
827+
828+
metadata_file = self.videos_dir.joinpath(video_id, "video.info.json")
829+
if metadata_file.exists():
830+
with open(metadata_file, encoding="utf-8") as f:
831+
metadata = json.load(f)
832+
chapters = metadata.get("chapters", [])
833+
834+
if not chapters:
835+
logger.info(f"No chapters found for {video_id}")
836+
return
837+
838+
logger.info(f"Found {len(chapters)} chapters for {video_id}")
839+
840+
save_json(
841+
self.chapters_cache_dir,
842+
video_id,
843+
{"chapters": chapters},
844+
)
845+
846+
chapters_file = self.videos_dir.joinpath(video_id, "chapters.vtt")
847+
with chapters_file.open("w", encoding="utf8") as chapter_f:
848+
chapter_f.write("WEBVTT\n\n")
849+
for chapter in chapters:
850+
start = chapter["start_time"]
851+
end = chapter["end_time"]
852+
title = chapter["title"]
853+
854+
start_time = (
855+
f"{int(start//3600):02}:"
856+
f"{int((start%3600)//60):02}:"
857+
f"{int(start%60):02}."
858+
f"{int((start%1)*1000):03}"
859+
)
860+
end_time = (
861+
f"{int(end//3600):02}:"
862+
f"{int((end%3600)//60):02}:"
863+
f"{int(end%60):02}."
864+
f"{int((end%1)*1000):03}"
865+
)
866+
867+
chapter_f.write(f"{start_time} --> {end_time}\n")
868+
chapter_f.write(f"{title}\n\n")
869+
logger.info(f"Chapters file saved for {video_id}")
870+
self.add_chapters_to_zim(video_id)
871+
808872
def fetch_video_subtitles_list(self, video_id: str) -> Subtitles:
809873
"""fetch list of subtitles for a video"""
810874

811875
video_dir = self.videos_dir.joinpath(video_id)
812876
languages = [
813877
x.stem.split(".")[1]
814878
for x in video_dir.iterdir()
815-
if x.is_file() and x.name.endswith(".vtt")
879+
if x.is_file() and x.name.endswith(".vtt") and x.name != "chapters.vtt"
816880
]
817881

818882
def to_subtitle_object(lang) -> Subtitle:
@@ -855,7 +919,9 @@ def download_subtitles(self, video_id, options):
855919
"""download subtitles for a video"""
856920

857921
options_copy = options.copy()
858-
options_copy.update({"skip_download": True, "writethumbnail": False})
922+
options_copy.update(
923+
{"skip_download": True, "writethumbnail": False, "writeinfojson": True}
924+
)
859925
try:
860926
with yt_dlp.YoutubeDL(options_copy) as ydl:
861927
ydl.download([video_id])
@@ -883,6 +949,7 @@ def download_video_files_batch(self, options, videos_ids):
883949
video_id, options
884950
):
885951
self.download_subtitles(video_id, options)
952+
self.generate_chapters_vtt(video_id)
886953
succeeded.append(video_id)
887954
else:
888955
failed.append(video_id)
@@ -1010,6 +1077,12 @@ def get_subtitles(video_id) -> list[Subtitle]:
10101077
return []
10111078
return subtitles_list["subtitles"]
10121079

1080+
def get_chapters(video_id) -> list[Chapter]:
1081+
chapters_list = load_json(self.chapters_cache_dir, video_id)
1082+
if chapters_list is None:
1083+
return []
1084+
return chapters_list["chapters"]
1085+
10131086
def get_videos_list(playlist):
10141087
videos = load_mandatory_json(
10151088
self.cache_dir, f"playlist_{playlist.playlist_id}_videos"
@@ -1025,6 +1098,7 @@ def generate_video_object(video) -> Video:
10251098
author = videos_channels[video_id]
10261099
subtitles_list = get_subtitles(video_id)
10271100
channel_data = get_channel_json(author["channelId"])
1101+
chapters_list = get_chapters(video_id)
10281102

10291103
return Video(
10301104
id=video_id,
@@ -1043,6 +1117,10 @@ def generate_video_object(video) -> Video:
10431117
thumbnail_path=get_thumbnail_path(video_id),
10441118
subtitle_path=f"videos/{video_id}" if len(subtitles_list) > 0 else None,
10451119
subtitle_list=subtitles_list,
1120+
chapters_path=(
1121+
f"videos/{video_id}" if len(chapters_list) > 0 else None
1122+
),
1123+
chapter_list=chapters_list,
10461124
duration=videos_channels[video_id]["duration"],
10471125
)
10481126

zimui/src/assets/vjs-youtube.css

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,6 @@
5555
border-radius: 8px;
5656
}
5757

58-
5958
.video-js.vjs-fluid,
6059
.video-js.vjs-16-9,
6160
.video-js.vjs-4-3,
@@ -88,4 +87,19 @@ video.vjs-tech {
8887
height: 100% !important;
8988
max-height: 100vh;
9089
object-fit: contain;
91-
}
90+
}
91+
92+
.custom-marker {
93+
position: absolute;
94+
bottom: 0;
95+
width: 5px;
96+
height: 100%;
97+
background-color: #aaa;
98+
cursor: pointer;
99+
}
100+
101+
.vjs-time-tooltip {
102+
transform: translateY(-80%) !important;
103+
line-height: 1.5;
104+
padding: 3px 6px !important;
105+
}

0 commit comments

Comments
 (0)