From f79c75c9b79bbd7d7e60d2e837bac49bdc7b2a7c Mon Sep 17 00:00:00 2001 From: Arjit Das Date: Mon, 24 Mar 2025 19:50:19 +0530 Subject: [PATCH] feat: Add support for video chapters --- CHANGELOG.md | 4 + scraper/src/youtube2zim/schemas.py | 10 ++ scraper/src/youtube2zim/scraper.py | 82 +++++++++- zimui/src/assets/vjs-youtube.css | 18 ++- zimui/src/components/video/VideoPlayer.vue | 170 ++++++++++++++++----- zimui/src/types/Videos.ts | 8 + zimui/src/types/videojs.ts | 6 + zimui/src/utils/format-utils.ts | 15 ++ zimui/src/views/VideoPlayerView.vue | 26 +++- 9 files changed, 294 insertions(+), 45 deletions(-) create mode 100644 zimui/src/types/videojs.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 357ec852..a70e275b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add support for video chapters (#170) + ## [3.3.0] - 2024-12-05 ### Changed diff --git a/scraper/src/youtube2zim/schemas.py b/scraper/src/youtube2zim/schemas.py index edba89c0..92c5df1d 100644 --- a/scraper/src/youtube2zim/schemas.py +++ b/scraper/src/youtube2zim/schemas.py @@ -26,6 +26,14 @@ class Subtitle(CamelModel): name: str +class Chapter(CamelModel): + """Class to serialize data about YouTube Video chapter""" + + start_time: float | int + end_time: float | int + title: str + + class Subtitles(CamelModel): """Class to serialize data about a list of YouTube video subtitles.""" @@ -44,6 +52,8 @@ class Video(CamelModel): thumbnail_path: str | None = None subtitle_path: str | None = None subtitle_list: list[Subtitle] + chapters_path: str | None = None + chapter_list: list[Chapter] duration: str diff --git a/scraper/src/youtube2zim/scraper.py b/scraper/src/youtube2zim/scraper.py index 95fa75d4..c2fc3755 100644 --- a/scraper/src/youtube2zim/scraper.py +++ b/scraper/src/youtube2zim/scraper.py @@ -58,6 +58,7 @@ from youtube2zim.schemas import ( Author, Channel, + Chapter, Config, HomePlaylists, Playlist, @@ -215,6 +216,10 @@ def cache_dir(self): def subtitles_cache_dir(self): return self.cache_dir.joinpath("subtitles") + @property + def chapters_cache_dir(self): + return self.cache_dir.joinpath("chapters") + @property def videos_dir(self): return self.build_dir.joinpath("videos") @@ -455,6 +460,7 @@ def prepare_build_folder(self): # cache folder to store youtube-api results self.cache_dir.mkdir(exist_ok=True) self.subtitles_cache_dir.mkdir(exist_ok=True) + self.chapters_cache_dir.mkdir(exist_ok=True) # make videos placeholder self.videos_dir.mkdir(exist_ok=True) @@ -805,6 +811,64 @@ def download_thumbnail(self, video_id, options): self.upload_to_cache(s3_key, thumbnail_path, preset.VERSION) return True + def add_chapters_to_zim(self, video_id: str): + """add chapters file to zim file""" + + chapters_file = self.videos_dir.joinpath(video_id, "chapters.vtt") + if chapters_file.exists(): + self.add_file_to_zim( + f"videos/{video_id}/{chapters_file.name}", + chapters_file, + callback=(delete_callback, chapters_file), + ) + + def generate_chapters_vtt(self, video_id): + """generate the chapters file of a video if chapters available""" + + metadata_file = self.videos_dir.joinpath(video_id, "video.info.json") + if metadata_file.exists(): + with open(metadata_file, encoding="utf-8") as f: + metadata = json.load(f) + chapters = metadata.get("chapters", []) + + if not chapters: + logger.info(f"No chapters found for {video_id}") + return + + logger.info(f"Found {len(chapters)} chapters for {video_id}") + + save_json( + self.chapters_cache_dir, + video_id, + {"chapters": chapters}, + ) + + chapters_file = self.videos_dir.joinpath(video_id, "chapters.vtt") + with chapters_file.open("w", encoding="utf8") as chapter_f: + chapter_f.write("WEBVTT\n\n") + for chapter in chapters: + start = chapter["start_time"] + end = chapter["end_time"] + title = chapter["title"] + + start_time = ( + f"{int(start//3600):02}:" + f"{int((start%3600)//60):02}:" + f"{int(start%60):02}." + f"{int((start%1)*1000):03}" + ) + end_time = ( + f"{int(end//3600):02}:" + f"{int((end%3600)//60):02}:" + f"{int(end%60):02}." + f"{int((end%1)*1000):03}" + ) + + chapter_f.write(f"{start_time} --> {end_time}\n") + chapter_f.write(f"{title}\n\n") + logger.info(f"Chapters file saved for {video_id}") + self.add_chapters_to_zim(video_id) + def fetch_video_subtitles_list(self, video_id: str) -> Subtitles: """fetch list of subtitles for a video""" @@ -812,7 +876,7 @@ def fetch_video_subtitles_list(self, video_id: str) -> Subtitles: languages = [ x.stem.split(".")[1] for x in video_dir.iterdir() - if x.is_file() and x.name.endswith(".vtt") + if x.is_file() and x.name.endswith(".vtt") and x.name != "chapters.vtt" ] def to_subtitle_object(lang) -> Subtitle: @@ -855,7 +919,9 @@ def download_subtitles(self, video_id, options): """download subtitles for a video""" options_copy = options.copy() - options_copy.update({"skip_download": True, "writethumbnail": False}) + options_copy.update( + {"skip_download": True, "writethumbnail": False, "writeinfojson": True} + ) try: with yt_dlp.YoutubeDL(options_copy) as ydl: ydl.download([video_id]) @@ -883,6 +949,7 @@ def download_video_files_batch(self, options, videos_ids): video_id, options ): self.download_subtitles(video_id, options) + self.generate_chapters_vtt(video_id) succeeded.append(video_id) else: failed.append(video_id) @@ -1010,6 +1077,12 @@ def get_subtitles(video_id) -> list[Subtitle]: return [] return subtitles_list["subtitles"] + def get_chapters(video_id) -> list[Chapter]: + chapters_list = load_json(self.chapters_cache_dir, video_id) + if chapters_list is None: + return [] + return chapters_list["chapters"] + def get_videos_list(playlist): videos = load_mandatory_json( self.cache_dir, f"playlist_{playlist.playlist_id}_videos" @@ -1025,6 +1098,7 @@ def generate_video_object(video) -> Video: author = videos_channels[video_id] subtitles_list = get_subtitles(video_id) channel_data = get_channel_json(author["channelId"]) + chapters_list = get_chapters(video_id) return Video( id=video_id, @@ -1043,6 +1117,10 @@ def generate_video_object(video) -> Video: thumbnail_path=get_thumbnail_path(video_id), subtitle_path=f"videos/{video_id}" if len(subtitles_list) > 0 else None, subtitle_list=subtitles_list, + chapters_path=( + f"videos/{video_id}" if len(chapters_list) > 0 else None + ), + chapter_list=chapters_list, duration=videos_channels[video_id]["duration"], ) diff --git a/zimui/src/assets/vjs-youtube.css b/zimui/src/assets/vjs-youtube.css index 722df671..60e11566 100644 --- a/zimui/src/assets/vjs-youtube.css +++ b/zimui/src/assets/vjs-youtube.css @@ -55,7 +55,6 @@ border-radius: 8px; } - .video-js.vjs-fluid, .video-js.vjs-16-9, .video-js.vjs-4-3, @@ -88,4 +87,19 @@ video.vjs-tech { height: 100% !important; max-height: 100vh; object-fit: contain; -} \ No newline at end of file +} + +.custom-marker { + position: absolute; + bottom: 0; + width: 5px; + height: 100%; + background-color: #aaa; + cursor: pointer; +} + +.vjs-time-tooltip { + transform: translateY(-80%) !important; + line-height: 1.5; + padding: 3px 6px !important; +} diff --git a/zimui/src/components/video/VideoPlayer.vue b/zimui/src/components/video/VideoPlayer.vue index 5e49ed91..0ba8415e 100644 --- a/zimui/src/components/video/VideoPlayer.vue +++ b/zimui/src/components/video/VideoPlayer.vue @@ -1,11 +1,13 @@ diff --git a/zimui/src/types/Videos.ts b/zimui/src/types/Videos.ts index fa802d64..de6b6ab5 100644 --- a/zimui/src/types/Videos.ts +++ b/zimui/src/types/Videos.ts @@ -10,6 +10,8 @@ export interface Video { thumbnailPath?: string subtitlePath?: string subtitleList: Subtitle[] + chaptersPath?: string + chapterList: Chapter[] duration: string } @@ -25,3 +27,9 @@ export default interface Subtitle { code: string name: string } + +export interface Chapter { + startTime: number + endTime: number + title: string +} diff --git a/zimui/src/types/videojs.ts b/zimui/src/types/videojs.ts new file mode 100644 index 00000000..eee1daaf --- /dev/null +++ b/zimui/src/types/videojs.ts @@ -0,0 +1,6 @@ +import Component from 'video.js/dist/types/component' + +export default interface TimeTooltip extends Component { + update: (seekBarRect: DOMRect, seekBarPoint: number, time: string) => void + write: (text: string) => void +} diff --git a/zimui/src/utils/format-utils.ts b/zimui/src/utils/format-utils.ts index 0c03e028..342b1c6f 100644 --- a/zimui/src/utils/format-utils.ts +++ b/zimui/src/utils/format-utils.ts @@ -36,3 +36,18 @@ export const truncateText = (text: string, maxLength: number): string => { } return text } + +export const timeToSeconds = (timestr: string) => { + const parts = timestr.split(':').map(Number) + let seconds = 0 + + if (parts.length === 3) { + seconds = parts[0] * 3600 + parts[1] * 60 + parts[2] + } else if (parts.length === 2) { + seconds = parts[0] * 60 + parts[1] + } else if (parts.length === 1) { + seconds = parts[0] + } + + return seconds +} diff --git a/zimui/src/views/VideoPlayerView.vue b/zimui/src/views/VideoPlayerView.vue index b778538e..e601d69d 100644 --- a/zimui/src/views/VideoPlayerView.vue +++ b/zimui/src/views/VideoPlayerView.vue @@ -85,6 +85,10 @@ const videoPoster = computed(() => { return video.value?.thumbnailPath || '' }) +const chapterList = computed(() => { + return video.value?.chapterList ?? [] +}) + const subtitles = computed(() => { return video.value?.subtitleList.map((subtitle) => { return { @@ -96,6 +100,25 @@ const subtitles = computed(() => { }) }) +const chapters = computed(() => { + if (!video.value?.chaptersPath) { + return [] + } + return [ + { + kind: 'chapters', + src: `${video.value?.chaptersPath}/chapters.vtt`, + srclang: 'en', + label: 'Chapters', + default: true + } + ] +}) + +const tracks = computed(() => { + return [...subtitles.value, ...chapters.value] +}) + const videoOptions = ref({ controls: true, autoplay: true, @@ -118,7 +141,7 @@ const videoOptions = ref({ type: videoFormat } ], - tracks: subtitles + tracks: tracks }) const currentVideoIndex = computed(() => { @@ -187,6 +210,7 @@ watch(