58
58
from youtube2zim .schemas import (
59
59
Author ,
60
60
Channel ,
61
+ Chapter ,
61
62
Config ,
62
63
HomePlaylists ,
63
64
Playlist ,
@@ -215,6 +216,10 @@ def cache_dir(self):
215
216
def subtitles_cache_dir (self ):
216
217
return self .cache_dir .joinpath ("subtitles" )
217
218
219
@property
def chapters_cache_dir(self):
    """Folder, inside ``cache_dir``, holding the per-video chapters JSON."""
    cache_root = self.cache_dir
    return cache_root.joinpath("chapters")
222
+
218
223
@property
def videos_dir(self):
    """Folder, inside ``build_dir``, under which per-video files are placed."""
    build_root = self.build_dir
    return build_root.joinpath("videos")
@@ -455,6 +460,7 @@ def prepare_build_folder(self):
455
460
# cache folder to store youtube-api results
456
461
self .cache_dir .mkdir (exist_ok = True )
457
462
self .subtitles_cache_dir .mkdir (exist_ok = True )
463
+ self .chapters_cache_dir .mkdir (exist_ok = True )
458
464
459
465
# make videos placeholder
460
466
self .videos_dir .mkdir (exist_ok = True )
@@ -805,14 +811,72 @@ def download_thumbnail(self, video_id, options):
805
811
self .upload_to_cache (s3_key , thumbnail_path , preset .VERSION )
806
812
return True
807
813
814
def add_chapters_to_zim(self, video_id: str):
    """Add the ``chapters.vtt`` file of a video to the ZIM, if it exists.

    The file is looked up at ``<videos_dir>/<video_id>/chapters.vtt`` and is
    stored in the ZIM at ``videos/<video_id>/chapters.vtt``. Nothing happens
    when the video has no chapters file.

    Args:
        video_id: identifier of the video whose chapters file is added.
    """
    chapters_file = self.videos_dir.joinpath(video_id, "chapters.vtt")
    if not chapters_file.exists():
        return

    self.add_file_to_zim(
        # the ZIM path must not contain any stray whitespace, otherwise it
        # does not match the `videos/<video_id>` prefix used for this video
        f"videos/{video_id}/{chapters_file.name}",
        chapters_file,
        # NOTE(review): presumably removes the local file once it has been
        # written to the ZIM -- confirm against delete_callback
        callback=(delete_callback, chapters_file),
    )
824
+
825
def generate_chapters_vtt(self, video_id):
    """Generate the WebVTT chapters file of a video, if chapters are available.

    Reads ``<videos_dir>/<video_id>/video.info.json``; when it lists chapters,
    they are saved as JSON in ``chapters_cache_dir``, written as cues to
    ``<videos_dir>/<video_id>/chapters.vtt`` and that file is then handed to
    ``add_chapters_to_zim``. Nothing is done when the metadata file is
    missing or holds no chapters.

    Args:
        video_id: identifier of the video to process.
    """

    def vtt_timestamp(seconds):
        """Format a duration in seconds as a WebVTT ``hh:mm:ss.ttt`` stamp."""
        # Work on whole milliseconds: rounding once avoids the float
        # truncation of `int((t % 1) * 1000)` (1.001 s used to yield `.000`)
        # and lets a rounded-up value carry into seconds/minutes/hours.
        total_ms = round(seconds * 1000)
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02}:{minutes:02}:{secs:02}.{millis:03}"

    metadata_file = self.videos_dir.joinpath(video_id, "video.info.json")
    if not metadata_file.exists():
        return

    with metadata_file.open(encoding="utf-8") as f:
        metadata = json.load(f)
    chapters = metadata.get("chapters", [])

    # also covers an explicit `"chapters": null` in the metadata
    if not chapters:
        logger.info(f"No chapters found for {video_id}")
        return

    logger.info(f"Found {len(chapters)} chapters for {video_id}")

    save_json(
        self.chapters_cache_dir,
        video_id,
        {"chapters": chapters},
    )

    chapters_file = self.videos_dir.joinpath(video_id, "chapters.vtt")
    with chapters_file.open("w", encoding="utf8") as chapter_f:
        # the header must be followed by a truly empty line
        chapter_f.write("WEBVTT\n\n")
        for chapter in chapters:
            start_time = vtt_timestamp(chapter["start_time"])
            end_time = vtt_timestamp(chapter["end_time"])
            title = chapter["title"]

            # one cue per chapter: timing line, title, blank separator line
            chapter_f.write(f"{start_time} --> {end_time}\n")
            chapter_f.write(f"{title}\n\n")
    logger.info(f"Chapters file saved for {video_id}")
    self.add_chapters_to_zim(video_id)
871
+
808
872
def fetch_video_subtitles_list (self , video_id : str ) -> Subtitles :
809
873
"""fetch list of subtitles for a video"""
810
874
811
875
video_dir = self .videos_dir .joinpath (video_id )
812
876
languages = [
813
877
x .stem .split ("." )[1 ]
814
878
for x in video_dir .iterdir ()
815
- if x .is_file () and x .name .endswith (".vtt" )
879
+ if x .is_file () and x .name .endswith (".vtt" ) and x . name != "chapters.vtt"
816
880
]
817
881
818
882
def to_subtitle_object (lang ) -> Subtitle :
@@ -855,7 +919,9 @@ def download_subtitles(self, video_id, options):
855
919
"""download subtitles for a video"""
856
920
857
921
options_copy = options .copy ()
858
- options_copy .update ({"skip_download" : True , "writethumbnail" : False })
922
+ options_copy .update (
923
+ {"skip_download" : True , "writethumbnail" : False , "writeinfojson" : True }
924
+ )
859
925
try :
860
926
with yt_dlp .YoutubeDL (options_copy ) as ydl :
861
927
ydl .download ([video_id ])
@@ -883,6 +949,7 @@ def download_video_files_batch(self, options, videos_ids):
883
949
video_id , options
884
950
):
885
951
self .download_subtitles (video_id , options )
952
+ self .generate_chapters_vtt (video_id )
886
953
succeeded .append (video_id )
887
954
else :
888
955
failed .append (video_id )
@@ -1010,6 +1077,12 @@ def get_subtitles(video_id) -> list[Subtitle]:
1010
1077
return []
1011
1078
return subtitles_list ["subtitles" ]
1012
1079
1080
def get_chapters(video_id) -> list[Chapter]:
    """Return the cached chapters of a video, or an empty list.

    The empty list is returned when ``load_json`` yields ``None`` for this
    video in ``chapters_cache_dir`` (presumably: no chapters were cached for
    it -- confirm against ``load_json``).
    """
    cached = load_json(self.chapters_cache_dir, video_id)
    return [] if cached is None else cached["chapters"]
1085
+
1013
1086
def get_videos_list (playlist ):
1014
1087
videos = load_mandatory_json (
1015
1088
self .cache_dir , f"playlist_{ playlist .playlist_id } _videos"
@@ -1025,6 +1098,7 @@ def generate_video_object(video) -> Video:
1025
1098
author = videos_channels [video_id ]
1026
1099
subtitles_list = get_subtitles (video_id )
1027
1100
channel_data = get_channel_json (author ["channelId" ])
1101
+ chapters_list = get_chapters (video_id )
1028
1102
1029
1103
return Video (
1030
1104
id = video_id ,
@@ -1043,6 +1117,10 @@ def generate_video_object(video) -> Video:
1043
1117
thumbnail_path = get_thumbnail_path (video_id ),
1044
1118
subtitle_path = f"videos/{ video_id } " if len (subtitles_list ) > 0 else None ,
1045
1119
subtitle_list = subtitles_list ,
1120
+ chapters_path = (
1121
+ f"videos/{ video_id } " if len (chapters_list ) > 0 else None
1122
+ ),
1123
+ chapter_list = chapters_list ,
1046
1124
duration = videos_channels [video_id ]["duration" ],
1047
1125
)
1048
1126
0 commit comments