-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
192 lines (149 loc) · 6.8 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#v0.9 Bibelo 10th of Nov, 2020
import datetime
import hashlib
import json
import os
import requests
from configparser import ConfigParser
from lxml import etree as ET
def init():
    """Define the module-level file names and paths used by the script.

    Four globals are assigned: the INI configuration file name, the XML
    file used as the basis of every generated feed, a website path and the
    name of the resources directory.
    """
    global ini_config_file, basis_xml_file, website_path, resources_path
    # Input files, given relative to the working directory.
    ini_config_file, basis_xml_file = 'config.ini', 'podcast_ref.xml'
    # Locations on disk.
    website_path, resources_path = '/var/www/podcast', 'resources'
def theme_page_download(jw_url, podcast_html_file, timeout=30):
    """Download the JW media index and cache it in a local file.

    The downloaded text is compared (through SHA-256 digests) with the
    content of ``podcast_html_file``. The file is (re)written only when it
    does not exist yet or when its content differs from the download.

    Args:
        jw_url: URL of the page containing the index of all media.
        podcast_html_file: Path of the local cache file.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the script forever.

    Returns:
        None.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    print("Downloading the JW media index")
    response = requests.get(jw_url, timeout=timeout)
    # Stop before touching the cache: an error page must never replace a
    # previously downloaded, valid index.
    response.raise_for_status()
    new_index_content = response.text
    new_hash = hashlib.sha256(new_index_content.encode('utf-8')).hexdigest()

    # Has a previous page ever been downloaded?
    if os.path.exists(podcast_html_file):
        # Hash the raw bytes so that neither the locale encoding nor newline
        # translation can make identical content look different.
        with open(podcast_html_file, 'rb') as previous_index:
            previous_hash = hashlib.sha256(previous_index.read()).hexdigest()
        if new_hash == previous_hash:
            print("JW media index has not been updated on jw.org")
            return
        print("JW media index has been updated on jw.org")
    else:
        print("A local index did not exist and has been created")

    # newline='' writes the text untranslated, so the bytes on disk are
    # exactly the UTF-8 encoding that was hashed above.
    with open(podcast_html_file, 'w', encoding='utf-8', newline='') as index_file:
        index_file.write(new_index_content)
def theme_page_extract(podcast_html_file):
    """Extract the podcast fields of every media entry of a cached index.

    The file is parsed as JSON and the entries found under
    ``['category']['media']`` are read in order.

    Args:
        podcast_html_file: Path of the locally cached index (JSON content).

    Returns:
        A list with one list per media entry, laid out as
        ``[title, media_url, media_filename, media_pubDate, None]``.
        ``media_url`` is the ``progressiveDownloadURL`` of the first file of
        the entry, ``media_filename`` is the last path component of that URL
        without its extension, and the last slot is a placeholder kept for a
        future thumbnail URL.

    Raises:
        KeyError, IndexError: If an entry lacks one of the expected fields
            or has no file.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(podcast_html_file, 'r', encoding='utf-8') as f:
        json_index = json.load(f)

    podcast_fields = []
    for media in json_index['category']['media']:
        media_url = media['files'][0]['progressiveDownloadURL']
        # splitext removes the real extension whatever its length; slicing
        # off a fixed four characters mangled names such as "clip.webm".
        media_filename = os.path.splitext(media_url.split('/')[-1])[0]
        podcast_fields.append(
            [media['title'], media_url, media_filename, media['firstPublished'], None]
        )
    return podcast_fields
def xml_write(xml_basis_file, podcast_fields, general_parameters, podcast_parameters):
    """Generate the RSS file of one podcast from the basis XML file.

    The basis file is parsed, the header elements of its first child are
    filled in by position, and one ``<item>`` per entry of
    ``podcast_fields`` is appended to every ``<channel>`` element. The
    result is written to the path given by ``podcast_parameters[1]``.

    Args:
        xml_basis_file: Path of the XML file used as a starting point.
        podcast_fields: List of per-episode lists. Index 0 is the title
            (also used as description), 1 the media URL, 2 the guid, 3 the
            publication timestamp formatted like
            ``2020-11-10T12:00:00.000Z``, and 5 the enclosure length.
            Index 4 is not used here.
        general_parameters: ``[general_title, general_url,
            general_description]``.
        podcast_parameters: ``[podcast_name, podcast_file,
            podcast_logo_filename]``.

    Raises:
        ValueError: If a publication timestamp does not match the expected
            format.
    """
    # Imported locally: only this function needs them.
    import mimetypes
    from email.utils import format_datetime

    general_title = general_parameters[0]
    general_url = general_parameters[1]
    general_description = general_parameters[2]
    podcast_name = podcast_parameters[0]
    podcast_file = podcast_parameters[1]
    podcast_logo_filename = podcast_parameters[2]

    parser = ET.XMLParser(remove_blank_text=True)
    root = ET.parse(xml_basis_file, parser).getroot()

    # The header is addressed by position, so the layout of the basis file
    # must match the indices used below.
    header = root[0]
    # <title> of the Podcast
    header[0].text = general_title + " " + podcast_name
    # <link> URL of the Podcast
    header[1].text = general_url + podcast_file
    # <description> of the Podcast
    header[3].text = general_description + " " + podcast_name
    # <image><url> URL of the image of the Podcast
    header[4][0].text = general_url + podcast_logo_filename
    # <image><title> Title of the logo
    header[4][1].text = podcast_name
    # <image><link> = URL of the Podcast
    header[4][2].text = general_url + podcast_file

    print("Generating XML for Podcast " + header[0].text)
    print(header[1].text)

    # Looked up once: appending <item> elements does not change the set of
    # <channel> elements.
    channels = list(root.iter('channel'))

    for fields in podcast_fields:
        title = fields[0]
        link = fields[1]
        guid = fields[2]
        length = fields[5]
        # The trailing "Z" of the timestamp means UTC; make that explicit so
        # the formatted date carries the right offset.
        published = datetime.datetime.strptime(
            fields[3], '%Y-%m-%dT%H:%M:%S.%fZ'
        ).replace(tzinfo=datetime.timezone.utc)

        item = ET.Element("item")
        ET.SubElement(item, "title").text = title
        ET.SubElement(item, "description").text = title
        ET.SubElement(item, "link").text = link

        enclosure = ET.SubElement(item, "enclosure")
        enclosure.set("length", str(length))
        # Declare the real media type of the file (e.g. video/mp4) and fall
        # back to the previous fixed value when it cannot be guessed.
        enclosure.set("type", mimetypes.guess_type(link)[0] or "audio/mpeg")
        enclosure.set("url", link)

        # RSS 2.0 requires RFC 822 dates, e.g. "Tue, 10 Nov 2020 12:00:00 +0000".
        ET.SubElement(item, "pubDate").text = format_datetime(published)

        # TODO: add the itunes:* elements (author, duration, explicit) to
        # make the feed iPhone compatible, and possibly a picture per item.

        item_guid = ET.SubElement(item, "guid")
        # The guid is a file name, not a URL, so it must not be advertised
        # as a permalink (the default when the attribute is absent).
        item_guid.set("isPermaLink", "false")
        item_guid.text = guid

        # the new item is added under the channel section
        for channel in channels:
            channel.append(item)

    ET.ElementTree(root).write(
        podcast_file, encoding='utf-8', pretty_print=True, xml_declaration=True
    )
def main():
    """Build and publish the RSS feed of every podcast of the configuration.

    The ``General`` section of the INI file provides the shared title, URL,
    description and the local directory the results are copied to. Every
    other section describes one podcast: its index is downloaded, its
    fields are extracted, the XML feed is generated, and the feed and the
    podcast logo are placed in the local directory.

    Raises:
        FileNotFoundError: If the configuration file cannot be read.
        KeyError: If a required section or option is missing.
    """
    # Imported locally: only this function needs it.
    import shutil

    init()
    ini_config = ConfigParser()
    # ConfigParser.read() silently skips unreadable files; report that here
    # instead of failing later with an unrelated KeyError.
    if not ini_config.read(ini_config_file):
        raise FileNotFoundError("Configuration file not found: " + ini_config_file)

    general = ini_config['General']
    general_title = general['general_title']
    general_url = general['general_url']
    general_description = general['general_description']
    general_local_path = general['general_local_path']
    general_parameters = [general_title, general_url, general_description]
    # The shell used to expand "~"; keep supporting it.
    destination_dir = os.path.expanduser(general_local_path)

    for section in ini_config.sections():
        if section == 'General':
            continue
        podcast = ini_config[section]
        jw_url = podcast['jw_url']
        podcast_name = podcast['podcast_name']
        podcast_xml_file = podcast['podcast_file'] + ".xml"
        podcast_html_file = podcast['podcast_file'] + ".html"
        podcast_logo_filename = podcast['podcast_logo_filename']
        podcast_parameters = [podcast_name, podcast_xml_file, podcast_logo_filename]

        print("For podcast " + podcast_name)
        theme_page_download(jw_url, podcast_html_file)
        # Extract the different fields from the index and put them in list
        podcast_fields = theme_page_extract(podcast_html_file)
        # here, we just add the length ("1024" is a fixed placeholder, not
        # the size of the media file)
        for fields in podcast_fields:
            fields.append("1024")
        # Creation of the whole XML file!
        xml_write(basis_xml_file, podcast_fields, general_parameters, podcast_parameters)

        print("Copying XML Podcast file and logo for " + podcast_name)
        print("Copying to " + general_local_path + "\n")
        # Plain file operations instead of shell command strings: paths with
        # spaces or shell metacharacters are handled literally. The move
        # target names the file explicitly because shutil.move() refuses to
        # overwrite an existing file when given only a directory.
        transfers = (
            (shutil.move, podcast_xml_file,
             os.path.join(destination_dir, os.path.basename(podcast_xml_file))),
            (shutil.copy, os.path.join(resources_path, podcast_logo_filename),
             destination_dir),
        )
        for transfer, source, target in transfers:
            try:
                transfer(source, target)
            except OSError as error:
                # As before, one failed transfer does not stop the others.
                print("Could not publish " + source + ": " + str(error))
if __name__ == "__main__":
    # Generate the feeds only when run as a script, not when imported.
    main()