-
Notifications
You must be signed in to change notification settings - Fork 92
/
generate-legacy-pages.py
78 lines (65 loc) · 2.77 KB
/
generate-legacy-pages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import datetime, re
import jinja2, markdown2, yaml
from bs4 import BeautifulSoup
from xml.sax.saxutils import escape
# NB: Avoid annoying BeautifulSoup warnings of the following kind:
#
# MarkupResemblesLocatorWarning: The input looks more like a URL than markup.
# You may want to use an HTTP client like requests to get the document behind
# the URL, and feed that document to Beautiful Soup.
#
# See: https://stackoverflow.com/a/41496131/1207769
import warnings
from bs4 import MarkupResemblesLocatorWarning
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
def html(markdown_string):
    """Render a Markdown string to HTML, unwrapping a lone enclosing paragraph.

    The Markdown is converted with ``markdown2.markdown`` and trailing
    whitespace is removed. If the result is exactly one ``<p>...</p>``
    element (it starts with ``<p>``, ends with ``</p>``, and contains a
    single ``</p>``), the surrounding tags are dropped so the fragment can
    be embedded inline. Any other result is returned unchanged.
    """
    rendered = markdown2.markdown(markdown_string).rstrip()
    is_single_paragraph = (
        rendered.startswith('<p>')
        and rendered.endswith('</p>')
        and rendered.count('</p>') == 1
    )
    if not is_single_paragraph:
        return rendered
    # Drop the leading '<p>' and the trailing '</p>'.
    return rendered[len('<p>'):-len('</p>')]
def plain(html_string):
    """Return the text content of an HTML string, without trailing whitespace.

    The string is parsed with BeautifulSoup's ``html.parser`` backend and
    only the text is kept; markup is discarded.
    """
    soup = BeautifulSoup(html_string, features="html.parser")
    text = soup.get_text()
    return text.rstrip()
# Take one timestamp (naive local time) so that the date and time stamps
# passed to the templates always describe the same instant.
now = datetime.datetime.now()
date = format(now, "%d %B %Y")
time = format(now, "%H:%M")

# Templates are looked up relative to the current working directory.
template_loader = jinja2.FileSystemLoader("./")
template_env = jinja2.Environment(loader=template_loader)
# Parse the YAML source to a sites data structure.
# The encoding is given explicitly so that any non-ASCII text in the file is
# decoded the same way on every platform, instead of depending on the
# locale's default encoding.
with open('sites.yml', 'r', encoding='utf-8') as stream:
    sites = yaml.safe_load(stream)
# Open the XML template.
xml_template = template_env.get_template('sites.xml.template')

# Render sites.xml from the sites data structure.
# Descriptions and maintainers are passed through html() and plain() to
# reduce them to plain text, and are then XML-escaped before rendering.
xml_data = xml_template.render(sites=[{
    # NB: No Markdown or HTML allowed in name or url!
    'name': site['name'],
    'url': site['url'],
    'description': escape(plain(html(site['description']))),
    'maintainer': escape(', '.join(plain(html(m)) for m in site['maintainers'])),
} for site in sites['sites']], date=date, time=time)

# Write explicitly as UTF-8 rather than in the locale's default encoding, so
# the output bytes do not vary by platform and non-ASCII text cannot fail to
# encode. XML parsers assume UTF-8 when no encoding is declared.
# NOTE(review): assumes sites.xml.template does not declare a different
# encoding in its XML prolog -- confirm.
with open('sites.xml', 'w', encoding='utf-8') as sites_xml_file:
    sites_xml_file.write(xml_data)
# Tweak the XML: HTTPS -> HTTP for select URLs.
# And write the tweaked result to sites_insecure.xml.
# The dots in the host names are escaped so that they match a literal '.'
# only; unescaped, each '.' would match any character. Raw strings keep the
# backslashes (including the \1 group reference) intact.
xml_data_insecure = re.sub(
    r'https://(update\.imagej\.net|update\.fiji\.sc|sites\.imagej\.net)/',
    r'http://\1/', xml_data)

# Write explicitly as UTF-8 rather than in the locale's default encoding, so
# the output bytes do not vary by platform.
# NOTE(review): assumes the XML template does not declare a different
# encoding in its prolog -- confirm.
with open('sites_insecure.xml', 'w', encoding='utf-8') as sites_xml_file_insecure:
    sites_xml_file_insecure.write(xml_data_insecure)
# Validate the resulting XML files.
# Each file is parsed only for its side effect: the parser raises if the
# document is not well-formed. The parsed documents are discarded.
import xml.dom.minidom as dom
for xml_path in ('sites_insecure.xml', 'sites.xml'):
    dom.parse(xml_path)
# Open the HTML template.
html_template = template_env.get_template('sites.html.template')

# Render sites.html from the sites data structure.
# Descriptions and maintainers are converted with html(), so their markup is
# kept (unlike the XML output, which reduces them to plain text).
result = html_template.render(sites=[{
    # NB: No Markdown or HTML allowed in name or url!
    'name': site['name'],
    'url': site['url'],
    'description': html(site['description']),
    'maintainer': ', '.join(html(m) for m in site['maintainers']),
} for site in sites['sites']], date=date, time=time)

# Write explicitly as UTF-8 rather than in the locale's default encoding, so
# the output bytes do not vary by platform and non-ASCII text cannot fail to
# encode.
# NOTE(review): assumes sites.html.template declares (or is served as)
# UTF-8 -- confirm.
with open('sites.html', 'w', encoding='utf-8') as sites_html_file:
    sites_html_file.write(result)