-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.py
executable file
·30 lines (28 loc) · 1.31 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from bs4 import BeautifulSoup, NavigableString
def html_to_text(html):
"Creates a formatted text email message from a rendered html template (page)"
soup = BeautifulSoup(html, 'html.parser')
# Ignore anything in head
body, text = soup.body, [] # body as string & text as empty list
if body is None:
return ""
else:
for element in body.descendants:
# We use type and not isinstance since comments, cdata, etc are subclasses that we don't want
if type(element) == NavigableString:
# We use the assumption that other tags can't be inside a script or style
if element.parent.name in ('script', 'style'):
continue
elif element.parent.name == 'a':
# replace link text with the link
# text += [element.parent['href']]
continue
# remove any multiple and leading/trailing whitespace
string = ' '.join(element.string.split())
if string:
if element.parent.name == 'p':
# Add extra paragraph formatting newline
string = '\n' + string
text += [string]
doc = '\n'.join(text) # .encode('utf-8')
return doc