Skip to content

Commit dd5a2ab

Browse files
authored
Example of an Image scraper
It downloads all the images on the webpage and stores them locally
1 parent 578537c commit dd5a2ab

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

images.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
"""Download every image referenced by a web page into the current directory.

Run as a script to scrape ``PAGE_URL``.  Each ``<img>`` is saved under a name
derived from its ``alt`` text, or under a running number when no usable
``alt`` text is present.
"""
import re
import sys
from urllib.error import URLError
from urllib.parse import urljoin
from urllib.request import urlopen as ureq

import bs4
from bs4 import BeautifulSoup as BS

PAGE_URL = "https://uwaterloo.ca/"

# Every image is stored with this suffix, whatever its real format is; this
# matches the naming the script has always used.
IMAGE_SUFFIX = ".jpeg"

# Upper bound on the length of a name taken from ``alt`` text, so that a long
# description cannot exceed common file-system name limits.
MAX_NAME_LENGTH = 100

# Characters that cannot (or should not) appear in a file name: path
# separators, Windows-reserved punctuation, and control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]+')


def fetch(url):
    """Return the response body of *url* as bytes.

    The connection is closed even if reading fails.
    """
    with ureq(url) as response:
        return response.read()


def clean_name(alt_text):
    """Return *alt_text* reduced to a safe file-name stem.

    Unsafe characters are replaced by ``_``, surrounding spaces and dots are
    removed, and the result is truncated to ``MAX_NAME_LENGTH`` characters.
    Returns an empty string when *alt_text* is ``None`` or nothing usable
    remains.
    """
    if not alt_text:
        return ""
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", alt_text).strip(" .")
    return cleaned[:MAX_NAME_LENGTH].rstrip(" .")


def unique_name(stem, used):
    """Return *stem*, or *stem* plus a numeric suffix, not present in *used*.

    The chosen name is added to *used* so later images with the same ``alt``
    text do not overwrite this one.
    """
    candidate = stem
    suffix = 1
    while candidate in used:
        candidate = "{}_{}".format(stem, suffix)
        suffix += 1
    used.add(candidate)
    return candidate


def download_images(page_url):
    """Save every ``<img>`` found on *page_url* to the current directory.

    Images without a ``src`` are ignored.  An image that cannot be downloaded
    is reported on stderr and skipped so the remaining images are still
    fetched.  Returns the list of file names written.
    """
    # Parse the downloaded markup, not the URL string itself.
    soup = BS(fetch(page_url), "lxml")

    written = []
    used_names = set()
    unnamed_count = 0

    for img in soup.find_all("img"):
        src = img.get("src")
        if not src:
            continue

        # urljoin handles absolute, root-relative, relative and
        # protocol-relative sources without doubling the slash.
        image_url = urljoin(page_url, src)

        # Download first so a failed request leaves no empty file behind.
        try:
            data = fetch(image_url)
        except (URLError, ValueError) as err:
            print("skipping {}: {}".format(image_url, err), file=sys.stderr)
            continue

        stem = clean_name(img.get("alt"))
        if not stem:
            stem = str(unnamed_count)
            unnamed_count += 1

        filename = unique_name(stem, used_names) + IMAGE_SUFFIX
        with open(filename, "wb") as imagefile:
            imagefile.write(data)
        written.append(filename)

    return written


if __name__ == "__main__":
    download_images(PAGE_URL)

0 commit comments

Comments
 (0)