Skip to content

Commit 2639728

Browse files
authored
Product Search Scraper
The script stores all search results from an e-commerce site in a .csv file.
1 parent 90f45e9 commit 2639728

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

a.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
"""Scrape one Newegg search-results page and save the listings to a CSV file.

For every product container on the page the script extracts the brand, the
product name and the shipping text, echoes them to stdout, and writes one row
per product to ``products.csv``.
"""
import csv
from urllib.request import urlopen as ureq

from bs4 import BeautifulSoup as soup

my_url = "https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38?Tpk=graphics%20card"
filename = "products.csv"


def _brand_of(container):
    """Return the brand read from the nested logo image's ``title`` attribute.

    Returns an empty string when any element along the
    ``div > div > a > img`` path, or the ``title`` attribute itself, is
    missing, so one incomplete listing does not abort the whole run.
    """
    try:
        return container.div.div.a.img["title"]
    except (AttributeError, KeyError, TypeError):
        # AttributeError/TypeError: a tag on the path is None.
        # KeyError: the <img> exists but has no title attribute.
        return ""


def _first_text(container, tag, css_class):
    """Return the text of the first ``tag`` with ``css_class``, or ``""``."""
    found = container.find(tag, {"class": css_class})
    return found.text if found is not None else ""


# Open the connection and download the page; the context manager closes the
# response even if the read fails.
with ureq(my_url) as uClient:
    page_html = uClient.read()

# HTML parsing
page_soup = soup(page_html, "html.parser")

# Grab each product container
containers = page_soup.findAll("div", {"class": "item-container"})

# newline="" is what the csv module expects from the file object; the writer
# then controls line endings itself. The explicit encoding keeps the output
# independent of the platform's default code page.
with open(filename, "w", newline="", encoding="utf-8") as f:
    # csv.writer quotes fields containing commas, quotes or newlines, so no
    # field needs lossy manual comma replacement.
    writer = csv.writer(f, lineterminator="\n")
    writer.writerow(["brand", "product_name", "shipping"])

    for container in containers:
        brand = _brand_of(container)
        # Product name of this result
        product_name = _first_text(container, "a", "item-title")
        # Shipping text of this result
        shipping = _first_text(container, "li", "price-ship").strip()

        print("brand: " + brand)
        print("product_name: " + product_name)
        print("shipping: " + shipping)

        writer.writerow([brand, product_name, shipping])

0 commit comments

Comments
 (0)