File tree Expand file tree Collapse file tree 3 files changed +60
-0
lines changed Expand file tree Collapse file tree 3 files changed +60
-0
lines changed Original file line number Diff line number Diff line change
1
+ from bs4 import BeautifulSoup
2
+ import requests
3
+ from utils import get_summary
4
+
5
from urllib.parse import urljoin  # stdlib: resolves hrefs against the site root

# Scrape the AP News front page and print the headline plus an automatic
# summary for the lead story and for the first related story.

# Divider printed before the first story and after each story.
SEPARATOR = '=========================================================='
# Seconds to wait for the front page before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

print(SEPARATOR)

site = 'https://www.apnews.com'

response = requests.get(site, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as the front page.
response.raise_for_status()
source = response.text

soup = BeautifulSoup(source, 'lxml')

# Lead story: the first anchor carrying the "headline" class.
# find() returns None when nothing matches, so guard before dereferencing.
first_story = soup.find('a', class_="headline")

if first_story is not None and first_story.get('href'):
    print(first_story.text)
    # urljoin handles relative and absolute hrefs alike, whereas plain
    # concatenation yields a broken URL for absolute links.
    print(get_summary(urljoin(site, first_story['href'])))
else:
    print('No headline link found on {}'.format(site))
print(SEPARATOR)

# Second story: first link and headline inside the "RelatedStory" container.
second_story_container = soup.find('div', class_="RelatedStory")
second_story_link = None
second_story_title = None
if second_story_container is not None:
    second_story_link = second_story_container.a
    second_story_title = second_story_container.find('div', class_="headline")

if (
    second_story_title is not None
    and second_story_link is not None
    and second_story_link.get('href')
):
    print(second_story_title.text)
    print(get_summary(urljoin(site, second_story_link['href'])))
else:
    print('No related story found on {}'.format(site))
print(SEPARATOR)
Original file line number Diff line number Diff line change
1
+ from bs4 import BeautifulSoup
2
+ import requests
3
+ from utils import get_summary
4
+
5
from urllib.parse import urljoin  # stdlib: resolves hrefs against the site root

# Scrape the Reuters front page and print the headline plus an automatic
# summary for the lead story and for the first story in the headline list.

# Divider printed before the first story and after each story.
SEPARATOR = '=========================================================='
# Seconds to wait for the front page before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

print(SEPARATOR)

site = 'https://www.reuters.com'

response = requests.get(site, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as the front page.
response.raise_for_status()
source = response.text

soup = BeautifulSoup(source, 'lxml')

# Lead story: the anchor inside the first <h2 class="story-title">.
# find() returns None when nothing matches, so guard before dereferencing.
first_story_container = soup.find('h2', class_="story-title")
first_story = None
if first_story_container is not None:
    first_story = first_story_container.a

if first_story is not None and first_story.get('href'):
    print(first_story.text)
    # urljoin handles relative and absolute hrefs alike, whereas plain
    # concatenation yields a broken URL for absolute links.
    print(get_summary(urljoin(site, first_story['href'])))
else:
    print('No lead story found on {}'.format(site))
print(SEPARATOR)

# Second story: first link and <h3 class="story-title"> inside the
# "news-headline-list" container.
second_story_container = soup.find('div', class_="news-headline-list")
second_story_link = None
second_story_title = None
if second_story_container is not None:
    second_story_link = second_story_container.find('a')
    second_story_title = second_story_container.find('h3', class_="story-title")

if (
    second_story_title is not None
    and second_story_link is not None
    and second_story_link.get('href')
):
    # This headline's text is stripped of surrounding whitespace before printing.
    print(second_story_title.text.strip())
    print(get_summary(urljoin(site, second_story_link['href'])))
else:
    print('No headline-list story found on {}'.format(site))
print(SEPARATOR)
Original file line number Diff line number Diff line change
1
+ from newspaper import Article
2
+
3
def get_summary(url):
    """Return the summary newspaper produces for the article at *url*.

    Builds an ``Article`` for the URL, runs its download, parse and nlp
    steps in that order, and returns the resulting ``summary`` attribute.
    Any exception raised by those steps propagates to the caller.
    """
    story = Article(url)
    # The steps run in this fixed sequence, each on the same Article object.
    for step in (story.download, story.parse, story.nlp):
        step()
    return story.summary
9
+
You can’t perform that action at this time.
0 commit comments