-
Notifications
You must be signed in to change notification settings - Fork 14
/
reddit-scraper.py
38 lines (32 loc) · 1.17 KB
/
reddit-scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import re
from datetime import datetime, timezone

import pandas as pd
import praw
# Placeholder API credentials; replace each value with real settings before
# running the script.
_credentials = {
    'client_id': 'your_client_id',
    'client_secret': 'your_client_secret',
    'user_agent': 'your_user_agent',
}
reddit = praw.Reddit(**_credentials)

# Handle for the subreddit that the listings below are read from.
subreddit = reddit.subreddit('subreddit_name')
# Column-oriented accumulator: every key starts with its own empty list
# (never a shared one), in the order the keys are listed here.
data = {
    column: []
    for column in (
        'title',
        'score',
        'url',
        'num_comments',
        'created',
        'author',
        'selftext',
        'flair',
    )
}
# Matches any run of whitespace. Written as a raw string because '\s' in a
# plain literal is an invalid escape sequence that newer Python versions warn
# about; compiled once so every loop iteration reuses the same pattern.
_WHITESPACE_RUN = re.compile(r'\s+')

# Read up to 50 submissions from the subreddit's "hot" listing and append one
# value per column to `data`, so all column lists stay the same length.
for submission in subreddit.hot(limit=50):
    data['title'].append(submission.title)
    data['score'].append(submission.score)
    data['url'].append(submission.url)
    data['num_comments'].append(submission.num_comments)
    # created_utc is the creation time as a UTC Unix timestamp; converting it
    # with an explicit UTC tzinfo keeps the CSV independent of the timezone
    # of the machine running the script.
    data['created'].append(
        datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
    )
    # A falsy author has no .name to read (presumably a deleted account --
    # confirm), so fall back to the 'N/A' marker.
    data['author'].append(submission.author.name if submission.author else 'N/A')
    # Trim the post body, then squeeze every internal whitespace run
    # (newlines included) down to a single space.
    data['selftext'].append(_WHITESPACE_RUN.sub(' ', submission.selftext.strip()))
    data['flair'].append(submission.link_flair_text)

# Build a table from the collected columns and write it out without the
# DataFrame's index column.
df = pd.DataFrame(data)
df.to_csv('reddit_scraper_output.csv', index=False)
# Ask the "top" listing for at most one submission and print a short summary
# of it; if the listing yields nothing, nothing is printed.
# NOTE(review): no time filter is passed, so the "of all time" wording relies
# on the library's default for top() -- confirm.
top_post = subreddit.top(limit=1)
submission = next(iter(top_post), None)
if submission is not None:
    print(f"Top post of all time:\nTitle: {submission.title}\nScore: {submission.score}\nLink: {submission.url}\n")