-
Notifications
You must be signed in to change notification settings - Fork 3
/
scrape.py
44 lines (37 loc) · 1.21 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from scrapeComments import scrapeComments
import argparse
import os
import json
import csv
def buildParser():
    """Build the command-line parser for the scraper script.

    Command-line arguments:
        source: positional. The script treats a value ending in ``.txt`` as
            a file listing one source per line, and anything else as a
            single source to scrape.
        -o / --output: optional output format, ``json`` or ``csv``. Stays
            ``None`` when omitted, which the script treats as JSON.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'source',
        help='source to scrape, or a .txt file listing one source per line')
    # Reject unsupported formats before any scraping starts; without the
    # restriction a typo is only noticed once all the work has been done.
    parser.add_argument(
        '-o', '--output', choices=['json', 'csv'],
        help='output format (default: json)')
    return parser


if __name__ == '__main__':
    parser = buildParser()
    args = parser.parse_args()
def saveComments(format, comments):
    """Write each scrape result to its own numbered file in the working directory.

    Result number ``i`` is written to ``comments-<i>.json`` or
    ``comments-<i>.csv``, depending on ``format``.

    Args:
        format: ``'json'``, ``'csv'`` or ``None`` (``None`` means JSON).
            The name shadows the builtin but is kept for existing callers.
        comments: iterable of scrape results. For JSON each result only has
            to be JSON-serialisable. For CSV each result must be a sequence
            of dicts (presumably one dict per comment -- confirm against
            scrapeComments); the header row is taken from the first dict's
            keys. An empty result produces an empty CSV file.

    Raises:
        ValueError: if ``format`` is not one of the supported values.
    """
    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(format)

    fmt = 'json' if format is None else format
    if fmt not in ('json', 'csv'):
        # Fail before touching the disk instead of silently writing nothing.
        raise ValueError('unsupported output format: {0!r}'.format(format))

    for i, comment in enumerate(comments):
        fileName = 'comments-{0}.{1}'.format(i, fmt)
        # ``with`` closes the file even when serialisation raises.
        with open(fileName, 'w') as outFile:
            if fmt == 'json':
                json.dump(comment, outFile, indent=4)
            elif comment:
                # Guarded: an empty result has no first row to take the
                # header from, so the file is simply left empty.
                writer = csv.DictWriter(outFile, list(comment[0].keys()))
                writer.writeheader()
                writer.writerows(comment)
def readSources(fileOrUrl):
    """Expand the command-line source into the list of targets to scrape.

    Args:
        fileOrUrl: either a path ending in ``.txt`` that lists one target
            per line, or a single target.

    Returns:
        A list of target strings. For a ``.txt`` file these are its lines
        with surrounding whitespace removed and blank lines dropped; for
        anything else it is a one-element list holding ``fileOrUrl``
        unchanged.
    """
    fileExtension = os.path.splitext(fileOrUrl)[1]
    if fileExtension != '.txt':
        # Hand the source over untouched. The stem returned by splitext
        # must not be used here: any dot in the last path component
        # ("http://host/watch.php?v=1") would truncate the target.
        return [fileOrUrl]

    # ``with`` releases the file handle as soon as the list is read.
    with open(fileOrUrl) as sourceFile:
        stripped = [line.strip() for line in sourceFile]
    # Blank lines (typically a trailing newline) are not targets.
    return [line for line in stripped if line]


# Guarded because it relies on ``args``, which only exists when the file is
# run as a script.
if __name__ == '__main__':
    comments = [scrapeComments(source) for source in readSources(args.source)]
    saveComments(args.output, comments)