-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathbot.py
158 lines (135 loc) · 4.52 KB
/
bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
from argparse import RawTextHelpFormatter
import codecs
from time import sleep
import json
import os
import requests
import api
import config
import lib
# Command-line interface. RawTextHelpFormatter keeps the line breaks that are
# written inside the help strings instead of re-wrapping them.
description = u'Monitorea tuiter en búsqueda de publidad estatal'
parser = argparse.ArgumentParser(
    formatter_class=RawTextHelpFormatter,
    description=description,
)

# -u FILE: optional; the given file name is stored in ``args.update``.
parser.add_argument(
    '-u', '--update',
    dest='update',
    metavar='lista_autoridades2.csv',
    help=(
        'Bajar nuevos tuits, actualizar database, y capturar pantalla. Es\n'
        'necesario especificar el archivo a usar.'
    ),
)

# -r: boolean switch stored in ``args.report``.
parser.add_argument(
    '-r', '--report',
    dest='report',
    action='store_true',
    help='Reportar tuits que se sospechen contengan publicidad municipal',
)

# -f: boolean switch stored in ``args.firstime``.
parser.add_argument(
    '-f', '--first_time_download',
    dest='firstime',
    action='store_true',
    help=u'Descargar 3200 tuits más recientes',
)

# Parsed once at module level; the rest of the file reads the global ``args``.
args = parser.parse_args()
def get_user_list(lista_autoridades):
    """Load the authorities file and return its rows that contain an ``@``.

    ``lista_autoridades`` is a file name resolved against
    ``config.local_folder``. The file is read as UTF-8. Each line containing
    an ``@`` is stripped of surrounding whitespace and split on commas; all
    other lines are ignored.

    Returns a list with one list of fields per kept line.
    """
    path = os.path.join(config.local_folder, lista_autoridades)
    rows = []
    with codecs.open(path, "r", "utf-8") as source:
        for raw_line in source:
            # Lines without a handle marker are skipped.
            if '@' not in raw_line:
                continue
            rows.append(raw_line.strip().split(","))
    return rows
def _tweet_record(item):
    """Pick the fields that get stored out of one status object from the API."""
    record = {}
    record['tweet_id'] = item['id']
    record['screen_name'] = item['user']['screen_name'].lower()
    record['user_id'] = item['user']['id']
    record['status'] = item['text']
    record['created_at'] = item['created_at']
    record['utc_offset'] = item['user']['utc_offset']
    # Coordinates are only stored when the status carries a non-empty 'geo'.
    geo = item.get('geo')
    if geo:
        record['latitude'] = geo['coordinates'][0]
        record['longitude'] = geo['coordinates'][1]
    return record


def get_recent_tweets(user_list):
    """Download each user's timeline into ``<handle>.json``, one tweet per line.

    ``user_list`` is a sequence of rows whose second field is the Twitter
    handle (a leading ``@`` is removed). For every user the timeline endpoint
    is requested 200 statuses at a time. After each request the smallest
    ``tweet_id`` found in the user's file is read back with ``get_max_id`` and
    sent as ``max_id`` on the next request; paging stops once that value no
    longer changes between two rounds.

    Records are appended to the file as JSON lines. Connection errors and
    replies that are not a list of statuses are printed and the page is
    skipped, which ends the paging for that user when nothing new was stored.

    NOTE(review): per the API documentation ``max_id`` is inclusive, so the
    boundary tweet is presumably appended again on every page -- confirm
    whether the consumer of these files de-duplicates.
    """
    oauth = api.get_oauth()
    url = "https://api.twitter.com/1.1/statuses/user_timeline.json"

    for user in user_list:
        max_id = 0
        new_max_id = None
        twitter_handle = user[1].replace("@", "")
        filename = twitter_handle + ".json"

        while max_id != new_max_id:
            # Single-argument prints: same output as the two-item print
            # statements, and valid syntax on both Python 2 and 3.
            print("\nMax_id %s" % (max_id,))
            print("New_max_id %s" % (new_max_id,))
            print("%s \n" % (user,))

            payload = {
                'screen_name': twitter_handle,
                'count': 200,
            }
            if max_id is None:
                # Nothing readable in the file: ask for the newest page again.
                print("yes none")
                new_max_id = None
            elif max_id != 0:
                # Continue from the oldest tweet stored so far.
                new_max_id = get_max_id(filename)
                max_id = new_max_id
                payload['max_id'] = max_id

            # Fixed pause before every request.
            sleep(4)
            data = None
            try:
                r = requests.get(url, auth=oauth, params=payload)
                data = r.json()
            except requests.exceptions.ConnectionError as e:
                print("Error %s" % (e,))

            if isinstance(data, list):
                lines = [json.dumps(_tweet_record(item)) + "\n" for item in data]
                if lines:
                    # Open once per page; ``with`` closes the handle even if
                    # a write fails.
                    with codecs.open(filename, "a+", "utf-8") as handle:
                        handle.writelines(lines)
            elif data is not None:
                # An error reply is a JSON object, not a list of statuses;
                # iterating it would fail on the first key.
                print("Error %s" % (data,))

            max_id = get_max_id(filename)
def get_max_id(filename):
    """Return the smallest ``tweet_id`` stored in *filename*, or ``None``.

    The file is read as UTF-8 with one JSON object per line; blank lines are
    ignored. Despite the name, the result is the *lowest* id in the file.

    Returns ``None`` when the file cannot be read, holds no records, or
    contains a line that is not a JSON object with a ``tweet_id`` key.
    """
    try:
        with codecs.open(filename, "r", "utf-8") as handle:
            ids = [
                json.loads(line)['tweet_id']
                for line in handle
                if line.strip()
            ]
        # min() raises ValueError on an empty list, handled below.
        return min(ids)
    except (IOError, OSError, ValueError, KeyError, TypeError):
        # Only the expected failures are caught: missing/unreadable file,
        # undecodable or invalid JSON, empty file, wrong record shape.
        return None
def main():
    """Run the actions selected on the command line.

    The three switches are checked independently, in this order:

    * ``-f``: download the timelines of every user in the authorities file.
      The file name is taken from ``-u``; without it the program exits with
      a usage error.
    * ``-u FILE``: update the database, write the report, do the retweets,
      fetch profile pictures for the users in FILE and generate the site.
    * ``-r``: write the report only.
    """
    # State advertising has been banned since January 24.
    # The authorities list comes from the file given with -u/--update.
    list_file = args.update.strip() if args.update else None

    # Run this the first time only.
    if args.firstime:
        if not list_file:
            # get_user_list() needs a file name; calling it without one
            # raised TypeError, so fail with a usable message instead.
            parser.error(
                '--first_time_download needs the authorities file; '
                'pass it with -u/--update'
            )
        user_list = get_user_list(list_file)
        get_recent_tweets(user_list)

    # This is for updating, run as needed.
    if args.update:
        print("** Updating database **")
        lib.update_our_database(list_file)

        print("** Making report of tweets as JSON file **")
        lib.report_cherry()

        print("** Do retweets **")
        lib.do_retweets()

        print("** Get profile pictures **")
        user_list = get_user_list(list_file)
        lib.get_profile_image_url(user_list)

        print("** Generating site **")
        lib.generate_site()

    if args.report:
        print("** Making report of tweets as JSON file **")
        lib.report_cherry()


if __name__ == "__main__":
    main()