|
5 | 5 | import json
|
6 | 6 | import config
|
7 | 7 | from utils import *
|
| 8 | +from analyser import * |
| 9 | +import time |
8 | 10 |
|
9 | 11 | # This is the listener, resposible for receiving data
|
10 | 12 | class DBListener(tweepy.StreamListener):
|
| 13 | + def __init__(self ,company_name): |
| 14 | + self.company_name = company_name |
| 15 | + create_tweets_table(DATABASES['RAW_TWEETS_DB'], self.company_name) |
| 16 | + |
11 | 17 | def on_data(self, data):
|
12 |
| - # Parsing |
| 18 | + # Parsing |
| 19 | + # print data |
13 | 20 | decoded = json.loads(data)
|
| 21 | + # print "PASSES" |
| 22 | + # file = open('NEW_JSON.json', 'wb') |
| 23 | + # json.dump(decoded,file,sort_keys = True,indent = 4) |
| 24 | + |
| 25 | + |
| 26 | + if "id" not in decoded: |
| 27 | + print "ID NOT IN DECODED" |
| 28 | + return True |
| 29 | + the_id = decoded['id'] |
| 30 | + tweet = decoded['text'] |
| 31 | + created_at = decoded['created_at'] |
| 32 | + created_at = time.mktime(time.strptime(created_at,"%a %b %d %H:%M:%S +0000 %Y")) |
| 33 | + # print created_at |
14 | 34 | # #open a file to store the status objects
|
15 |
| - # file = open('streaming_new_tweets.json', 'wb') |
16 | 35 | #write json to file
|
17 | 36 |
|
18 |
| - write_to_DB(decoded) |
| 37 | + self.write_tweets_to_DB(the_id, tweet, created_at) |
19 | 38 |
|
20 |
| - json.dump(decoded,file,sort_keys = True,indent = 4) |
| 39 | + # json.dump(decoded,file,sort_keys = True,indent = 4) |
21 | 40 | #show progress
|
22 |
| - print "Writing tweets to file,CTRL+C to terminate the program" |
| 41 | + # print "Writing tweets to file,CTRL+C to terminate the program" |
| 42 | + |
23 | 43 |
|
24 |
| - |
25 | 44 | return True
|
26 | 45 |
|
27 | 46 | def on_error(self, status):
|
28 | 47 | print "Error with status " + status
|
29 | 48 |
|
30 |
| -def write_to_DB(decoded): |
| 49 | + def write_tweets_to_DB(self, the_id, tweet, timestamp): |
| 50 | + |
| 51 | + self.conn, self.c = get_database_connection(DATABASES['RAW_TWEETS_DB']) |
| 52 | + self.c.execute('''INSERT OR IGNORE INTO ''' + self.company_name + \ |
| 53 | + ''' VALUES (?,?,?)''' ,(the_id, tweet, timestamp)) |
| 54 | + |
| 55 | + self.conn.commit() |
| 56 | + self.conn.close() |
| 57 | + |
| 58 | + |
| 59 | + try: |
| 60 | + self.conn, self.c = get_database_connection(DATABASES['FEATURES_DB']) |
| 61 | + self.c.execute('''CREATE TABLE IF NOT EXISTS ''' + self.company_name + \ |
| 62 | + ''' (hash INTEGER PRIMARY KEY, neg REAL, neu REAL, pos REAL, com REAL)''') |
| 63 | + |
| 64 | + feature = get_sentiment(tweet) |
| 65 | + features = [] |
| 66 | + features.append((the_id, feature['neg'], feature['neu'], \ |
| 67 | + feature['pos'], feature['compound'])) |
| 68 | + |
| 69 | + print features |
| 70 | + for index, feature in enumerate(features): |
| 71 | + self.c.execute('''INSERT OR IGNORE INTO ''' + self.company_name + \ |
| 72 | + ''' VALUES(?, ?, ?, ?, ?)''', \ |
| 73 | + (feature[0], feature[1], feature[2], feature[3], feature[4])) |
| 74 | + |
| 75 | + except UnicodeEncodeError: |
| 76 | + print "UnicodeEncodeError" |
| 77 | + pass |
| 78 | + finally: |
| 79 | + self.conn.commit() |
| 80 | + self.conn.close() |
| 81 | + print tweet |
| 82 | + print "--------------------------------------------------" |
| 83 | + |
| 84 | + |
| 85 | + |
| 86 | + |
| 87 | + def get_filtered_tweets_features(self, company): |
| 88 | + # Get all tweets from db |
| 89 | + conn, c = read_tweets(company) |
| 90 | + # Filter them |
| 91 | + features = [] |
| 92 | + for tweet in c.fetchall(): |
| 93 | + filtered_tweet = filter_tweet(tweet[1]) |
| 94 | + if filtered_tweet == '': |
| 95 | + continue |
| 96 | + feature = get_sentiment(filtered_tweet) |
| 97 | + features.append((tweet[0], feature['pos'])) |
| 98 | + # Close connection |
| 99 | + conn.close() |
| 100 | + return company, features |
31 | 101 |
|
32 |
| - return |
33 | 102 |
|
34 | 103 |
|
35 | 104 |
|
36 | 105 | if __name__ == '__main__':
|
37 |
| - l = DBListener() |
38 |
| - #authorize twitter, initialize tweepy |
39 |
| - auth = tweepy.OAuthHandler(config.consumer_key, config.consumer_secret) |
40 |
| - auth.set_access_token(config.access_token, config.access_secret) |
41 | 106 |
|
42 | 107 | # There are different kinds of streams: public stream, user stream, multi-user streams
|
43 | 108 | # For more details refer to https://dev.twitter.com/docs/streaming-apis
|
44 | 109 |
|
45 | 110 |
|
46 |
| - search_query = COMPANIES["Microsoft"] |
| 111 | + company_name = "Microsoft" |
| 112 | + search_query = COMPANIES[company_name] |
| 113 | + # create_tweets_table('FEATURES_DB', company_name ) |
47 | 114 |
|
48 |
| - create_tweets_table() |
| 115 | + l = DBListener(company_name) |
| 116 | + #authorize twitter, initialize tweepy |
| 117 | + auth = tweepy.OAuthHandler(config.consumer_key, config.consumer_secret) |
| 118 | + auth.set_access_token(config.access_token, config.access_secret) |
49 | 119 |
|
50 | 120 | stream = tweepy.Stream(auth, l)
|
51 | 121 |
|
52 | 122 | #Hashtag to stream
|
53 |
| - stream.filter(track=search_query) #Replace with your favorite hashtag or query |
| 123 | + stream.filter(track=[company_name]) #Replace with your favorite hashtag or query |
0 commit comments