Skip to content

Commit b5a046a

Browse files
Merge branch 'paul'
2 parents 2bd6ade + 5c6e9b3 commit b5a046a

File tree

4 files changed

+178
-60
lines changed

4 files changed

+178
-60
lines changed

.gitignore

+11
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,13 @@
11
*.pyc
22
*.json
3+
*.db
4+
raw_tweets.db
5+
utils.pyc
6+
FEATURES_DB
7+
donald.py
8+
twitter_stream_download.py
9+
twitterstream.py
10+
extracted_features.db
11+
raw_tweets.db
12+
streaming_new_tweets.py
13+

search_for_words.py

+34
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,37 @@ def add_tweets_in_db(tweets, keyword):
136136
# Join stuff
137137
for proc in jobs:
138138
proc.join()
139+
# keyword = TEST_PARAMS['ANALYSER_COMPANY']
140+
# max_tweets = TEST_PARAMS['MAX_TWEETS']
141+
# tweets = get_tweets_from_keyword(keyword, max_tweets)
142+
143+
# keywords = COMPANIES['Astrazeneca']
144+
keywords = "Microsoft"
145+
max_tweets = TEST_PARAMS['MAX_TWEETS']
146+
147+
tweets = get_tweets_from_keyword(keywords, max_tweets)
148+
tweets = tweets + get_tweets_from_keyword('#' + keywords, max_tweets)
149+
tweets = tweets + get_tweets_from_keyword('@' + keywords, max_tweets)
150+
add_tweets_in_db(tweets, keywords)
151+
152+
# The old way to print it out
153+
# for keyword in keywords:
154+
# tweets = get_tweets_from_keyword(keyword[0], max_tweets)
155+
# =======
156+
# # keywords = COMPANIES['Astrazeneca']
157+
# keywords = "Microsoft"
158+
# max_tweets = TEST_PARAMS['MAX_TWEETS']
159+
160+
# tweets = get_tweets_from_keyword(keywords, max_tweets)
161+
# tweets = tweets + get_tweets_from_keyword('#' + keywords, max_tweets)
162+
# tweets = tweets + get_tweets_from_keyword('@' + keywords, max_tweets)
163+
# add_tweets_in_db(tweets, keywords)
164+
165+
# # The old way to print it out
166+
# # for keyword in keywords:
167+
# # tweets = get_tweets_from_keyword(keyword[0], max_tweets)
168+
# >>>>>>> switching branches
169+
170+
# # for tweet in tweets:
171+
# # print tweet
172+
# >>>>>>> master

streaming_new_tweets.py

+85-15
Original file line numberDiff line numberDiff line change
@@ -5,49 +5,119 @@
55
import json
66
import config
77
from utils import *
8+
from analyser import *
9+
import time
810

911
# This is the listener, responsible for receiving data
1012
class DBListener(tweepy.StreamListener):
13+
def __init__(self ,company_name):
14+
self.company_name = company_name
15+
create_tweets_table(DATABASES['RAW_TWEETS_DB'], self.company_name)
16+
1117
def on_data(self, data):
12-
# Parsing
18+
# Parsing
19+
# print data
1320
decoded = json.loads(data)
21+
# print "PASSES"
22+
# file = open('NEW_JSON.json', 'wb')
23+
# json.dump(decoded,file,sort_keys = True,indent = 4)
24+
25+
26+
if "id" not in decoded:
27+
print "ID NOT IN DECODED"
28+
return True
29+
the_id = decoded['id']
30+
tweet = decoded['text']
31+
created_at = decoded['created_at']
32+
created_at = time.mktime(time.strptime(created_at,"%a %b %d %H:%M:%S +0000 %Y"))
33+
# print created_at
1434
# #open a file to store the status objects
15-
# file = open('streaming_new_tweets.json', 'wb')
1635
#write json to file
1736

18-
write_to_DB(decoded)
37+
self.write_tweets_to_DB(the_id, tweet, created_at)
1938

20-
json.dump(decoded,file,sort_keys = True,indent = 4)
39+
# json.dump(decoded,file,sort_keys = True,indent = 4)
2140
#show progress
22-
print "Writing tweets to file,CTRL+C to terminate the program"
41+
# print "Writing tweets to file,CTRL+C to terminate the program"
42+
2343

24-
2544
return True
2645

2746
def on_error(self, status):
2847
print "Error with status " + status
2948

30-
def write_to_DB(decoded):
49+
def write_tweets_to_DB(self, the_id, tweet, timestamp):
50+
51+
self.conn, self.c = get_database_connection(DATABASES['RAW_TWEETS_DB'])
52+
self.c.execute('''INSERT OR IGNORE INTO ''' + self.company_name + \
53+
''' VALUES (?,?,?)''' ,(the_id, tweet, timestamp))
54+
55+
self.conn.commit()
56+
self.conn.close()
57+
58+
59+
try:
60+
self.conn, self.c = get_database_connection(DATABASES['FEATURES_DB'])
61+
self.c.execute('''CREATE TABLE IF NOT EXISTS ''' + self.company_name + \
62+
''' (hash INTEGER PRIMARY KEY, neg REAL, neu REAL, pos REAL, com REAL)''')
63+
64+
feature = get_sentiment(tweet)
65+
features = []
66+
features.append((the_id, feature['neg'], feature['neu'], \
67+
feature['pos'], feature['compound']))
68+
69+
print features
70+
for index, feature in enumerate(features):
71+
self.c.execute('''INSERT OR IGNORE INTO ''' + self.company_name + \
72+
''' VALUES(?, ?, ?, ?, ?)''', \
73+
(feature[0], feature[1], feature[2], feature[3], feature[4]))
74+
75+
except UnicodeEncodeError:
76+
print "UnicodeEncodeError"
77+
pass
78+
finally:
79+
self.conn.commit()
80+
self.conn.close()
81+
print tweet
82+
print "--------------------------------------------------"
83+
84+
85+
86+
87+
def get_filtered_tweets_features(self, company):
88+
# Get all tweets from db
89+
conn, c = read_tweets(company)
90+
# Filter them
91+
features = []
92+
for tweet in c.fetchall():
93+
filtered_tweet = filter_tweet(tweet[1])
94+
if filtered_tweet == '':
95+
continue
96+
feature = get_sentiment(filtered_tweet)
97+
features.append((tweet[0], feature['pos']))
98+
# Close connection
99+
conn.close()
100+
return company, features
31101

32-
return
33102

34103

35104

36105
if __name__ == '__main__':
37-
l = DBListener()
38-
#authorize twitter, initialize tweepy
39-
auth = tweepy.OAuthHandler(config.consumer_key, config.consumer_secret)
40-
auth.set_access_token(config.access_token, config.access_secret)
41106

42107
# There are different kinds of streams: public stream, user stream, multi-user streams
43108
# For more details refer to https://dev.twitter.com/docs/streaming-apis
44109

45110

46-
search_query = COMPANIES["Microsoft"]
111+
company_name = "Microsoft"
112+
search_query = COMPANIES[company_name]
113+
# create_tweets_table('FEATURES_DB', company_name )
47114

48-
create_tweets_table()
115+
l = DBListener(company_name)
116+
#authorize twitter, initialize tweepy
117+
auth = tweepy.OAuthHandler(config.consumer_key, config.consumer_secret)
118+
auth.set_access_token(config.access_token, config.access_secret)
49119

50120
stream = tweepy.Stream(auth, l)
51121

52122
#Hashtag to stream
53-
stream.filter(track=search_query) #Replace with your favorite hashtag or query
123+
stream.filter(track=[company_name]) #Replace with your favorite hashtag or query

utils.py

+48-45
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
'MAX_TWEETS' : 1100 }
1717

1818
COMPANIES = {
19-
"Apple" : [
19+
"Apple" :
2020
"Apple OR "
2121
+"#Apple OR "
2222
+"iPad OR "
@@ -26,35 +26,35 @@
2626
+"mac OR "
2727
+"macbook OR "
2828
+"iMac"
29-
],
30-
"Facebook" : [
29+
,
30+
"Facebook" :
3131
"Facebook OR "
3232
+"#Facebook OR "
3333
+"news feed OR "
3434
+"poke OR "
3535
+"status OR "
3636
+"timeline OR "
3737
+"messenger"
38-
],
39-
"Costa" : [
38+
,
39+
"Costa" :
4040
'Costa OR '
4141
+'#Costa OR '
4242
+'Whitbread '
43-
],
44-
"Microsoft" : [
43+
,
44+
"Microsoft" :
4545
'Mircosoft OR '
4646
+'#Mircosoft OR '
47-
# +'Surface OR '
48-
# +'power point OR '
49-
# +'excel OR '
50-
# +'vista OR '
51-
# +'kinect OR '
52-
# +'bing OR '
53-
# +'visual basic OR '
54-
# +'visual studio OR '
47+
+'Surface OR '
48+
+'power point OR '
49+
+'excel OR '
50+
+'vista OR '
51+
+'kinect OR '
52+
+'bing OR '
53+
+'visual basic OR '
54+
+'visual studio OR '
5555
+'ms-dos'
56-
],
57-
"Astrazeneca" : [
56+
,
57+
"Astrazeneca" :
5858
"Astrazeneca OR "
5959
+"ARIMIDEX OR "
6060
+"ATACAND OR "
@@ -75,33 +75,33 @@
7575
+"Astrazeneca OR "
7676
+"Astrazeneca OR "
7777
+"Astrazeneca OR "
78-
+"Carbocaine OR "
79-
+"Citanes OR "
80-
+"Diprivan OR "
81-
+"EMLA OR "
82-
+"MarcaineOR "
83-
+"Naropin OR "
84-
+"Xylocaine OR "
85-
+"Xyloproct OR "
86-
+"Atacand OR "
87-
+"Betaloc OR "
88-
+"BrilintaOR "
89-
+"Crestor OR "
90-
+"ExantaOR "
91-
+"Epanova OR "
92-
+"Imdur OR "
93-
+"Inderal OR "
94-
+"Lexxel OR "
95-
+"Logimax OR "
96-
+"Nif-Ten OR "
97-
+"Plendil OR "
98-
+"Ramace OR "
99-
+"SelokenOR "
100-
+"Tenoretic OR "
101-
+"Tenormin OR "
102-
+"Unimax OR "
103-
+"Zestoretic OR "
104-
+"Zestril OR "
78+
# +"Carbocaine OR "
79+
# +"Citanes OR "
80+
# +"Diprivan OR "
81+
# +"EMLA OR "
82+
# +"MarcaineOR "
83+
# +"Naropin OR "
84+
# +"Xylocaine OR "
85+
# +"Xyloproct OR "
86+
# +"Atacand OR "
87+
# +"Betaloc OR "
88+
# +"BrilintaOR "
89+
# +"Crestor OR "
90+
# +"ExantaOR "
91+
# +"Epanova OR "
92+
# +"Imdur OR "
93+
# +"Inderal OR "
94+
# +"Lexxel OR "
95+
# +"Logimax OR "
96+
# +"Nif-Ten OR "
97+
# +"Plendil OR "
98+
# +"Ramace OR "
99+
# +"SelokenOR "
100+
# +"Tenoretic OR "
101+
# +"Tenormin OR "
102+
# +"Unimax OR "
103+
# +"Zestoretic OR "
104+
# +"Zestril OR "
105105
# +"Bydureon OR "
106106
# +"Byetta OR "
107107
# +"FarxigaOR "
@@ -143,7 +143,10 @@
143143
# +"Bricanyl OR "
144144
# +"Oxis OR "
145145
+"Pulmicort"
146-
]
146+
,
147+
"ytytye9393jenmebrhriue" :
148+
"ytytye9393jenmebrhriue"
149+
147150
}
148151

149152
DATABASES = {'RAW_TWEETS_DB': 'raw_tweets.db',

0 commit comments

Comments
 (0)