From fa1ef185567dea479ad19013743089e192cbf84b Mon Sep 17 00:00:00 2001
From: CraigMariani
Date: Wed, 2 Dec 2020 20:15:33 -0800
Subject: [PATCH 1/2] s3 uploader created

---
 s3_uploader.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 s3_uploader.py

diff --git a/s3_uploader.py b/s3_uploader.py
new file mode 100644
index 0000000..9cd246e
--- /dev/null
+++ b/s3_uploader.py
@@ -0,0 +1,30 @@
+"""Upload the CSV files in the current directory to an S3 bucket."""
+import os
+
+import boto3
+
+BUCKET = 'twitter-data-01'
+KEY_PREFIX = 'csv_twitter/'
+
+
+def upload_csv_files(client, directory='.', bucket=BUCKET, prefix=KEY_PREFIX):
+    """Upload every ``*.csv`` file in ``directory`` and return the S3 keys.
+
+    ``client`` is a boto3 S3 client (anything exposing ``upload_file``).
+    """
+    keys = []
+    for name in sorted(os.listdir(directory)):
+        path = os.path.join(directory, name)
+        # Test the real suffix: a substring match would also pick up names
+        # such as 'tweets.csv.bak', and directories cannot be uploaded.
+        if not name.endswith('.csv') or not os.path.isfile(path):
+            continue
+        print(name)
+        key = prefix + name
+        client.upload_file(path, bucket, key)
+        keys.append(key)
+    return keys
+
+
+if __name__ == '__main__':
+    upload_csv_files(boto3.client('s3'))
From bb15f61b6cb6a7d2e3643b1744b70c7de519175c Mon Sep 17 00:00:00 2001
From: CraigMariani
Date: Thu, 17 Dec 2020 14:06:15 -0800
Subject: [PATCH 2/2] twitter miner working

---
 __pycache__/secrets.cpython-38.pyc | Bin 0 -> 418 bytes
 secrets.py                         |  5 ++
 twitter_extract.py                 | 71 +++++++++++++++++++++++++++++
 twitter_grant_data.csv             | 31 +++++++++++++
 4 files changed, 107 insertions(+)
 create mode 100644 __pycache__/secrets.cpython-38.pyc
 create mode 100644 secrets.py
 create mode 100644 twitter_extract.py
 create mode 100644 twitter_grant_data.csv

diff --git a/__pycache__/secrets.cpython-38.pyc b/__pycache__/secrets.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8f5a2e500296dc9b51724c5b299094169c360d08
GIT binary patch
literal 418
zcmYk2!A`cwa97hbc^E4>d7Qf%PB+>HCSBchspn}{$H8nxm$cnGlXizNCC^J-?CPaq
zIG{M&9R|sI4@k;Sk3J&wT_l
v$O0nIU?D?}W%|eElFKWyChO4OZ(!$Xu{*k1B`A;aZTWF_z(o?JiX{F5mDzq<

literal 0
HcmV?d00001

diff --git a/secrets.py b/secrets.py
new file mode 100644
index 0000000..a1f50c5
--- /dev/null
+++ b/secrets.py
@@ -0,0 +1,5 @@
+api_key = ''
+api_secret = ''
+
+con_key = ''
+con_secret = ''
\ No newline at end of file
diff --git a/twitter_extract.py b/twitter_extract.py
new file mode
100644
index 0000000..877cff2
--- /dev/null
+++ b/twitter_extract.py
@@ -0,0 +1,71 @@
+"""Search Twitter for a few hashtags and dump the matches to a CSV file."""
+import tweepy
+import preprocessing as p
+import csv
+import pandas as pd
+import pprint
+import json
+# Local secrets.py next to this script; it shadows the stdlib ``secrets``.
+from secrets import api_key, api_secret, con_key, con_secret
+
+DEFAULT_SEARCH_WORDS = ('#grant', '#bayarea', '#sf')
+DEFAULT_CSV_PATH = 'twitter_grant_data.csv'
+
+
+class TwitterExtract:
+    """Thin wrapper around an authenticated tweepy API client."""
+
+    def __init__(self):
+        # NOTE(review): api_* is passed as the app (consumer) credential pair
+        # and con_* as the access token pair; the names in secrets.py suggest
+        # the opposite, so confirm which pair is which before renaming them.
+        auth = tweepy.OAuthHandler(api_key, api_secret)
+        auth.set_access_token(con_key, con_secret)
+        self.api = tweepy.API(auth, wait_on_rate_limit=True)
+
+    def create_csv(self, tweet_data, path=DEFAULT_CSV_PATH):
+        """Write ``tweet_data`` to ``path`` as UTF-8 CSV.
+
+        ``tweet_data`` is the column dict returned by get_tweets(). The
+        DataFrame index is written as the first, unnamed column.
+        """
+        pd.DataFrame(tweet_data).to_csv(path, encoding='utf-8')
+
+    def get_tweets(self, search_words=DEFAULT_SEARCH_WORDS, limit=10):
+        """Return up to ``limit`` English tweets for each search word.
+
+        ``search_words`` is an iterable of query strings and ``limit`` caps
+        the results fetched per query. The result maps column name -> list
+        of values, one entry per tweet, ready for pandas.DataFrame.
+        """
+        data = {
+            'search_word': [],
+            'date_created': [],
+            'tweet_id': [],
+            'text': [],
+            'source': [],
+        }
+        for search_word in search_words:
+            # Cursor pages through the results; items() stops at ``limit``.
+            cursor = tweepy.Cursor(self.api.search, q=search_word, lang='en')
+            for item in cursor.items(limit):
+                tweet = item._json
+                data['search_word'].append(search_word)
+                data['date_created'].append(tweet['created_at'])
+                data['tweet_id'].append(tweet['id'])
+                # Keep the text as str: encoding it to bytes here made the CSV
+                # contain b'...' reprs with escaped UTF-8 instead of the text.
+                data['text'].append(tweet['text'])
+                data['source'].append(tweet['source'])
+        return data
+
+
+def main():
+    """Fetch the default searches and write them to the default CSV."""
+    extractor = TwitterExtract()
+    extractor.create_csv(extractor.get_tweets())
+
+
+# Guarded so importing this module does not call the Twitter API.
+if __name__ == '__main__':
+    main()
diff --git a/twitter_grant_data.csv b/twitter_grant_data.csv
new file mode 100644
index
0000000..8dcc084 --- /dev/null +++ b/twitter_grant_data.csv @@ -0,0 +1,31 @@ +,search_word,date_created,tweet_id,text,source +0,#grant,Thu Dec 17 20:44:50 +0000 2020,1339672928401514496,"b""RT @EM_TCCM: Applications for Fall 2021 are open until February 15th! Don't miss the opportunity to get a full grant to study Theoretical C\xe2\x80\xa6""","Twitter for Android" +1,#grant,Thu Dec 17 20:30:12 +0000 2020,1339669245479809034,b'SUNLIGHT receives \xe2\x82\xac50m grant to develop \xe2\x80\x98low-environmental footprint\xe2\x80\x99 cells\n\nGet more insights from FutureBridge at\xe2\x80\xa6 https://t.co/P8R015uulQ',"Hootsuite Inc." +2,#grant,Thu Dec 17 20:24:44 +0000 2020,1339667868162650128,"b'RT @teamSVBS: The last 2020 edition of our #newsletter is out now, featuring an important update on #grant funding and helpful advice on ho\xe2\x80\xa6'","Twitter for iPhone" +3,#grant,Thu Dec 17 19:48:56 +0000 2020,1339658860190175236,b'RT @EUI_History: What could you do as a PhD researcher @EUI_History? Have a look at our research profile https://t.co/8zz6WwBYRW and apply\xe2\x80\xa6',"Twitter Web App" +4,#grant,Thu Dec 17 19:44:35 +0000 2020,1339657763945918467,"b'RT @ChiTrust: Earlier this year, the Trust provided $425,000 in general operating funding to eight Building Pathways to Stability #grant re\xe2\x80\xa6'","Twitter Web App" +5,#grant,Thu Dec 17 18:54:12 +0000 2020,1339645084288094208,b'Just found out this week that I got a small #grant from Fredrik & Ingrid Thurings Stiftelse. My first ever grant as\xe2\x80\xa6 https://t.co/og0oOWwT2a',"Twitter for iPad" +6,#grant,Thu Dec 17 18:23:34 +0000 2020,1339637374876332035,b'RT @JGrapsa: This is your opportunity to apply for an educational #grant for #Echokurs - one of the best clinical-pathology-imaging meeting\xe2\x80\xa6',"Twitter for iPhone" +7,#grant,Thu Dec 17 18:16:32 +0000 2020,1339635605253308419,"b""RT @GBSLEP: Does your SME operate in the @GBSLEP area's Visitor Economy? 
Transportation, food & drink, accommodation, sports and fitness, e\xe2\x80\xa6""","Twitter Web App" +8,#grant,Thu Dec 17 18:16:08 +0000 2020,1339635507773497346,"b""RT @EM_TCCM: Applications for Fall 2021 are open until February 15th! Don't miss the opportunity to get a full grant to study Theoretical C\xe2\x80\xa6""","Twitter for Android" +9,#grant,Thu Dec 17 18:15:07 +0000 2020,1339635250972995585,"b'Earlier this year, the Trust provided $425,000 in general operating funding to eight Building Pathways to Stability\xe2\x80\xa6 https://t.co/mQwp7GjIf7'","Sprout Social" +10,#bayarea,Thu Dec 17 20:54:33 +0000 2020,1339675374863540224,"b'RT @KPIXtv: The travel #quarantine order strongly discourages non-essential travel of any kind and within any distance, including from one\xe2\x80\xa6'","Twitter for iPhone" +11,#bayarea,Thu Dec 17 20:54:25 +0000 2020,1339675338398343169,"b""RT @KPIXtv: #SanFrancisco's #quarantine order:\n- Anyone who comes to SF MUST #quarantine for 10 days if they spent any time outside the 9 #\xe2\x80\xa6""","Twitter for iPhone" +12,#bayarea,Thu Dec 17 20:53:57 +0000 2020,1339675221251379202,b'featuring:\n\n@surfdogteddy\nRusty The Surfing MinPin\nRothstein\nTristan\nCarson (Team Kihei)\n\n#surfingdog\xe2\x80\xa6 https://t.co/VGtSpam6iK',"Twitter Web App" +13,#bayarea,Thu Dec 17 20:51:44 +0000 2020,1339674665514459136,"b""RT @KPIXtv: SAN FRANCISCO QUARANTINE ORDER\nStarting at 12:01AM TOMORROW, #SanFrancisco's mandatory #quarantine for those traveling to the c\xe2\x80\xa6""","Twitter Web App" +14,#bayarea,Thu Dec 17 20:50:40 +0000 2020,1339674396546408448,b'RT @KPIXtv: #BREAKING:\n#SanFrancisco Mayor @LondonBreed & @SF_DPH Dr. 
Colfax announce a public health order that places a mandatory #quaran\xe2\x80\xa6',"Twitter for iPhone" +15,#bayarea,Thu Dec 17 20:50:15 +0000 2020,1339674291114110977,"b""RT @KPIXtv: SAN FRANCISCO QUARANTINE ORDER\nStarting at 12:01AM TOMORROW, #SanFrancisco's mandatory #quarantine for those traveling to the c\xe2\x80\xa6""","Twitter Web App" +16,#bayarea,Thu Dec 17 20:47:26 +0000 2020,1339673582998343683,"b""RT @KPIXtv: SAN FRANCISCO QUARANTINE ORDER\nStarting at 12:01AM TOMORROW, #SanFrancisco's mandatory #quarantine for those traveling to the c\xe2\x80\xa6""","SocialNewsDesk" +17,#bayarea,Thu Dec 17 20:47:26 +0000 2020,1339673582629228547,"b""RT @KPIXtv: SAN FRANCISCO QUARANTINE ORDER\nStarting at 12:01AM TOMORROW, #SanFrancisco's mandatory #quarantine for those traveling to the c\xe2\x80\xa6""","SocialNewsDesk" +18,#bayarea,Thu Dec 17 20:47:26 +0000 2020,1339673582507573248,"b""RT @KPIXtv: SAN FRANCISCO QUARANTINE ORDER\nStarting at 12:01AM TOMORROW, #SanFrancisco's mandatory #quarantine for those traveling to the c\xe2\x80\xa6""","SocialNewsDesk" +19,#bayarea,Thu Dec 17 20:47:26 +0000 2020,1339673582503378945,"b""RT @KPIXtv: SAN FRANCISCO QUARANTINE ORDER\nStarting at 12:01AM TOMORROW, #SanFrancisco's mandatory #quarantine for those traveling to the c\xe2\x80\xa6""","SocialNewsDesk" +20,#sf ,Thu Dec 17 20:58:23 +0000 2020,1339676338148425729,"b'Thermo Fisher Scientific is looking for teammates like you. See our latest #GeneralScience job openings, including\xe2\x80\xa6 https://t.co/dyUfL2itLU'","CareerArc 2.0" +21,#sf ,Thu Dec 17 20:58:18 +0000 2020,1339676315113451522,b'RT @shattawalegh: Are we all ready ? \xe2\x9c\x8c\xef\xb8\x8f\xf0\x9f\x98\x8f\n\nDP change for the #GOGALBUM ? \xf0\x9f\x98\xac\n\nG I F T. O F . G O D .\xf0\x9f\x98\x87\n\n#SM4LYF \n#SM\n#SF \n#DGC\n#SHAXI\n#SP\n#G\xe2\x80\xa6',"Twitter for iPhone" +22,#sf ,Thu Dec 17 20:57:54 +0000 2020,1339676216823934976,"b'Can you recommend anyone for this job in #SF, CA? 
https://t.co/6Zp46Irxd0 #scientist'","CareerArc 2.0" +23,#sf ,Thu Dec 17 20:57:09 +0000 2020,1339676027933642760,b'RT @FBISanFrancisco: #Breaking: FBI Special Agent in Charge Craig Fair announces reward in #SF church arson. https://t.co/nJ7Nv89U0M https:\xe2\x80\xa6',"Twitter for Android" +24,#sf ,Thu Dec 17 20:55:50 +0000 2020,1339675696344416256,b'RT @FBISanFrancisco: #Breaking: FBI Special Agent in Charge Craig Fair announces reward in #SF church arson. https://t.co/nJ7Nv89U0M https:\xe2\x80\xa6',"Twitter for Android" +25,#sf ,Thu Dec 17 20:54:42 +0000 2020,1339675411681214464,b'RT @FBISanFrancisco: #Breaking: FBI Special Agent in Charge Craig Fair announces reward in #SF church arson. https://t.co/nJ7Nv89U0M https:\xe2\x80\xa6',"Twitter for iPhone" +26,#sf ,Thu Dec 17 20:54:25 +0000 2020,1339675338398343169,"b""RT @KPIXtv: #SanFrancisco's #quarantine order:\n- Anyone who comes to SF MUST #quarantine for 10 days if they spent any time outside the 9 #\xe2\x80\xa6""","Twitter for iPhone" +27,#sf ,Thu Dec 17 20:53:58 +0000 2020,1339675226150301696,b'RT @FBISanFrancisco: #Breaking: FBI Special Agent in Charge Craig Fair announces reward in #SF church arson. https://t.co/nJ7Nv89U0M https:\xe2\x80\xa6',"Twitter for iPhone" +28,#sf ,Thu Dec 17 20:53:28 +0000 2020,1339675100078075906,b'RT @FBISanFrancisco: #Breaking: FBI Special Agent in Charge Craig Fair announces reward in #SF church arson. https://t.co/nJ7Nv89U0M https:\xe2\x80\xa6',"Twitter for iPhone" +29,#sf ,Thu Dec 17 20:51:18 +0000 2020,1339674555753742336,b'RT @FBISanFrancisco: #Breaking: FBI Special Agent in Charge Craig Fair announces reward in #SF church arson. https://t.co/nJ7Nv89U0M https:\xe2\x80\xa6',"Twitter Web App"