-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtwitter.py
105 lines (74 loc) · 3.43 KB
/
twitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time
# import chardet
import csv
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", True)
#options.add_experimental_option("excludeSwitches",["enable-automation"])
#options.add_experimental_option('useAutomationExtension',False)
#options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument("window-size=1280,800")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
Keyword = str(input("Enter the keyword: "))
Keyword_data = Keyword.replace(" ", "+")
pages = int(input("Enter number of pages: "))
url = 'https://nitter.net/search?f=tweets&q='+Keyword_data
for i in range(pages):
driver = webdriver.Chrome(options= options)
driver.maximize_window()
driver.get(url)
time.sleep(15)
tweet_final=[]
uname_final=[]
timesupremacy = []
previous_height = 0
while True:
height = driver.execute_script("""
function getActualHeight(){
return Math.max(
Math.max(document.body.scrollHeight, document.documentElement.scrollHeight),
Math.max(document.body.offsetHeight, document.documentElement.offsetHeight),
Math.max(document.body.clientHeight, document.documentElement.clientHeight)
);
}
return getActualHeight()
""")
driver.execute_script(f"window.scrollTo({previous_height},{previous_height+300})")
time.sleep(1)
previous_height += 300
if previous_height >= height:
break
soup = BeautifulSoup(driver.page_source, 'html.parser')
#driver.quit()
#print(soup.encode('utf-8'))
#
tweets = soup.findAll('div',{"class":"tweet-content media-body"})
tweet_list = [[x.text.encode('utf-8')] for x in tweets]
#print(tweet_list)
username = soup.findAll('div',{"class":"fullname-and-username"})
username_list = [[x.text.encode('utf-8')] for x in username]
#print(username_list)
timestamp = soup.findAll('span',{"class":"tweet-date"})
for i in timestamp:
timestamp_a = i.find('a')
timestamp_data = timestamp_a['title']
timesupremacy.append(timestamp_data)
timesupremacy_a = [[x] for x in timesupremacy]
for value1, value2, value3 in zip(username_list,tweet_list,timesupremacy_a):
# thewriter.writerow([value1,value2,value3])
print((value1[0]).decode("utf-8"),end="\t")
uname_final.append((value1[0]).decode("utf-8"))
print((value2[0]).decode("utf-8"),end='\t')
tweet_final.append((value2[0]).decode("utf-8"))
print((value3[0]))
#thewriter.writerows(tweet_list)
with open('tweetlist_final.csv', 'a', newline='', encoding='utf-8') as g:
thewriter = csv.writer(g)
for value1, value2, value3 in zip(uname_final,tweet_final,timesupremacy_a):
thewriter.writerow([value1,value2,value3])
print(value1,value2,value3)
driver.find_element(By.XPATH,'//div[@class="show-more"]/a').click()
previous_height = 0