-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
83 lines (70 loc) · 2.61 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import requests
import google
import xmltodict
from advertools import robotstxt_to_df
from bs4 import BeautifulSoup
from advertools import sitemap_to_df
from termcolor import colored
from collections import Counter
from googlesearch import search
import math
def redirectTest(domTest):
    """HEAD-request *domTest* without following redirects and report the result.

    Prints and returns a report string of the form
    "Tested:<url>\\n\\t<final url>\\n\\t<status>[\\n\\t<Location>]";
    for 301/302 responses the Location header is appended.
    On a request failure the report contains the error text instead.

    :param domTest: fully qualified URL to probe.
    :return: the same report string that is printed.
    """
    try:
        r = requests.head(domTest, allow_redirects=False)
        msg = "Tested:" + domTest + "\n\t" + r.url + "\n\t" + str(r.status_code)
        # The original 301 and 302 branches were byte-identical; both
        # simply append the redirect target.
        if r.status_code in (301, 302):
            msg += "\n\t" + r.headers['Location']
        print(msg)
        return msg
    except requests.exceptions.RequestException as e:
        # BUG FIX: the original did "..." + e, which raises TypeError
        # (str + exception object) right when the error should be shown.
        msg = "Tested:" + domTest + "\n\t" + str(e)
        print(msg)
        return msg
    # NOTE: the original's trailing `return str(1)` / `pass` were
    # unreachable (both try and except return) and have been removed.
def concatenateDom(dom):
    """Probe *dom* under each common scheme/www prefix via redirectTest.

    BUG FIX: the original kept only a single redirectTest report (the loop
    either returned immediately or overwrote ``ret_str`` each pass), so at
    most one of the four prefixes was actually reported. All four reports
    are now concatenated, one per line, matching the function's name.

    :param dom: bare domain, e.g. "example.com".
    :return: newline-joined redirect reports for all four prefixes.
    """
    prefixes = ['http://www.', 'http://', 'https://www.', 'https://']
    reports = [redirectTest(prefix + dom) for prefix in prefixes]
    return "\n".join(reports)
def robots_df(url):
    """Fetch *url*'s robots.txt and return it as a DataFrame via advertools.

    :param url: site root without a trailing slash, e.g. "https://example.com".
    :return: whatever ``advertools.robotstxt_to_df`` produces for the file.
    """
    robots_url = url + '/robots.txt'
    return robotstxt_to_df(robots_url)
def gtag(url):
    """Download *url* and return its first ``<g>`` element (or None).

    The page is parsed with the lxml backend; the found element is also
    printed before being returned.

    :param url: page to fetch.
    :return: the first ``<g>`` tag as a BeautifulSoup element, or None.
    """
    page = requests.get(url)
    parsed = BeautifulSoup(page.text, 'lxml')
    tag = parsed.find('g')
    print(tag)
    return tag
def sitemap(url):
    """Return *url*'s WordPress sitemap (wp-sitemap.xml) as a DataFrame.

    :param url: site root without a trailing slash.
    :return: whatever ``advertools.sitemap_to_df`` produces for the sitemap.
    """
    sitemap_url = url + '/wp-sitemap.xml'
    return sitemap_to_df(sitemap_url)
def title(url):
    """Fetch *url* and report its ``<title>`` text and length.

    When the page contains several ``<title>`` tags, the last one wins
    (preserving the original behaviour).

    BUG FIX: the original only bound ``ret_title``/``ret_title_len`` inside
    the loop, so a page with no ``<title>`` raised UnboundLocalError at the
    return; an empty string is now reported instead. The loop variable no
    longer shadows the function name ``title``.

    :param url: page to fetch.
    :return: tuple of (title text, "Number of Character in the Title: N").
    """
    reqs = requests.get(url)
    soup = BeautifulSoup(reqs.text, 'html.parser')
    print(colored("TITLE OF THE WEBSITE : ", "red", attrs=['bold']))
    ret_title = ""
    for tag in soup.find_all('title'):
        ret_title = tag.get_text()
    ret_title_len = "Number of Character in the Title: " + str(len(ret_title))
    return ret_title, ret_title_len
########## RANK based on website and keyword
# Google rank for given website and key word
def url_keyword_rank(website, keyword):
    """Find *website*'s rank among the first 150 Google results for *keyword*.

    Scans the results in order; on the first URL containing *website*,
    returns "rank,page" where rank is the 1-based position and page is the
    1-based results page (10 results per page). Returns
    "not found, not found" when no result matches.

    Dead code removed: the original had an unreachable ``break`` after
    ``return`` and a redundant ``found`` flag (the early return already
    ends the loop), with no behaviour change.

    :param website: substring to look for in each result URL.
    :param keyword: search query.
    :return: "rank,page" string or "not found, not found".
    """
    print('1----')
    urls = search(keyword, tld="com", num=150, stop=150, pause=5)
    for index, url in enumerate(urls):
        print(index, url)
        if website in url:
            rank = index + 1
            page = math.ceil(rank / 10)
            return str(rank) + "," + str(page)
    return "not found, not found"