Skip to content

Commit b339e2c

Browse files
committed
first commit
0 parents  commit b339e2c

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

google_custom_search.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import os
2+
import urllib.request
3+
import json
4+
5+
# Directory containing this module, with a trailing slash so that file paths
# can be built as ``path + filename``.  abspath() is applied first because
# dirname() returns '' when ``__file__`` is a bare file name, which made the
# previous expression evaluate to the filesystem root '/'.
path = os.path.dirname(os.path.abspath(__file__)) + '/'
# Example of an already percent-encoded search request: 'my%20little%20pony'
GCS_KEY = 'xxx'  # Google Custom Search API key (sent as the ``key`` parameter)
GCS_CX = 'zzz'  # search engine (sent as the ``cx`` parameter)
# Paging in all_links() stops once the next start index reaches this value.
gcsMaxResults = 30  # should be < 100 and (%10 == 0)
filename = 'gcsRes.json'  # file name for saved responses
11+
12+
def create_url(gcs_req, start=1):
    """Build the Google Custom Search request URL for a search request.

    args:
        gcs_req - search request; may contain plain spaces or already be
                  percent-encoded (e.g. 'my%20little%20pony')
        start - index of the first result to ask for (sent as ``start``)
    return:
        request URL as a string; always asks for 10 results per page
    """
    from urllib.parse import quote

    # Escape every character that is unsafe in a query string, not only
    # spaces.  '%' is kept as-is so that requests which are already
    # percent-encoded are not encoded a second time.
    query = quote(gcs_req, safe='%')
    return ('https://www.googleapis.com/customsearch/v1'
            + '?key=' + GCS_KEY
            + '&cx=' + GCS_CX
            + '&num=10'
            + '&start=' + str(start)
            + '&q=' + query)
15+
16+
def get_url(url):
    """Open *url* and return the body of the response.

    args:
        url - address to open with urllib.request.urlopen
    return:
        the response body as bytes on success,
        the string 'connect error' if urlopen raised HTTPError,
        the string 'url error' if urlopen raised any other URLError
    """
    try:
        # The context manager closes the response even if read() fails;
        # previously the response object was never closed.
        with urllib.request.urlopen(url) as response:
            return response.read()
    except urllib.error.HTTPError:
        # Must be tested before URLError: HTTPError is a subclass of it.
        return 'connect error'
    except urllib.error.URLError:
        return 'url error'
26+
27+
def get_json(gcs_req, mode='url', start=1, filename='gcsRes.json'):
    """Get a search response from the API or from a previously saved file.

    args:
        gcs_req - search request
        mode - 'url' - open url, 'save' - open url + save to file,
               'file' - open file
        start - index of the first result to request ('url'/'save' modes)
        filename - file written by 'save' and read by 'file'; a relative
                   name is resolved against the module directory ``path``
    return:
        'url' / 'save': the JSON document as bytes; when the request fails
            a stub document ``{ "kind" : "<error text>" }`` is returned
            (and, in 'save' mode, written to the file) instead
        'file': the content of the file already parsed with json.loads
        any other mode: None
    """
    # Both 'save' and 'file' must address the same location; previously
    # 'file' read from the current working directory while 'save' wrote
    # next to the module, so a saved response could not be read back.
    file_path = os.path.join(path, filename)

    if mode == 'url' or mode == 'save':
        page = get_url(create_url(gcs_req, start))
        if page == 'connect error' or page == 'url error':
            # Wrap the error text in a JSON document so callers can always
            # run the result through json.loads.
            page = ('{ "kind" : "'+page+'" }').encode('utf-8')
        if mode == 'save':
            with open(file_path, 'wb') as f:
                f.write(page)
        return page

    if mode == 'file':
        # NOTE(review): unlike the other modes this returns the parsed
        # object, not the raw JSON text; kept for backward compatibility.
        with open(file_path, 'rb') as f:
            return json.loads(f.read())

    return None
48+
49+
def is_next_page(dict):
    """Tell whether a parsed search response announces a further page.

    args:
        dict - parsed response (mapping)
    return:
        True if ``dict['queries']`` holds a non-None 'nextPage' entry,
        False otherwise - including when 'queries' is missing altogether,
        as in the ``{ "kind" : "<error text>" }`` stub built on failures
    """
    # A missing 'queries' section used to raise KeyError here.
    queries = dict.get('queries') or {}
    return queries.get('nextPage') is not None
52+
53+
def total_results(dict):
    """Return ``searchInformation.totalResults`` of a parsed response as int."""
    search_info = dict['searchInformation']
    count = search_info['totalResults']
    return int(count)
55+
56+
def next_page_start(dict):
    """Return 'startIndex' of the first ``queries.nextPage`` entry."""
    next_pages = dict['queries']['nextPage']
    first_entry = next_pages[0]
    return first_entry['startIndex']
58+
59+
def search_request(dict):
    """Return 'searchTerms' of the first ``queries.request`` entry."""
    requests = dict['queries']['request']
    first_entry = requests[0]
    return first_entry['searchTerms']
61+
62+
def page_links(dict):
    """Collect the 'link' value of every result on one response page.

    args:
        dict - parsed response (mapping)
    return:
        list with the 'link' of each entry in ``dict['items']``, in order;
        an empty list when the response carries no 'items' entry
    """
    # A page without results has no 'items' key; treat it as empty instead
    # of raising KeyError.
    return [result['link'] for result in dict.get('items', [])]
67+
68+
def all_links(gcs_req, show_info=False):
    """Collect result links for a search request, page by page.

    Each page is fetched with get_json() in 'save' mode, so the last
    response is also written to the default file.  Paging stops when a
    request fails, when the response announces no next page, or when the
    next page would start at or beyond ``gcsMaxResults``.

    args:
        gcs_req - search request
        show_info - if true, print the request and the number of links
                    collected so far after every successfully fetched page
    return:
        list of links gathered from all fetched pages (possibly empty)
    """
    links = []
    start = 1
    while True:
        page = json.loads(get_json(gcs_req, 'save', start))

        # A failed request yields the stub { "kind" : "<error text>" },
        # which has none of the keys the helpers below read.  Stop here
        # instead of crashing with KeyError; 'url error' was previously
        # not checked at all.
        if page.get('kind') in ('connect error', 'url error'):
            break

        if total_results(page) > 0:
            links += page_links(page)
        if show_info:
            print('Request: '+search_request(page)+' Results: '+str(len(links)))

        if not is_next_page(page):
            break
        next_start = next_page_start(page)
        if not next_start < gcsMaxResults:
            break
        # Always advance, so the same page can never be requested twice.
        start = next_start
    return links

test.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import os
2+
import sys
3+
import json
4+
sys.path.append(os.path.dirname(__file__)+'/..')
5+
from ws_snd import google_custom_search as gcs
6+
7+
# Directory of this script, with a trailing slash.  abspath() keeps the value
# meaningful when ``__file__`` is a bare file name (dirname() would then
# return '' and the result would be just '/').
path = os.path.dirname(os.path.abspath(__file__)) + '/'

# Search request, already percent-encoded.
# Alternative request: 'dual%20nozzle%203dprntbot'
req = 'my%20little%20pony'

# Only query the API when run as a script, not when the file is imported.
if __name__ == '__main__':
    print(path)
    # show_info=True makes all_links() report progress for every page.
    print(gcs.all_links(req, True))
16+
17+
18+

0 commit comments

Comments
 (0)