Skip to content

Commit 4ff58f9

Browse files
Lucas Peng
authored and committed
first commit
1 parent 5b0f5d0 commit 4ff58f9

File tree

2 files changed

+173
-0
lines changed

2 files changed

+173
-0
lines changed

alexarank.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env python
"""Print the Alexa reach rank of the site given as the first argument.

Usage: alexarank.py <site>
"""
import re
import sys
from urllib import request
from urllib.parse import quote

# NOTE(review): confirm this endpoint is still being served before relying on it.
ALEXA_DATA_URL = 'http://data.alexa.com/data?cli=10&dat=s&url=%s'

# The capture group isolates the digits, so no manual slicing of the match
# span is needed.
REACH_RANK_RE = re.compile(r'REACH RANK="(\d+)"')


def parse_reach_rank(xml):
    """Return the integer in the first ``REACH RANK="..."`` attribute of *xml*.

    Returns ``None`` when the attribute is absent instead of failing with an
    ``AttributeError`` on a missing regex match.
    """
    match = REACH_RANK_RE.search(xml)
    return int(match.group(1)) if match else None


def fetch_reach_rank(site):
    """Download the Alexa data document for *site* and return its reach rank.

    The site is percent-encoded so characters such as ``&`` or ``#`` cannot
    alter the query string. Returns ``None`` when no rank is reported.

    Raises:
        OSError: if the request fails (``urllib.error.URLError`` is a subclass).
    """
    with request.urlopen(ALEXA_DATA_URL % quote(site, safe='')) as response:
        xml = response.read().decode("utf-8")
    return parse_reach_rank(xml)


def main(argv):
    """Run the command-line tool and return a process exit status."""
    if len(argv) < 2:
        print('usage: alexarank.py <site>', file=sys.stderr)
        return 2
    site = argv[1]
    try:
        rank = fetch_reach_rank(site)
    except OSError as err:
        print('Could not fetch rank for %s: %s' % (site, err), file=sys.stderr)
        return 1
    if rank is None:
        print('No reach rank found for %s' % site, file=sys.stderr)
        return 1
    print(rank)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

hqtrivia.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
import numpy as np
2+
import cv2
3+
from PIL import Image
4+
import sys, re
5+
import pyocr
6+
import pyocr.builders
7+
import webbrowser
8+
from nltk.tag import pos_tag
9+
from urllib import request
10+
#from google import search
11+
from bs4 import BeautifulSoup
12+
import string
13+
import urllib
14+
class HQTrivia:
    """Webcam helper that OCRs a trivia question and opens web searches for it.

    A live camera preview shows two guide rectangles: one for the question
    text and one for the answer choices. Pressing ``c`` OCRs both regions and
    launches searches; pressing ``q`` quits.
    """

    # Expected camera frame size in pixels (width, height).
    # For a 720p camera use (1280, 720) instead.
    cam_size = (1920, 1080)
    # Width/height of the question region and of the answer region.
    capture_size = (int(700), int(280))
    answer_size = (int(650), int(350))
    # Both rectangles are ((x1, y1), (x2, y2)), centred on the frame and then
    # shifted 70 px to the right; the answer box sits 350 px lower.
    capture_rect = ((int(cam_size[0]/2-capture_size[0]/2+70),
                     int(cam_size[1]/2-capture_size[1]/2)),
                    (int(cam_size[0]/2+capture_size[0]/2+70),
                     int(cam_size[1]/2+capture_size[1]/2)))
    answer_rect = ((int(cam_size[0]/2-answer_size[0]/2+70),
                    int(cam_size[1]/2-answer_size[1]/2 + 350)),
                   (int(cam_size[0]/2+answer_size[0]/2+70),
                    int(cam_size[1]/2+answer_size[1]/2)+ 350))

    # (opening, closing) quotation-mark pairs tried in order; OCR output mixes
    # straight (") and curly (\u201c \u201d) marks unpredictably.
    _QUOTE_PAIRS = (
        ("\u201c", "\""), ("\"", "\""), ("\u201c", "\u201c"),
        ("\"", "\u201c"), ("\u201c", "\u201d"), ("\"", "\u201d"),
        ("\u201d", "\u201c"),
    )
    # Markup that precedes the first organic result link in the fetched page.
    _RESULT_ANCHOR = '<h3 class="r"><a href="'
    _REACH_RANK_RE = re.compile(r'REACH RANK="(\d+)"')
    _SEARCH_URL = "http://www.google.com/search?q="

    def __init__(self):
        """Select the first available OCR tool and its first language.

        Exits the process with status 1 when no OCR tool or no language data
        is installed.
        """
        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]
        print("Will use tool '%s'" % (self.tool.get_name()))
        langs = self.tool.get_available_languages()
        print("Available languages: %s" % ", ".join(langs))
        if not langs:
            # Indexing an empty list would raise IndexError; fail the same way
            # as the missing-tool case instead.
            print("No OCR language found")
            sys.exit(1)
        self.lang = langs[0]
        print("Will use lang '%s'" % (self.lang))

    @staticmethod
    def _open_tab(url):
        """Open *url* in a new Chrome tab, or the default browser if Chrome is not registered."""
        try:
            webbrowser.get('chrome').open_new_tab(url)
        except webbrowser.Error:
            webbrowser.open_new_tab(url)

    @classmethod
    def _extract_quote(cls, text):
        """Return the first non-empty quoted phrase in *text*, or ``None``."""
        for opener, closer in cls._QUOTE_PAIRS:
            start = text.find(opener)
            if start == -1:
                continue
            end = text.find(closer, start + 1)
            # end > start + 1 means a closing mark exists and the phrase
            # between the marks is not empty.
            if end > start + 1:
                return text[start + 1:end]
        return None

    @classmethod
    def _first_result_url(cls, html):
        """Return the href of the first result link in *html*, or ``None``."""
        start = html.find(cls._RESULT_ANCHOR)
        if start == -1:
            return None
        start += len(cls._RESULT_ANCHOR)
        end = html.find('"', start)
        if end == -1:
            return None
        return html[start:end]

    @classmethod
    def _parse_reach_rank(cls, xml):
        """Return the integer from the first ``REACH RANK="..."`` in *xml*.

        Raises:
            ValueError: if *xml* carries no reach rank.
        """
        match = cls._REACH_RANK_RE.search(xml)
        if match is None:
            raise ValueError("no REACH RANK attribute in Alexa response")
        return int(match.group(1))

    @staticmethod
    def _crop(image, rect):
        """Return the sub-image of *image* bounded by *rect* ``((x1, y1), (x2, y2))``."""
        (x1, y1), (x2, y2) = rect
        return image[y1:y2, x1:x2]

    def _ocr(self, image):
        """Run the configured OCR tool on *image* and return the recognised text."""
        return self.tool.image_to_string(
            Image.fromarray(image),
            lang=self.lang,
            builder=pyocr.builders.TextBuilder()
        )

    def get_page(self, query):
        """Fetch the Google results page for *query* and return the raw bytes.

        Args:
            query: search terms separated by ``+``. Every other reserved
                character is percent-encoded here, so callers pass raw text.

        Raises:
            OSError: if the request fails or times out.
        """
        from urllib.parse import quote

        headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
        url = 'http://www.google.com/search?q=' + quote(query, safe='+')
        req = urllib.request.Request(url, headers=headers)
        # The timeout keeps a stalled request from freezing the capture loop.
        with urllib.request.urlopen(req, timeout=10) as page:
            return page.read()

    def alexa_rank(self, url):
        """Return the Alexa reach rank reported for *url*.

        Raises:
            ValueError: if the response carries no reach rank.
            OSError: if the request fails.
        """
        from urllib.parse import quote

        endpoint = 'http://data.alexa.com/data?cli=10&dat=s&url=%s' % quote(url, safe='')
        with request.urlopen(endpoint) as response:
            xml = response.read().decode("utf-8")
        return self._parse_reach_rank(xml)

    def search_answer(self, query, ans):
        """Open browser searches for a question and look up each answer choice.

        Three searches may be opened: the whole question, the first quoted
        phrase in it (if any), and its proper nouns (only when no quote was
        found). For up to five answer choices, the first result link for
        "<answer> + proper nouns" is fetched and printed.

        Args:
            query: the question text.
            ans: list of answer-choice strings.
        """
        from urllib.parse import quote_plus

        # search whole question
        self._open_tab(self._SEARCH_URL + quote_plus(query))

        # try quote; only the first matching phrase is searched so one quote
        # does not spawn several identical tabs
        phrase = self._extract_quote(query)
        if phrase is not None:
            print("find quote!")
            print(phrase)
            self._open_tab(self._SEARCH_URL + quote_plus(phrase))

        # search proper nouns
        translator = str.maketrans('', '', string.punctuation)
        tagged_sent = pos_tag(query.translate(translator).split())
        # "Which" is dropped explicitly even when tagged as a proper noun.
        propernouns = [word for word, pos in tagged_sent
                       if pos == 'NNP' and word != "Which"]
        if propernouns:
            if phrase is None:
                print("search: "," ".join(propernouns))
                self._open_tab(self._SEARCH_URL + quote_plus(" ".join(propernouns)))
            print('------------------------------------------------------')
            if len(ans) <= 5:
                for a in ans:
                    print("\n"+a+" "+" ".join(propernouns))
                    try:
                        html = self.get_page(a+"+"+"+".join(propernouns))
                    except OSError as err:
                        # Best effort: one failed lookup must not abort the rest.
                        print("lookup failed: %s" % err)
                    else:
                        # Decode the bytes; str(bytes) would yield the
                        # escaped b'...' repr rather than the page text.
                        url = self._first_result_url(
                            html.decode("utf-8", errors="replace"))
                        if url is None:
                            print("no result link found")
                        else:
                            print(url)
                            print(len(url))
                    print('------------------------------------------------------')
        print('------------------------------------------------------')

    def _capture_and_search(self, gray):
        """OCR the question and answer regions of *gray* and search for them."""
        cap_img = self._crop(gray, self.capture_rect)
        ans_img = self._crop(gray, self.answer_rect)
        if cap_img.size == 0 or ans_img.size == 0:
            # Happens when the camera delivers frames smaller than cam_size.
            print("capture region is outside the camera frame; check cam_size")
            return
        # With THRESH_OTSU the threshold value is chosen automatically.
        _, cap_img = cv2.threshold(cap_img,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        _, ans_img = cv2.threshold(ans_img,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        # The binarised crops are saved to disk for inspection.
        cv2.imwrite('test.png',cap_img)
        cv2.imwrite('test_ans.png',ans_img)
        query = self._ocr(cap_img).replace('\n',' ').rstrip()
        print(query)
        answers = [line.strip() for line in self._ocr(ans_img).split('\n')
                   if line.strip()]
        print(answers)
        self.search_answer(query, answers)

    def main_loop(self,flip=False):
        """Show the camera preview until ``q`` is pressed; ``c`` triggers a capture.

        Args:
            flip: mirror the preview horizontally. OCR always reads the
                unflipped frame.
                NOTE(review): the guide rectangles are drawn at unflipped
                coordinates, so with ``flip=True`` they do not outline the
                region that is actually cropped -- confirm intended behavior.
        """
        cap = cv2.VideoCapture(0)
        try:
            while True:
                # Capture frame-by-frame
                ret, frame = cap.read()
                if not ret:
                    print("could not read a frame from the camera")
                    break

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # Draw the guides on a separate image: drawing on `gray`
                # itself would put the rectangle borders into the OCR crops.
                preview = cv2.flip(gray,1) if flip else gray.copy()
                cv2.rectangle(preview, self.capture_rect[0], self.capture_rect[1], (0,0,0))
                cv2.rectangle(preview, self.answer_rect[0], self.answer_rect[1], (0,0,0))
                # Display the resulting frame
                cv2.imshow('frame',preview)

                key = cv2.waitKey(1) & 0xFF
                if key == ord('c'):
                    print('capture!')
                    self._capture_and_search(gray)
                elif key == ord('q'):
                    break
        finally:
            # Release the camera and windows even if a capture raised.
            cap.release()
            cv2.destroyAllWindows()
160+
161+
if __name__ == '__main__':
    # Script entry point: set up OCR, then hand control to the interactive
    # camera loop.
    HQTrivia().main_loop()

0 commit comments

Comments
 (0)