|
import re
import string
import sys
import urllib
import urllib.error
import urllib.parse
import urllib.request
import webbrowser
from urllib import request

import cv2
import numpy as np
import pyocr
import pyocr.builders
from bs4 import BeautifulSoup
#from google import search
from nltk.tag import pos_tag
from PIL import Image
class HQTrivia:
    """Webcam helper that OCRs a trivia question and searches the web for it.

    The main loop shows a grayscale camera preview with two guide boxes (one
    for the question, one for the answers). Pressing ``c`` OCRs both boxes and
    launches browser searches; pressing ``q`` quits.
    """

    # cam_size = (1280, 720)
    cam_size = (1920, 1080)
    capture_size = (700, 280)
    answer_size = (650, 350)

    # Both guide boxes are centred in the frame and then shifted right; the
    # answer box is additionally shifted down so it sits below the question.
    _X_SHIFT = 70
    _ANSWER_Y_SHIFT = 350

    # Rectangles are ((x1, y1), (x2, y2)) in frame pixel coordinates.
    capture_rect = (
        (cam_size[0] // 2 - capture_size[0] // 2 + _X_SHIFT,
         cam_size[1] // 2 - capture_size[1] // 2),
        (cam_size[0] // 2 + capture_size[0] // 2 + _X_SHIFT,
         cam_size[1] // 2 + capture_size[1] // 2),
    )
    answer_rect = (
        (cam_size[0] // 2 - answer_size[0] // 2 + _X_SHIFT,
         cam_size[1] // 2 - answer_size[1] // 2 + _ANSWER_Y_SHIFT),
        (cam_size[0] // 2 + answer_size[0] // 2 + _X_SHIFT,
         cam_size[1] // 2 + answer_size[1] // 2 + _ANSWER_Y_SHIFT),
    )

    # Seconds to wait for an HTTP response before giving up.
    request_timeout = 10

    _SEARCH_URL = "http://www.google.com/search?q="
    _USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) '
                   'Gecko/20100101 Firefox/23.0')
    # Marker that precedes the first result link in the fetched results page.
    _RESULT_MARKER = '<h3 class="r"><a href="'
    # A phrase enclosed by any mix of straight and curly double quotes
    # (OCR frequently mixes them up).
    _QUOTE_RE = re.compile(r'["\u201c\u201d]([^"\u201c\u201d]+)["\u201c\u201d]')
    _REACH_RANK_RE = re.compile(r'REACH RANK="(\d+)"')

    def __init__(self, lang=None):
        """Select the OCR tool and language.

        Args:
            lang: Optional OCR language code. When omitted, or when the tool
                does not report it as available, the first language reported
                by the tool is used.

        Exits the process with status 1 when no OCR tool or no OCR language
        is available.
        """
        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]
        print("Will use tool '%s'" % (self.tool.get_name()))

        langs = self.tool.get_available_languages()
        print("Available languages: %s" % ", ".join(langs))
        if not langs:
            # Indexing an empty list would raise IndexError; fail the same
            # way the missing-tool case does.
            print("No OCR language found")
            sys.exit(1)
        if lang is not None and lang not in langs:
            print("Requested lang '%s' is not available" % lang)
        self.lang = lang if lang in langs else langs[0]
        print("Will use lang '%s'" % (self.lang))

    def get_page(self, query):
        """Fetch the Google results page for *query* and return the raw body.

        Args:
            query: Search terms. ``+`` is kept as the term separator; every
                other unsafe character is percent-encoded.

        Returns:
            The response body exactly as read from the HTTP response.
        """
        headers = {'User-Agent': self._USER_AGENT}
        url = self._SEARCH_URL + urllib.parse.quote(query, safe='+')
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=self.request_timeout) as page:
            return page.read()

    def alexa_rank(self, url):
        """Return the reach rank that the Alexa data endpoint reports for *url*.

        NOTE(review): the data.alexa.com endpoint may no longer be reachable;
        confirm before relying on this method.

        Raises:
            ValueError: if the response contains no ``REACH RANK`` attribute.
        """
        endpoint = ('http://data.alexa.com/data?cli=10&dat=s&url=%s'
                    % urllib.parse.quote(url, safe=''))
        with urllib.request.urlopen(endpoint,
                                    timeout=self.request_timeout) as resp:
            xml = resp.read().decode("utf-8")
        return self._parse_reach_rank(xml)

    @classmethod
    def _parse_reach_rank(cls, xml):
        """Extract the integer value of ``REACH RANK="..."`` from *xml*."""
        match = cls._REACH_RANK_RE.search(xml)
        if match is None:
            raise ValueError("no REACH RANK found in response")
        return int(match.group(1))

    @classmethod
    def _find_quotes(cls, text):
        """Return the distinct quoted phrases in *text*, in order of appearance."""
        phrases = []
        for phrase in cls._QUOTE_RE.findall(text):
            if phrase not in phrases:
                phrases.append(phrase)
        return phrases

    @classmethod
    def _first_result_url(cls, html):
        """Return the first result link in *html*, or None when there is none."""
        start = html.find(cls._RESULT_MARKER)
        if start == -1:
            return None
        start += len(cls._RESULT_MARKER)
        end = html.find('"', start)
        if end == -1:
            return None
        return html[start:end]

    @staticmethod
    def _open_tab(url):
        """Open *url* in Chrome, falling back to the default browser."""
        try:
            browser = webbrowser.get('chrome')
        except webbrowser.Error:
            # Chrome is not registered on this machine.
            webbrowser.open_new_tab(url)
        else:
            browser.open_new_tab(url)

    def search_answer(self, query, ans):
        """Open browser searches for the question and probe each answer.

        Opens a tab for the whole question, one per quoted phrase in it, and
        (when no quote was found) one for its proper nouns. Then, for up to
        five candidate answers, fetches "<answer> + proper nouns" and prints
        the first result link.

        Args:
            query: The OCR'd question text.
            ans: List of OCR'd candidate answers.
        """
        # search whole question
        self._open_tab(self._SEARCH_URL + urllib.parse.quote_plus(query))

        # try quote
        quotes = self._find_quotes(query)
        for phrase in quotes:
            print("find quote!")
            print(phrase)
            self._open_tab(self._SEARCH_URL + urllib.parse.quote_plus(phrase))

        # search proper nouns
        translator = str.maketrans('', '', string.punctuation)
        stripped = query.translate(translator)
        tagged_sent = pos_tag(stripped.split())
        # "Which" at the start of a question is excluded explicitly.
        propernouns = [word for word, pos in tagged_sent
                       if pos == 'NNP' and word != "Which"]
        if propernouns:
            if not quotes:
                print("search: ", " ".join(propernouns))
                self._open_tab(
                    self._SEARCH_URL
                    + "+".join(urllib.parse.quote_plus(w) for w in propernouns))
            print('------------------------------------------------------')
            if len(ans) <= 5:
                for a in ans:
                    print("\n" + a + " " + " ".join(propernouns))
                    try:
                        page = self.get_page("+".join([a] + propernouns))
                    except Exception as err:
                        # Best-effort lookup: report the failure and move on
                        # to the next answer instead of hiding it.
                        print("lookup failed: %s" % err)
                        continue
                    html = page.decode("utf-8", errors="replace")
                    url = self._first_result_url(html)
                    if url is None:
                        print("no result link found")
                    else:
                        print(url)
                        print(len(url))
            print('------------------------------------------------------')
        print('------------------------------------------------------')

    @staticmethod
    def _crop(gray, rect):
        """Return the ``rect`` region of ``gray``, or None if it does not fit.

        ``rect`` is ((x1, y1), (x2, y2)); ``gray`` is indexed [row, column].
        """
        (x1, y1), (x2, y2) = rect
        height, width = gray.shape[:2]
        if x1 < 0 or y1 < 0 or x2 > width or y2 > height:
            return None
        return gray[y1:y2, x1:x2]

    @staticmethod
    def _binarize(img):
        """Binarize *img* with an Otsu-selected threshold."""
        _, binary = cv2.threshold(img, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binary

    def _ocr(self, img):
        """Run the configured OCR tool on *img* and return the text."""
        return self.tool.image_to_string(
            Image.fromarray(img),
            lang=self.lang,
            builder=pyocr.builders.TextBuilder()
        )

    def _capture_and_search(self, gray):
        """OCR the question and answer regions of *gray* and search for them."""
        print('capture!')
        cap_img = self._crop(gray, self.capture_rect)
        ans_img = self._crop(gray, self.answer_rect)
        if cap_img is None or ans_img is None:
            print("Capture regions do not fit the %dx%d camera frame"
                  % (gray.shape[1], gray.shape[0]))
            return

        cap_img = self._binarize(cap_img)
        ans_img = self._binarize(ans_img)
        # Keep the last captured crops on disk for inspection.
        cv2.imwrite('test.png', cap_img)
        cv2.imwrite('test_ans.png', ans_img)

        query = self._ocr(cap_img).replace('\n', ' ').rstrip()
        print(query)
        answers = [line for line in self._ocr(ans_img).rstrip().split('\n')
                   if line != '']
        print(answers)
        self.search_answer(query, answers)

    def main_loop(self, flip=False):
        """Show the camera preview until ``q``; capture and search on ``c``.

        Args:
            flip: When true, the preview is mirrored horizontally.
                NOTE(review): the OCR crops are always taken from the
                unmirrored frame, and the guide boxes are offset from the
                centre, so with ``flip=True`` the boxes shown in the preview
                do not cover the region that is actually OCR'd - confirm the
                intended behaviour.
        """
        cap = cv2.VideoCapture(0)
        try:
            if not cap.isOpened():
                print("Could not open camera 0")
                return
            while True:
                # Capture frame-by-frame
                ret, frame = cap.read()
                if not ret:
                    print("Failed to read a frame from the camera")
                    break

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # Draw the guides on a separate image so the box outlines
                # never end up inside the crops that are sent to OCR.
                preview = cv2.flip(gray, 1) if flip else gray.copy()
                cv2.rectangle(preview, self.capture_rect[0],
                              self.capture_rect[1], (0, 0, 0))
                cv2.rectangle(preview, self.answer_rect[0],
                              self.answer_rect[1], (0, 0, 0))
                # Display the resulting frame
                cv2.imshow('frame', preview)

                key = cv2.waitKey(1) & 0xFF
                if key == ord('c'):
                    self._capture_and_search(gray)
                elif key == ord('q'):
                    break
        finally:
            # When everything is done (or on error), release the capture.
            cap.release()
            cv2.destroyAllWindows()
| 160 | + |
if __name__ == "__main__":
    # Script entry point: set up OCR, then run the interactive camera loop.
    hqt = HQTrivia()
    hqt.main_loop()
0 commit comments