7
7
import wikipedia
8
8
import re
9
9
import html2text
10
+ import time
10
11
11
12
class bcolors :
12
13
HEADER = '\033 [95m'
@@ -141,10 +142,24 @@ def search_quote(self,question, open_in_browser=False):
141
142
webbrowser .get ('chrome' ).open_new_tab ("http://www.google.com/search?q=" + query_plus )
142
143
return find_quote , quote
143
144
145
+ def search_all_cap (self , question ):
146
+ find_all_cap = False
147
+ ans = re .findall ('([A-Z][a-z]+(?=\s[A-Z])(?:\s[A-Z][a-z]+)+)' ,question )
148
+ ans = " " .join (ans )
149
+ if len (ans .split ()) >= 2 :
150
+ find_all_cap = True
151
+ print ("find capitalize nouns!" )
152
+ print (ans )
153
+ return find_all_cap , ans
154
+
144
155
def get_propernouns (self ,question ):
156
+ #Find quotation mark
145
157
find_quote , quote = self .search_quote (question )
146
158
if find_quote :
147
159
return quote .split ()
160
+ find_all_cap , quote = self .search_all_cap (question )
161
+ if find_all_cap :
162
+ return quote .split ()
148
163
#search proper nouns
149
164
translator = str .maketrans ('' , '' , string .punctuation )
150
165
query = question .translate (translator )
@@ -201,6 +216,22 @@ def search_wikipedia2(self, question, ans, use_google = True):
201
216
count += self .find_occurance (html .upper (), q .upper ())/ len (html )
202
217
print (a ,": " ,round (count * 100000 ))
203
218
219
def find_list_query(self, question):
    """Return the tail of *question* starting at its first superlative word.

    The question is split on whitespace and tagged with ``pos_tag``
    (presumably NLTK's tagger -- confirm against the file's imports).
    Words tagged ``JJS`` or ``RBS`` are treated as superlatives.

    Returns:
        A ``(found, q)`` tuple. When a superlative is present, ``found`` is
        True and ``q`` is the question from the first occurrence of that
        word to the end; otherwise ``(False, "")``.
    """
    tokens = question.split()
    tagged = pos_tag(tokens)
    print(tagged)
    superlatives = [token for token, tag in tagged if tag in ('JJS', 'RBS')]
    if not superlatives:
        return False, ""
    # Slice from the first place the leading superlative occurs.
    first_at = tokens.index(superlatives[0])
    return True, " ".join(tokens[first_at:])
231
+
232
+ # superlative_adj = [word for word,pos in tagged_sent if pos in ['JJS','RBS']]
233
+ # print(superlative_adj)
234
+
204
235
def search_answer (self ,question ,ans ):
205
236
#self.find_occurance("a aa a","a")
206
237
question = question .replace ("of the following" ,"" )
@@ -211,30 +242,42 @@ def search_answer(self,question,ans):
211
242
question = unidecode (question ) #convert all symbol to ascii, ie: curly quote to simple quote
212
243
except :
213
244
pass
245
+ #first search the whole question on google
214
246
self .search_google (question ,False )
247
+ #second, search the whole question for wikipedia page
215
248
self .search_wikipedia (question .split (), ans , True )
249
+ #TODO: identify superlative adjectives (never, most, ADJ-est), then find the wikipedia list
250
+ #need_list, query = self.find_list_query(question)
251
+ # if need_list:
252
+ # print(query)
253
+
254
+ #search for special nouns
216
255
propernouns = self .get_propernouns (question )
217
256
print (propernouns )
218
- #translator = str.maketrans('', '', string.punctuation)
219
257
if len (propernouns )> 0 :
220
- #self.search_google(" ".join(propernouns),False)
221
258
self .search_wikipedia (propernouns , ans , True )
222
- #question = question.translate(translator)#remove punctuations
223
- #self.search_wikipedia2(question, ans, True)
224
- print (bcolors .FAIL + "Ready to Capture!" + bcolors .ENDC )
259
+
260
+ #done!
261
+ print (" \n " + bcolors .FAIL + "Ready to Capture!" + bcolors .ENDC )
225
262
226
263
if __name__ == '__main__':
    # Manual smoke run: each entry is [question text, list of candidate answers].
    # Commented-out entries are kept so they can be re-enabled when needed.
    questions = [
        # ["Which of these websites is owned by Vice Media?",["IGN","Joystiq","Waypoint"]],
        # ["Which 80s song begins, “Bass, how low can you go?”",["My Adidas","Push It","Bring The Noise"]],
        # ["Which of these is a popular anime series by Rooster Teeth?",["RWBY","BURY","WAKY"]],
        # ["In Mexico, a saladito is always known as what?",["Taco salad", "Salted plum", "Guava roll"]],
        # ["Which actor turned down the role of James Bond twice before finally accepting",["Timothy Dalton", "Roger Moore", "Sean Connery"]],
        # ["Which country is Bond girl actress Eva Green from?",["France", "Denmark", "England"]],
        # ["What does an okta measure?",["Japanese seasons", "Ocean salinity", "Cloud cover"]],
        #["In the 2010 Oracle v. Google case, it was ruled that which cannot be copyrighted?",["Search databae", "Web addresses", "APIs"]],
        ["Which of these is the most populous city with “city” in its name?", ['Mexico City', 'New York City', 'Taipei City']],
        ["Which is the highest grossing film of 1998", ["Armageddon", "Saving Private Ryan", "Godzilla"]],
        ["Which NBA team has never produced a Slam Dunk Contest winner?", ['Orlando Magic', 'Philadelphia 76ers', 'LA Clippers']],
        ["Which underwear brand licenses the name of a former tennis star?", ["Giorgio Armani", "Bjérn Borg", "Calvin Klein"]],
        ["What company built the first mobile phone?", ["Motorola", "Nokia", "Ericsson"]],
    ]
    searcher = Searcher()
    for q in questions:
        print("\n Question:\n ", q)
        # perf_counter is monotonic, so the measured interval is not affected
        # by system clock adjustments the way time.time() differences are.
        start = time.perf_counter()
        searcher.search_answer(q[0], q[1])
        elapsed = time.perf_counter() - start
        print("time spent: ", elapsed)
0 commit comments