-
Notifications
You must be signed in to change notification settings - Fork 0
/
search.py
122 lines (106 loc) · 4.49 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import requests
import math
from flask import (
Blueprint, flash, g, redirect, render_template, request, url_for, jsonify
)
from werkzeug.exceptions import abort
from Soara.auth import login_required
from Soara.db import get_db
bp = Blueprint('search', __name__)
# soara_endpoint = "http://127.0.0.1:5000/"
import sqlite3
import re
def match(expr, item):
    """Return True if the regular expression ``expr`` matches inside ``item``.

    The search is case-insensitive and unanchored (``re.search`` semantics).
    This function is registered elsewhere in this module as SQLite's
    two-argument ``REGEXP`` function, so ``item`` is a column value.

    Args:
        expr: Regular-expression pattern.
        item: Text to search. ``None`` (how sqlite3 hands over SQL NULL)
            never matches.

    Returns:
        bool: True when the pattern is found, False otherwise.

    Raises:
        re.error: If ``expr`` is not a valid regular expression.
    """
    # A NULL column value (or NULL pattern) would make re raise TypeError,
    # which aborts the whole SQL statement; treat it as "no match" instead.
    if expr is None or item is None:
        return False
    return re.search(expr, item, re.I) is not None
_PAGE_SIZE = 30  # rows shown per result page

# Part-of-speech tags recognised inside a grammar query.
_POS_TAGS = frozenset(
    ('NOUN', 'PRON', 'ADJ', 'VERB', 'ADV', 'ADP', 'PRT', 'DET', 'CONJ', 'NUM')
)


def _split_grammar_query(query):
    """Translate a grammar query into a ``(text_regex, pos_regex)`` pair.

    The query is split into alphanumeric words.  A word that is a known POS
    tag becomes a wildcard word in the text regex and the tag itself in the
    POS regex; any other word is kept literally in the text regex and becomes
    ``[A-Z.]+`` in the POS regex.  Example::

        "Hello <PRON> my name is <NOUN>"
        text: "Hello [a-zA-Z0-9]+ my name is [a-zA-Z0-9]+"
        pos:  "[A-Z.]+ PRON [A-Z.]+ [A-Z.]+ [A-Z.]+ NOUN"
    """
    words = re.findall(r'[a-zA-Z0-9]+', query)
    text_parts = []
    pos_parts = []
    tag_count = 0
    for word in words:
        if word in _POS_TAGS:
            text_parts.append("[a-zA-Z0-9]+")
            pos_parts.append(word)
            tag_count += 1
        else:
            text_parts.append(word)
            pos_parts.append("[A-Z.]+")
    # When every word is a tag the text column is unconstrained.
    text_regex = ".*" if tag_count == len(words) else " ".join(text_parts)
    return text_regex, " ".join(pos_parts)


def _page_numbers(total, per_page=_PAGE_SIZE):
    """Return the 1-based page numbers needed for ``total`` rows (at least [1])."""
    # Integer ceiling division: an exact multiple of per_page must not
    # produce an extra, empty trailing page.
    page_count = max(1, (total + per_page - 1) // per_page)
    return list(range(1, page_count + 1))


# route() must be the outermost decorator: Flask registers whatever function
# object route() receives, so login_required has to wrap the view first.
@bp.route('/search/<string:q>/<int:page>', methods=['GET', 'POST'])
@login_required
def search_corpus(q, page):
    """Search the corpus and render one page of results.

    POST reads ``query`` (and optionally ``page_no``) from the submitted form
    and redirects to the corresponding search URL.  Any other method runs the
    search and renders ``index.html``.

    A query containing a ``<TAG>`` marker (1-4 letters) or ``<.>`` is treated
    as a grammar search and filtered on both the ``text`` and ``pos`` columns;
    otherwise the query is used directly as a case-insensitive regular
    expression on ``text``.

    Args:
        q: Search expression taken from the URL.
        page: 1-based page number taken from the URL.

    Returns:
        A rendered ``index.html`` response, or a redirect for POST.  The
        ``results`` list handed to the template ends with a one-element list
        holding the "N Total Results" summary string.

    Raises:
        werkzeug.exceptions.BadRequest: If the search expression is not a
            valid regular expression, or the POST form lacks ``query``.
    """
    if request.method == 'POST':
        print("recieved post from /search/<string:q>/<int:page> !")
        q = request.form['query']
        # A missing, empty or non-numeric page_no falls back to the first page
        # instead of failing inside url_for's integer converter.
        page = max(1, request.form.get('page_no', default=1, type=int))
        return redirect(url_for("search.search_corpus", q=q, page=page), code=301)

    # GET, and the HEAD requests Flask routes here automatically.
    pos = ".*"  # POS regex; matches everything for a plain search
    q = q.strip()
    if q == "":
        return render_template(
            'index.html',
            results=[["0 Total Results for ' '"]],
            pages=[1],
            params=q,
        )

    q_unchanged = q
    if match(r'(\<[A-Z]{1,4}\>)|(\<\.\>)', q):
        # Two-pass regex filtering: one pattern for text, one for POS tags.
        print("GRAMMAR SEARCH:")
        print(f"plain q: {q}")
        q, pos = _split_grammar_query(q)

    # Reject a malformed pattern up front; otherwise it only fails inside the
    # SQLite REGEXP callback and surfaces as a server error.
    try:
        re.compile(q)
    except re.error:
        abort(400, description="Invalid search expression.")

    db = get_db()
    db.create_function("REGEXP", 2, match)

    count_row = db.execute(
        'SELECT COUNT(*) FROM corpus WHERE text REGEXP ? AND pos REGEXP ?;',
        (q, pos),
    ).fetchone()
    total = int(count_row[0])
    pages = _page_numbers(total)
    summary = f"{total} Total Results for '{q_unchanged}'."

    offset = max(page - 1, 0) * _PAGE_SIZE
    # ORDER BY keeps LIMIT/OFFSET paging stable; without it SQLite makes no
    # promise about row order between requests.
    select_query = (
        'SELECT channel, text, videos.vid, CAST(timestamp as INTEGER),'
        'row_number() over (order by corpus.ROWID) as r FROM '
        'corpus INNER JOIN videos ON corpus.vid = videos.vid '
        'WHERE text REGEXP ? AND pos REGEXP ? '
        'ORDER BY corpus.ROWID LIMIT ? OFFSET ?;'
    )
    rows = db.execute(select_query, (q, pos, _PAGE_SIZE, offset)).fetchall()
    result = [tuple(row) for row in rows]
    result.append([summary])
    db.close()

    # NOTE(review): params receives the translated regex, not the query the
    # user typed; if the template builds pagination links from it, grammar
    # searches lose their POS filter on later pages - confirm against template.
    return render_template('index.html', results=result, pages=pages, params=q)
# cursor = get_db()
# cursor.create_function("REGEXP", 2, match)
# select_query = 'SELECT COUNT(*) FROM corpus WHERE text REGEXP ?;'
# count = cursor.execute(
# select_query,
# # ('% '+q+' %',)).fetchone()
# (q,)).fetchone()
# total_pages = int(math.ceil(int(tuple(count)[0]) / 30 + 0.01))
# pages = list(range(1, total_pages+1))
# count = str(tuple(count)[0]) + " Total Results for \'" + q + "\'."
# offset = (page-1) * 30
# select_query = 'SELECT channel, text, videos.vid, CAST(timestamp as INTEGER) FROM corpus INNER JOIN videos ON corpus.vid = videos.vid WHERE text REGEXP ? LIMIT 30 OFFSET ?;'
# result = cursor.execute(
# select_query,
# # ('% '+q+' %', offset,)).fetchall()
# (q, offset,)).fetchall()
# result = [tuple(row) for row in result]
# result.append([count])
# cursor.close()
# return render_template('index.html', results = result, pages = pages, params = q)
@bp.route('/', methods=['GET', 'POST'])
@login_required
def index():
    """Serve the landing page, or forward a submitted query to the search view.

    A POST reads ``query`` from the form and redirects to the first results
    page of ``search.search_corpus``.  Any other request renders
    ``index.html`` with a single placeholder "no results" row.
    """
    if request.method != 'POST':
        placeholder_rows = [("No results", "0 results returned")]
        return render_template('index.html', results=placeholder_rows)

    print("recieved post from / !")
    # Submitted searches always start on the first page.
    target = url_for("search.search_corpus", q=request.form['query'], page=1)
    return redirect(target, code=301)