Skip to content

Commit 8e37183

Browse files
committed
Change query_hash calculation method to concatenate original query and parameters instead of final query.
1 parent 10a46fd commit 8e37183

File tree

13 files changed

+76
-45
lines changed

13 files changed

+76
-45
lines changed

redash/handlers/query_results.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ def run_query(query, parameters, data_source, query_id, should_apply_auto_limit,
6868

6969
return error_response(message)
7070

71+
query_hash = data_source.query_runner.gen_query_hash(query.text, parameters, should_apply_auto_limit)
72+
7173
try:
7274
query.apply(parameters)
7375
except (InvalidParameterError, QueryDetachedFromDataSourceError) as e:
@@ -81,7 +83,7 @@ def run_query(query, parameters, data_source, query_id, should_apply_auto_limit,
8183
if max_age == 0:
8284
query_result = None
8385
else:
84-
query_result = models.QueryResult.get_latest(data_source, query_text, max_age)
86+
query_result = models.QueryResult.get_latest(data_source, query_hash, max_age)
8587

8688
record_event(
8789
current_user.org,
@@ -102,6 +104,7 @@ def run_query(query, parameters, data_source, query_id, should_apply_auto_limit,
102104
else:
103105
job = enqueue_query(
104106
query_text,
107+
query_hash,
105108
data_source,
106109
current_user.id,
107110
current_user.is_api_user(),

redash/models/__init__.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@
7070
)
7171
from redash.utils import (
7272
base_url,
73-
gen_query_hash,
7473
generate_token,
7574
json_dumps,
7675
json_loads,
@@ -343,9 +342,7 @@ def unused(cls, days=7):
343342
)
344343

345344
@classmethod
346-
def get_latest(cls, data_source, query, max_age=0):
347-
query_hash = gen_query_hash(query)
348-
345+
def get_latest(cls, data_source, query_hash, max_age=0):
349346
if max_age == -1 and settings.QUERY_RESULTS_EXPIRED_TTL_ENABLED:
350347
max_age = settings.QUERY_RESULTS_EXPIRED_TTL
351348

@@ -815,20 +812,19 @@ def dashboard_api_keys(self):
815812
def update_query_hash(self):
816813
should_apply_auto_limit = self.options.get("apply_auto_limit", False) if self.options else False
817814
query_runner = self.data_source.query_runner if self.data_source else BaseQueryRunner({})
818-
query_text = self.query_text
819815

820816
parameters_dict = {p["name"]: p.get("value") for p in self.parameters} if self.options else {}
821817
if any(parameters_dict):
822818
try:
823-
query_text = self.parameterized.apply(parameters_dict).query
819+
self.parameterized.apply(parameters_dict).query
824820
except InvalidParameterError as e:
825821
logging.info(f"Unable to update hash for query {self.id} because of invalid parameters: {str(e)}")
826822
except QueryDetachedFromDataSourceError as e:
827823
logging.info(
828824
f"Unable to update hash for query {self.id} because of dropdown query {e.query_id} is unattached from datasource"
829825
)
830826

831-
self.query_hash = query_runner.gen_query_hash(query_text, should_apply_auto_limit)
827+
self.query_hash = query_runner.gen_query_hash(self.query_text, parameters_dict, should_apply_auto_limit)
832828

833829

834830
@listens_for(Query, "before_insert")

redash/query_runner/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,10 @@ def supports_auto_limit(self):
261261
def apply_auto_limit(self, query_text, should_apply_auto_limit):
262262
return query_text
263263

264-
def gen_query_hash(self, query_text, set_auto_limit=False):
265-
query_text = self.apply_auto_limit(query_text, set_auto_limit)
266-
return utils.gen_query_hash(query_text)
264+
def gen_query_hash(self, query_text, parameters={}, set_auto_limit=False):
265+
if not self.supports_auto_limit:
266+
set_auto_limit = False
267+
return utils.gen_query_hash(query_text, parameters, set_auto_limit)
267268

268269

269270
class BaseSQLQueryRunner(BaseQueryRunner):

redash/tasks/queries/execution.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ def _unlock(query_hash, data_source_id):
2727
redis_connection.delete(_job_lock_id(query_hash, data_source_id))
2828

2929

30-
def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}):
31-
query_hash = gen_query_hash(query)
30+
def enqueue_query(query, query_hash, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}):
3231
logger.info("Inserting job for %s with metadata=%s", query_hash, metadata)
3332
try_count = 0
3433
job = None
@@ -99,7 +98,9 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query
9998
if not scheduled_query:
10099
enqueue_kwargs["result_ttl"] = settings.JOB_EXPIRY_TIME
101100

102-
job = queue.enqueue(execute_query, query, data_source.id, metadata, **enqueue_kwargs)
101+
job = queue.enqueue(
102+
execute_query, query, data_source.id, metadata, query_hash=query_hash, **enqueue_kwargs
103+
)
103104

104105
logger.info("[%s] Created new job: %s", query_hash, job.id)
105106
pipe.set(
@@ -146,9 +147,10 @@ def _resolve_user(user_id, is_api_key, query_id):
146147

147148

148149
class QueryExecutor:
149-
def __init__(self, query, data_source_id, user_id, is_api_key, metadata, is_scheduled_query):
150+
def __init__(self, query, query_hash, data_source_id, user_id, is_api_key, metadata, is_scheduled_query):
150151
self.job = get_current_job()
151152
self.query = query
153+
self.query_hash = query_hash or gen_query_hash(query)
152154
self.data_source_id = data_source_id
153155
self.metadata = metadata
154156
self.data_source = self._load_data_source()
@@ -162,7 +164,6 @@ def __init__(self, query, data_source_id, user_id, is_api_key, metadata, is_sche
162164

163165
# Close DB connection to prevent holding a connection for a long time while the query is executing.
164166
models.db.session.close()
165-
self.query_hash = gen_query_hash(self.query)
166167
self.is_scheduled_query = is_scheduled_query
167168
if self.is_scheduled_query:
168169
# Load existing tracker or create a new one if the job was created before code update:
@@ -271,10 +272,12 @@ def execute_query(
271272
user_id=None,
272273
scheduled_query_id=None,
273274
is_api_key=False,
275+
query_hash=None,
274276
):
275277
try:
276278
return QueryExecutor(
277279
query,
280+
query_hash,
278281
data_source_id,
279282
user_id,
280283
is_api_key,

redash/tasks/queries/maintenance.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def refresh_queries():
9393
query_text = _apply_auto_limit(query_text, query)
9494
enqueue_query(
9595
query_text,
96+
query.query_hash,
9697
query.data_source,
9798
query.user_id,
9899
scheduled_query=query,

redash/utils/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def slugify(s):
5050
return re.sub(r"[^a-z0-9_\-]+", "-", s.lower())
5151

5252

53-
def gen_query_hash(sql):
53+
def gen_query_hash(sql, parameters={}, auto_limit=False):
5454
"""Return hash of the given query after stripping all comments, line breaks
5555
and multiple spaces.
5656
@@ -60,6 +60,10 @@ def gen_query_hash(sql):
6060
"""
6161
sql = COMMENTS_REGEX.sub("", sql)
6262
sql = "".join(sql.split())
63+
64+
query_parameters = {"parameters": parameters, "auto_limit": auto_limit}
65+
sql += "\n" + json.dumps(query_parameters, sort_keys=True, separators=(",", ":"))
66+
6367
return hashlib.md5(sql.encode("utf-8")).hexdigest()
6468

6569

tests/factories.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def __call__(self):
126126
runtime=1,
127127
retrieved_at=utcnow,
128128
query_text="SELECT 1",
129-
query_hash=gen_query_hash("SELECT 1"),
129+
query_hash=gen_query_hash("SELECT 1", {}, False),
130130
data_source=data_source_factory.create,
131131
org_id=1,
132132
)

tests/models/test_queries.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ def setUp(self):
464464
super(TestQueryUpdateLatestResult, self).setUp()
465465
self.data_source = self.factory.data_source
466466
self.query = "SELECT 1"
467-
self.query_hash = gen_query_hash(self.query)
467+
self.query_hash = gen_query_hash(self.query, {}, False)
468468
self.runtime = 123
469469
self.utcnow = utcnow()
470470
self.data = {"columns": {}, "rows": []}

tests/models/test_query_results.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,30 @@ def test_get_latest_returns_none_if_not_found(self):
1212

1313
def test_get_latest_returns_when_found(self):
1414
qr = self.factory.create_query_result()
15-
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, 60)
15+
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_hash, 60)
1616

1717
self.assertEqual(qr, found_query_result)
1818

1919
def test_get_latest_doesnt_return_query_from_different_data_source(self):
2020
qr = self.factory.create_query_result()
2121
data_source = self.factory.create_data_source()
22-
found_query_result = models.QueryResult.get_latest(data_source, qr.query_text, 60)
22+
found_query_result = models.QueryResult.get_latest(data_source, qr.query_hash, 60)
2323

2424
self.assertIsNone(found_query_result)
2525

2626
def test_get_latest_doesnt_return_if_ttl_expired(self):
2727
yesterday = utcnow() - datetime.timedelta(days=1)
2828
qr = self.factory.create_query_result(retrieved_at=yesterday)
2929

30-
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, max_age=60)
30+
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_hash, max_age=60)
3131

3232
self.assertIsNone(found_query_result)
3333

3434
def test_get_latest_returns_if_ttl_not_expired(self):
3535
yesterday = utcnow() - datetime.timedelta(seconds=30)
3636
qr = self.factory.create_query_result(retrieved_at=yesterday)
3737

38-
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, max_age=120)
38+
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_hash, max_age=120)
3939

4040
self.assertEqual(found_query_result, qr)
4141

@@ -44,7 +44,7 @@ def test_get_latest_returns_the_most_recent_result(self):
4444
self.factory.create_query_result(retrieved_at=yesterday)
4545
qr = self.factory.create_query_result()
4646

47-
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, 60)
47+
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_hash, 60)
4848

4949
self.assertEqual(found_query_result.id, qr.id)
5050

@@ -54,7 +54,7 @@ def test_get_latest_returns_the_last_cached_result_for_negative_ttl(self):
5454

5555
yesterday = utcnow() + datetime.timedelta(days=-1)
5656
qr = self.factory.create_query_result(retrieved_at=yesterday)
57-
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, -1)
57+
found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_hash, -1)
5858

5959
self.assertEqual(found_query_result.id, qr.id)
6060

tests/query_runner/test_basesql_queryrunner.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import unittest
22

33
from redash.query_runner import BaseQueryRunner, BaseSQLQueryRunner
4-
from redash.utils import gen_query_hash
54

65

76
class TestBaseSQLQueryRunner(unittest.TestCase):
@@ -118,16 +117,16 @@ def test_apply_auto_limit_inline_comment(self):
118117

119118
def test_gen_query_hash_baseSQL(self):
120119
origin_query_text = "select *"
121-
expected_query_text = "select * LIMIT 1000"
122-
base_runner = BaseQueryRunner({})
123-
self.assertEqual(
124-
base_runner.gen_query_hash(expected_query_text), self.query_runner.gen_query_hash(origin_query_text, True)
125-
)
120+
base_hash = self.query_runner.gen_query_hash(origin_query_text)
121+
self.assertEqual(base_hash, self.query_runner.gen_query_hash(origin_query_text, {}, False))
122+
self.assertNotEqual(base_hash, self.query_runner.gen_query_hash(origin_query_text, {}, True))
126123

127124
def test_gen_query_hash_NoneSQL(self):
128125
origin_query_text = "select *"
129126
base_runner = BaseQueryRunner({})
130-
self.assertEqual(gen_query_hash(origin_query_text), base_runner.gen_query_hash(origin_query_text, True))
127+
self.assertEqual(
128+
base_runner.gen_query_hash(origin_query_text), base_runner.gen_query_hash(origin_query_text, {}, True)
129+
)
131130

132131

133132
if __name__ == "__main__":

0 commit comments

Comments
 (0)