From ba973eb1fe27406f4e8cb6db74d46cc14c163778 Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Fri, 25 Oct 2024 04:00:29 +0300
Subject: [PATCH] Fixes #6767: correctly rehash queries in a migration (#7184)

---
 migrations/versions/9e8c841d1a30_fix_hash.py | 88 ++++++++++++++++++++
 redash/cli/queries.py                        | 17 ++++
 redash/metrics/database.py                   |  4 +-
 3 files changed, 107 insertions(+), 2 deletions(-)
 create mode 100644 migrations/versions/9e8c841d1a30_fix_hash.py

diff --git a/migrations/versions/9e8c841d1a30_fix_hash.py b/migrations/versions/9e8c841d1a30_fix_hash.py
new file mode 100644
index 0000000000..966d393391
--- /dev/null
+++ b/migrations/versions/9e8c841d1a30_fix_hash.py
@@ -0,0 +1,88 @@
+"""fix_hash
+
+Revision ID: 9e8c841d1a30
+Revises: 7205816877ec
+Create Date: 2024-10-05 18:55:35.730573
+
+"""
+import logging
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.sql import table
+from sqlalchemy import select
+
+from redash.query_runner import BaseQueryRunner, get_query_runner
+
+
+# revision identifiers, used by Alembic.
+revision = '9e8c841d1a30'
+down_revision = '7205816877ec'
+branch_labels = None
+depends_on = None
+
+
+def update_query_hash(record):
+    """Compute the query hash for one joined queries/data_sources row.
+
+    `record` must expose the `query_id`, `query`, `options` and `type`
+    columns selected in `upgrade()`. Returns the new hash; nothing is
+    written to the database here.
+    """
+    options = record['options'] or {}
+    should_apply_auto_limit = options.get("apply_auto_limit", False)
+
+    # A NULL type means the outer join found no data source for the query.
+    query_runner = get_query_runner(record['type'], {}) if record['type'] else None
+    if query_runner is None:
+        # NOTE(review): get_query_runner presumably returns None for a type
+        # that is not registered - confirm. Hash with the base runner then.
+        query_runner = BaseQueryRunner({})
+
+    # Parameter values are not applied to the query text before hashing.
+    if options.get('parameters'):
+        print(f"Query {record['query_id']} has parameters. Hash might be incorrect.")
+
+    return query_runner.gen_query_hash(record['query'], should_apply_auto_limit)
+
+
+def upgrade():
+    """Recompute `query_hash` for the rows of `queries` and store changed values.
+
+    Queries are outer-joined to `data_sources`, so queries without a data
+    source are rehashed as well.
+    """
+    conn = op.get_bind()
+
+    metadata = sa.MetaData(bind=conn)
+    queries = sa.Table("queries", metadata, autoload=True)
+    data_sources = sa.Table("data_sources", metadata, autoload=True)
+
+    joined_table = queries.outerjoin(data_sources, queries.c.data_source_id == data_sources.c.id)
+
+    query = select([
+        queries.c.id.label("query_id"),
+        queries.c.query,
+        queries.c.query_hash,
+        queries.c.options,
+        data_sources.c.id.label("data_source_id"),
+        data_sources.c.type
+    ]).select_from(joined_table)
+
+    # Read every row before updating, so no UPDATE runs on this connection
+    # while the SELECT result is still being consumed.
+    for record in conn.execute(query).fetchall():
+        new_hash = update_query_hash(record)
+        if new_hash == record['query_hash']:
+            # Nothing to fix: skip the no-op UPDATE and its log line.
+            continue
+
+        print(f"Updating hash for query {record['query_id']} from {record['query_hash']} to {new_hash}")
+        conn.execute(
+            queries.update()
+            .where(queries.c.id == record['query_id'])
+            .values(query_hash=new_hash))
+
+
+def downgrade():
+    """No-op: the previous hash values are not kept, so they cannot be restored."""
+    pass
diff --git a/redash/cli/queries.py b/redash/cli/queries.py
index 54943dee3c..dddb35724e 100644
--- a/redash/cli/queries.py
+++ b/redash/cli/queries.py
@@ -5,6 +5,23 @@
 manager = AppGroup(help="Queries management commands.")
 
 
+@manager.command(name="rehash")
+def rehash():
+    """Recompute every query's hash, report the ones that changed, and commit."""
+    from redash import models
+
+    for q in models.Query.query.all():
+        old_hash = q.query_hash
+        q.update_query_hash()
+        new_hash = q.query_hash
+
+        if old_hash != new_hash:
+            print(f"Query {q.id} has changed hash from {old_hash} to {new_hash}")
+            models.db.session.add(q)
+
+    models.db.session.commit()
+
+
 @manager.command(name="add_tag")
 @argument("query_id")
 @argument("tag")
diff --git a/redash/metrics/database.py b/redash/metrics/database.py
index 152427b2e5..6f16bdbc6d 100644
--- a/redash/metrics/database.py
+++ b/redash/metrics/database.py
@@ -5,7 +5,7 @@
 from sqlalchemy.engine import Engine
 from sqlalchemy.event import listens_for
 from sqlalchemy.orm.util import _ORMJoin
-from sqlalchemy.sql.selectable import Alias
+from sqlalchemy.sql.selectable import Alias, Join
 
 from redash import statsd_client
 
@@ -18,7 +18,7 @@ def _table_name_from_select_element(elt):
     if isinstance(t, Alias):
         t = t.original.froms[0]
 
-    while isinstance(t, _ORMJoin):
+    while isinstance(t, (_ORMJoin, Join)):
         t = t.left
 
     return t.name