From 787cfc13076fd96bf911fe81f47b1f90a93feb57 Mon Sep 17 00:00:00 2001 From: Alieu Date: Thu, 27 May 2021 14:30:09 -0700 Subject: [PATCH 01/16] port dbt-expect... macros to sql server & synapse --- integration_tests/dbt_utils/dbt_project.yml | 4 + .../_generalized/equal_expression.sql | 87 +++++++++- .../_generalized/expression_is_true.sql | 48 ++++++ ..._column_most_common_value_to_be_in_set.sql | 100 ++++++++++++ .../expect_column_stdev_to_be_between.sql | 27 ++++ ...mn_values_to_be_within_n_moving_stdevs.sql | 149 ++++++++++++++++++ ...ct_column_values_to_be_within_n_stdevs.sql | 63 ++++++++ ...lumn_values_to_be_unique_within_record.sql | 82 ++++++++++ ...grouped_row_values_to_have_recent_data.sql | 31 ++++ 9 files changed, 589 insertions(+), 2 deletions(-) create mode 100644 macros/dbt_expectations/schema_tests/_generalized/expression_is_true.sql create mode 100644 macros/dbt_expectations/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql create mode 100644 macros/dbt_expectations/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql create mode 100644 macros/dbt_expectations/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql create mode 100644 macros/dbt_expectations/schema_tests/distributional/expect_column_values_to_be_within_n_stdevs.sql create mode 100644 macros/dbt_expectations/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql create mode 100644 macros/dbt_expectations/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql diff --git a/integration_tests/dbt_utils/dbt_project.yml b/integration_tests/dbt_utils/dbt_project.yml index 344526e..394e17c 100644 --- a/integration_tests/dbt_utils/dbt_project.yml +++ b/integration_tests/dbt_utils/dbt_project.yml @@ -42,6 +42,10 @@ models: test_get_relations_by_prefix_and_union: *disabled test_groupby: *disabled # TSQL doesn't let you group by column numbers test_union: *not-synapse + 
emails: *disabled #TODO + timeseries_data: *disabled #TODO + data_test: *not-synapse # Need to implement for synapse + data_test_factored: *not-synapse # Need to implement for synapse web: # BROKEN DUE TO TEST DEFS test_url_host: *disabled test_url_path: *disabled diff --git a/macros/dbt_expectations/schema_tests/_generalized/equal_expression.sql b/macros/dbt_expectations/schema_tests/_generalized/equal_expression.sql index 7b7cc57..2a55af8 100644 --- a/macros/dbt_expectations/schema_tests/_generalized/equal_expression.sql +++ b/macros/dbt_expectations/schema_tests/_generalized/equal_expression.sql @@ -10,8 +10,91 @@ where {{ row_condition }} {% endif %} + {%- if group_by|length > 1 or group_by[0] != "'col'" %} group by {% for g in group_by -%} - {{ g }}{% if not loop.last %}, {% endif %} + {{ g }}{% if not loop.last %},{% endif %} {% endfor %} -{% endmacro -%} \ No newline at end of file + {% endif %} +{% endmacro -%} + +{%- macro sqlserver__test_equal_expression(model, expression, + compare_model, + compare_expression, + group_by, + compare_group_by, + row_condition, + compare_row_condition, + tolerance, + tolerance_percent, + return_difference) -%} +{#- T-SQL implementation of the equal_expression test. Each compare_* argument falls back to its primary counterpart (model, expression, row_condition, group_by) when it is not supplied. -#} + {%- set compare_model = model if not compare_model else compare_model -%} + {%- set compare_expression = expression if not compare_expression else compare_expression -%} + {%- set compare_row_condition = row_condition if not compare_row_condition else compare_row_condition -%} + {%- set compare_group_by = group_by if not compare_group_by else compare_group_by -%} + + {%- set n_cols = group_by|length %} + with a as ( + {{ dbt_expectations.get_select(model, expression, row_condition, group_by) }} + ), + b as ( + {{ dbt_expectations.get_select(compare_model, compare_expression, compare_row_condition, compare_group_by) }} + ), + final as ( + + select + {% for i in range(1, n_cols + 1) -%} + coalesce(a.col_{{ i }}, b.col_{{ i }}) as col_{{ i }}, + {% endfor %} + a.expression, + b.expression as compare_expression, + 
abs(coalesce(a.expression, 0) - coalesce(b.expression, 0)) as expression_difference, + abs(coalesce(a.expression, 0) - coalesce(b.expression, 0))/ + nullif(a.expression, 0) as expression_difference_percent + from + a + full outer join + b on + {% for i in range(1, n_cols + 1) -%} + a.col_{{ i }} = b.col_{{ i }} {% if not loop.last %}and{% endif %} + {% endfor -%} + ) + -- DEBUG: + -- select * from final + select + {% if return_difference %} + coalesce(sum(expression_difference), 0) + {% else %} + count(*) + {% endif %} + from final + where + {% if tolerance_percent %} + expression_difference_percent > {{ tolerance_percent }} + {% else %} + expression_difference > {{ tolerance }} + {% endif %} +{%- endmacro -%} +{# Synapse delegates to the SQL Server implementation, forwarding every argument unchanged. #} +{%- macro synapse__test_equal_expression(model, expression, + compare_model, + compare_expression, + group_by, + compare_group_by, + row_condition, + compare_row_condition, + tolerance, + tolerance_percent, + return_difference) -%} + {% do return( tsql_utils.sqlserver__test_equal_expression(model, expression, + compare_model, + compare_expression, + group_by, + compare_group_by, + row_condition, + compare_row_condition, + tolerance, + tolerance_percent, + return_difference)) -%} +{%- endmacro -%} \ No newline at end of file diff --git a/macros/dbt_expectations/schema_tests/_generalized/expression_is_true.sql b/macros/dbt_expectations/schema_tests/_generalized/expression_is_true.sql new file mode 100644 index 0000000..f8523c2 --- /dev/null +++ b/macros/dbt_expectations/schema_tests/_generalized/expression_is_true.sql @@ -0,0 +1,48 @@ +{% macro sqlserver__expression_is_true(model, expression, test_condition, group_by_columns, row_condition) %} +{# A `= true` test condition is rewritten to `= 1`, matching the 1/0 produced by the case expression below. #} +{% if test_condition == "= true" %} + {% set test_condition = "= 1" %} +{% endif %} + + +with grouped_expression as ( + + select + {% if group_by_columns %} + {% for group_by_column in group_by_columns -%} + {{ group_by_column }} as col_{{ loop.index }}, + {% endfor -%} + {% endif %} + case when {{ expression }} 
then 1 else 0 end as expression + from {{ model }} + {%- if row_condition %} + where + {{ row_condition }} + {% endif %} + {% if group_by_columns %} + group by + {% for group_by_column in group_by_columns -%} + {{ group_by_column }}{% if not loop.last %},{% endif %} + {% endfor %} + {% endif %} + +), +validation_errors as ( + + select + * + from + grouped_expression + where + not(expression {{ test_condition }}) + +) + +select count(*) +from validation_errors + +{% endmacro %} +{# Synapse delegates to the SQL Server implementation, forwarding every argument unchanged. #} +{% macro synapse__expression_is_true(model, expression, test_condition, group_by_columns, row_condition) %} + {% do return( tsql_utils.sqlserver__expression_is_true(model, expression, test_condition, group_by_columns, row_condition)) %} +{% endmacro %} \ No newline at end of file diff --git a/macros/dbt_expectations/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql b/macros/dbt_expectations/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql new file mode 100644 index 0000000..a08c016 --- /dev/null +++ b/macros/dbt_expectations/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql @@ -0,0 +1,100 @@ +{% macro sqlserver__test_expect_column_most_common_value_to_be_in_set(model, column_name, + value_set, + top_n, + quote_values=False, + data_type="decimal", + row_condition=None + ) -%} +{#- Ranks the column's values by how often they occur and checks the value at rank top_n against value_set. Values are cast to data_type unless quote_values is set. -#} +with value_counts as ( + + select + {% if quote_values -%} + {{ column_name }} + {%- else -%} + cast({{ column_name }} as {{ data_type }}) + {%- endif %} as value_field, + count(*) as value_count + + from {{ model }} + {% if row_condition %} + where {{ row_condition }} + {% endif %} + + group by {% if quote_values -%} + {{ column_name }} + {%- else -%} + cast({{ column_name }} as {{ data_type }}) + {%- endif %} + +), +value_counts_ranked as ( + + select + *, + row_number() over(order by value_count desc) as value_count_rank + from + value_counts + +), +value_count_top_n as ( + + select + value_field + from + 
value_counts_ranked + where + value_count_rank = {{ top_n }} + +), +set_values as ( + + {% for value in value_set -%} + select + {% if quote_values -%} + '{{ value }}' + {%- else -%} + cast({{ value }} as {{ data_type }}) + {%- endif %} as value_field + {% if not loop.last %}union all{% endif %} + {% endfor %} + +), +unique_set_values as ( + + select distinct value_field + from + set_values + +), +validation_errors as ( + -- values from the model that are not in the set + select + value_field + from + value_count_top_n + where + value_field not in (select value_field from unique_set_values) + +) + +select count(*) as validation_errors +from validation_errors + +{% endmacro %} +{# Synapse reuses the SQL Server implementation. quote_values, data_type and row_condition are forwarded exactly as received; hard-coding them here would silently discard the test's configuration. #} +{% macro synapse__test_expect_column_most_common_value_to_be_in_set(model, column_name, + value_set, + top_n, + quote_values, + data_type, + row_condition + ) -%} + {% do return( tsql_utils.sqlserver__test_expect_column_most_common_value_to_be_in_set(model, column_name, + value_set, + top_n, + quote_values=quote_values, + data_type=data_type, + row_condition=row_condition + )) -%} +{% endmacro %} \ No newline at end of file diff --git a/macros/dbt_expectations/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql b/macros/dbt_expectations/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql new file mode 100644 index 0000000..39db47d --- /dev/null +++ b/macros/dbt_expectations/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql @@ -0,0 +1,27 @@ +{% macro sqlserver__test_expect_column_stdev_to_be_between(model, column_name, + min_value, + max_value, + row_condition=None + ) -%} +{% set expression %} +stdev({{ column_name }}) +{% endset %} +{{ dbt_expectations.expression_between(model, + expression=expression, + min_value=min_value, + max_value=max_value, + row_condition=row_condition + ) }} +{% endmacro %} +{# Synapse delegates to the SQL Server implementation above. #} +{% macro synapse__test_expect_column_stdev_to_be_between(model, column_name, + min_value, + max_value, + row_condition + ) -%} + {% do return( 
tsql_utils.sqlserver__test_expect_column_stdev_to_be_between(model, column_name, + min_value, + max_value, + row_condition=row_condition + )) -%}{#- row_condition is forwarded so the caller's row filter is not dropped on Synapse -#} +{% endmacro %} \ No newline at end of file diff --git a/macros/dbt_expectations/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql b/macros/dbt_expectations/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql new file mode 100644 index 0000000..84f8834 --- /dev/null +++ b/macros/dbt_expectations/schema_tests/distributional/expect_column_values_to_be_within_n_moving_stdevs.sql @@ -0,0 +1,149 @@ +{%- macro _get_metric_expression(metric_column, take_logs) -%} + +{%- if take_logs %} +{%- set expr = "nullif(" ~ metric_column ~ ", 0)" -%} +coalesce({{ dbt_expectations.log_natural(expr) }}, 0) +{%- else -%} +coalesce({{ metric_column }}, 0) +{%- endif %} + +{%- endmacro -%} + +{# Aggregates column_name per period, optionally differences it against the value lookback_periods earlier, and compares each period with trailing statistics. The upper/lower sigma thresholds default to +/- sigma_threshold. #} +{% macro sqlserver__test_expect_column_values_to_be_within_n_moving_stdevs(model, + column_name, + date_column_name, + period, + lookback_periods, + trend_periods, + test_periods, + sigma_threshold, + sigma_threshold_upper, + sigma_threshold_lower, + take_diffs, + take_logs + ) %} + +{%- set sigma_threshold_upper = sigma_threshold_upper if sigma_threshold_upper else sigma_threshold -%} +{%- set sigma_threshold_lower = sigma_threshold_lower if sigma_threshold_lower else -1 * sigma_threshold -%} + + + +with grouped_metric_values as ( + + select + {{ dbt_utils.date_trunc(period, date_column_name) }} as metric_period, + sum({{ column_name }}) as agg_metric_value + from + {{ model }} + group by + {{ dbt_utils.date_trunc(period, date_column_name) }} + +), + +grouped_metric_values_with_priors as ( + + select + *, + lag(agg_metric_value, {{ lookback_periods }}) over(order by metric_period) as prior_agg_metric_value + from + grouped_metric_values d + +), + +{%- if take_diffs %} + +metric_values as ( + + select + *, + {{ dbt_expectations._get_metric_expression("agg_metric_value", take_logs) }} + - + {{ 
dbt_expectations._get_metric_expression("prior_agg_metric_value", take_logs) }} + as metric_test_value + from + grouped_metric_values_with_priors d +), + +{%- else %} +{# Without take_diffs the metric itself is the tested value; it still needs the metric_test_value alias that the CTEs below read. #} +metric_values as ( + select + *, + {{ dbt_expectations._get_metric_expression("agg_metric_value", take_logs) }} as metric_test_value + from + grouped_metric_values +), +{%- endif %} + +metric_moving_calcs as ( + + select + *, + avg(metric_test_value) + over(order by metric_period rows + between {{ trend_periods }} preceding and 1 preceding) as metric_test_rolling_average, + stdev(metric_test_value) + over(order by metric_period rows + between {{ trend_periods }} preceding and 1 preceding) as metric_test_rolling_stddev + from + metric_values + +), +metric_sigma as ( + + select + *, + (metric_test_value - metric_test_rolling_average) as metric_test_delta, + (metric_test_value - metric_test_rolling_average)/nullif(metric_test_rolling_stddev, 0) as metric_test_sigma + from + metric_moving_calcs + +) +select + count(*) +from + metric_sigma +where + + metric_period >= cast( + {{ dbt_utils.dateadd(period, -test_periods, dbt_utils.date_trunc(period, dbt_date.now())) }} + as {{ dbt_utils.type_timestamp() }}) + and + metric_period < {{ dbt_utils.date_trunc(period, dbt_date.now()) }} + and + + not ( + metric_test_sigma >= {{ sigma_threshold_lower }} and + metric_test_sigma <= {{ sigma_threshold_upper }} + ) +{%- endmacro -%} + +{# Synapse delegates to the SQL Server implementation, forwarding every argument unchanged. #} +{% macro synapse__test_expect_column_values_to_be_within_n_moving_stdevs(model, + column_name, + date_column_name, + period, + lookback_periods, + trend_periods, + test_periods, + sigma_threshold, + sigma_threshold_upper, + sigma_threshold_lower, + take_diffs, + take_logs + ) %} + {% do return( tsql_utils.sqlserver__test_expect_column_values_to_be_within_n_moving_stdevs(model, + column_name, + date_column_name, + period, + lookback_periods, + trend_periods, + test_periods, + sigma_threshold, + sigma_threshold_upper, + sigma_threshold_lower, + take_diffs, + take_logs + )) %} +{%- endmacro -%} diff --git 
a/macros/dbt_expectations/schema_tests/distributional/expect_column_values_to_be_within_n_stdevs.sql b/macros/dbt_expectations/schema_tests/distributional/expect_column_values_to_be_within_n_stdevs.sql new file mode 100644 index 0000000..3ac8d14 --- /dev/null +++ b/macros/dbt_expectations/schema_tests/distributional/expect_column_values_to_be_within_n_stdevs.sql @@ -0,0 +1,63 @@ +{# Counts rows (or per-group sums when group_by is given) whose z-score against the whole result set exceeds sigma_threshold in absolute value. #} +{% macro sqlserver__test_expect_column_values_to_be_within_n_stdevs(model, + column_name, + group_by, + sigma_threshold + ) %} +with metric_values as ( + + {% if group_by -%} + select + {{ group_by }} as metric_date, + sum({{ column_name }}) as {{ column_name }} + from + {{ model }} + group by + {{ group_by }} + {%- else -%} + select + {{ column_name }} as {{ column_name }} + from + {{ model }} + {%- endif %} + +), +metric_values_with_statistics as ( + + select + *, + avg({{ column_name }}) over() as {{ column_name }}_average, + stdev({{ column_name }}) over() as {{ column_name }}_stddev + from + metric_values + +), +metric_values_z_scores as ( +{# nullif turns a zero standard deviation (all values identical) into a NULL sigma instead of a division by zero; NULL never satisfies the threshold filter below. #} + select + *, + ({{ column_name }} - {{ column_name }}_average)/nullif({{ column_name }}_stddev, 0) as {{ column_name }}_sigma + from + metric_values_with_statistics + +) +select + count(*) as error_count +from + metric_values_z_scores +where + abs({{ column_name }}_sigma) > {{ sigma_threshold }} +{%- endmacro %} + +{# Synapse delegates to the SQL Server implementation, forwarding every argument unchanged. #} +{% macro synapse__test_expect_column_values_to_be_within_n_stdevs(model, + column_name, + group_by, + sigma_threshold + ) %} + {% do return( tsql_utils.sqlserver__test_expect_column_values_to_be_within_n_stdevs(model, + column_name, + group_by, + sigma_threshold + )) %} +{%- endmacro %} \ No newline at end of file diff --git a/macros/dbt_expectations/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql b/macros/dbt_expectations/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql new file mode 100644 index 0000000..7443ef8 --- /dev/null +++ 
b/macros/dbt_expectations/schema_tests/multi-column/expect_select_column_values_to_be_unique_within_record.sql @@ -0,0 +1,82 @@ +{% macro sqlserver__test_expect_select_column_values_to_be_unique_within_record(model, + column_list, + quote_columns, + ignore_row_if, + row_condition + ) %} +{# Builds `columns` from column_list (quoted through adapter.quote when quote_columns is truthy). NOTE(review): the `else` branch below looks unreachable because the two preceding conditions are complementary - confirm. #} +{% if not quote_columns %} + {%- set columns=column_list %} +{% elif quote_columns %} + {%- set columns=[] %} + {% for column in column_list -%} + {% set columns = columns.append( adapter.quote(column) ) %} + {%- endfor %} +{% else %} + {{ exceptions.raise_compiler_error( + "`quote_columns` argument for unique_combination_of_columns test must be one of [True, False] Got: '" ~ quote_columns ~"'.'" + ) }} +{% endif %} + +with column_values as ( + + select + row_number() over(order by (SELECT 'does order really matter here?')) as row_index, + {% for column in columns -%} + {{ column }}{% if not loop.last %},{% endif %} + {%- endfor %} + from {{ model }} + where 1=1 + {% if row_condition %} + and {{ row_condition }} + {% endif %} + {% if ignore_row_if == "all_values_are_missing" %} + and + ( + {% for column in columns -%} + {{ column }} is not null{% if not loop.last %} and {% endif %} + {%- endfor %} + ) + {% elif ignore_row_if == "any_value_is_missing" %} + and + ( + {% for column in columns -%} + {{ column }} is not null{% if not loop.last %} or {% endif %} + {%- endfor %} + ) + {% endif %} +{# NOTE(review): `all_values_are_missing` keeps only rows where every column is non-null, while `any_value_is_missing` keeps rows where at least one is non-null; the option names suggest the opposite pairing - confirm against the default implementation before changing. #} +), +unpivot_columns as ( + + {% for column in columns %} + select row_index, '{{ column }}' as column_name, {{ column }} as column_value from column_values + {% if not loop.last %}union all{% endif %} + {% endfor %} +), +validation_errors as ( +{# A row fails when it has fewer distinct values than there are selected columns. #} + select + row_index, + count(distinct column_value) as column_values + from unpivot_columns + group by row_index + having count(distinct column_value) < {{ columns | length }} + +) +select count(*) from validation_errors +{% endmacro %} + +{% macro synapse__test_expect_select_column_values_to_be_unique_within_record(model, + column_list, + 
quote_columns, + ignore_row_if, + row_condition + ) %} + {% do return( tsql_utils.sqlserver__test_expect_select_column_values_to_be_unique_within_record(model, + column_list, + quote_columns, + ignore_row_if, + row_condition + )) %} +{% endmacro %} \ No newline at end of file diff --git a/macros/dbt_expectations/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql b/macros/dbt_expectations/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql new file mode 100644 index 0000000..91a751b --- /dev/null +++ b/macros/dbt_expectations/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql @@ -0,0 +1,31 @@ +{% macro sqlserver__test_expect_grouped_row_values_to_have_recent_data(model, group_by, timestamp_column, datepart, interval) %} +with latest_grouped_timestamps as ( +{# One row per group_by combination carrying that group's newest timestamp_column value. #} + select + {%- for g in group_by %} + {{ g }}, + {%- endfor %} + max({{ timestamp_column }}) as latest_timestamp_column + from + {{ model }} + group by + {%- for g in group_by %} + {{g}}{%- if not loop.last %}, {%- endif %} + {%- endfor %} + +), +validation_errors as ( +{# A group fails when its newest timestamp is older than now minus `interval` dateparts. #} + select * + from + latest_grouped_timestamps + where + latest_timestamp_column < {{ dbt_utils.dateadd(datepart, interval * -1, dbt_date.now()) }} + +) +select count(*) from validation_errors +{% endmacro %} +{# Synapse delegates to the SQL Server implementation, forwarding every argument unchanged. #} +{% macro synapse__test_expect_grouped_row_values_to_have_recent_data(model, group_by, timestamp_column, datepart, interval) %} + {% do return( tsql_utils.sqlserver__test_expect_grouped_row_values_to_have_recent_data(model, group_by, timestamp_column, datepart, interval)) %} +{% endmacro %} \ No newline at end of file From 5be86707f408e8891ba531e00299a0222ec3bb80 Mon Sep 17 00:00:00 2001 From: Alieu Date: Thu, 27 May 2021 15:20:13 -0700 Subject: [PATCH 02/16] pointing to temporary branches --- .gitmodules | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index a09d98f..04c22a1 100644 --- a/.gitmodules +++ b/.gitmodules 
@@ -4,13 +4,13 @@ branch = master [submodule "dbt-expectations"] path = dbt-expectations - url = https://github.com/calogica/dbt-expectations - branch = main + url = https://github.com/alieus/dbt-expectations + branch = dispatch_macros [submodule "dbt-date"] path = dbt-date url = https://github.com/calogica/dbt-date branch = main [submodule "dbt-audit-helper"] path = dbt-audit-helper - url = https://github.com/fishtown-analytics/dbt-audit-helper - branch = master + url = https://github.com/alieus/dbt-audit-helper + branch = adapter_dispatch From 597d8a34bd4d28a4e3900c4abba13618fb488faf Mon Sep 17 00:00:00 2001 From: Alieu Date: Fri, 28 May 2021 14:31:28 -0700 Subject: [PATCH 03/16] dbt-expectations dispatch PR merged --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 04c22a1..39748fd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,8 +4,8 @@ branch = master [submodule "dbt-expectations"] path = dbt-expectations - url = https://github.com/alieus/dbt-expectations - branch = dispatch_macros + url = https://github.com/calogica/dbt-expectations + branch = main [submodule "dbt-date"] path = dbt-date url = https://github.com/calogica/dbt-date From 1d3d41e027a92e182ced91850e1c7102f0a10c1f Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Wed, 2 Jun 2021 17:31:36 -0400 Subject: [PATCH 04/16] move to right commit --- dbt-expectations | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt-expectations b/dbt-expectations index 2a58ec6..cd3ff97 160000 --- a/dbt-expectations +++ b/dbt-expectations @@ -1 +1 @@ -Subproject commit 2a58ec6969307330ca97d8cec18b80d488c24624 +Subproject commit cd3ff974a21d5d98d26ab43ce1599fd529dbba22 From 4ede60ad2d70dc85117d0f76a8eee1dcfd4f7355 Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Wed, 2 Jun 2021 17:39:25 -0400 Subject: [PATCH 05/16] new version --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.circleci/config.yml b/.circleci/config.yml index f13e786..10c3056 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -28,7 +28,7 @@ jobs: python3 -m venv venv . venv/bin/activate pip install --upgrade pip setuptools - pip install dbt-synapse + pip install git+https://github.com/dbt-msft/dbt-synapse.git#egg=dbt-synapse mkdir -p ~/.dbt cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml From 989b05c92b152b34b8185b9230ae9683979004b8 Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Wed, 2 Jun 2021 17:40:28 -0400 Subject: [PATCH 06/16] more checks --- .circleci/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 10c3056..da2a6f1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,6 +31,8 @@ jobs: pip install git+https://github.com/dbt-msft/dbt-synapse.git#egg=dbt-synapse mkdir -p ~/.dbt cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml + dbt --version + pip list | grep "dbt" - run: name: "Run Tests - dbt-utils" From ba4e3bd6eeb362119a5bbfe4393e2f318031001a Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Wed, 2 Jun 2021 18:43:14 -0400 Subject: [PATCH 07/16] enable dbt expectations tests --- .circleci/config.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5485f4b..bd6f7a4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -146,8 +146,8 @@ workflows: context: DBT_SYNAPSE_PROFILE - integration-dbt-utils-azuresql: *dbt-context - integration-dbt-audit-helper-azuresql: *dbt-context - # - integration-dbt-expectations-azuresql: *dbt-context - # - integration-dbt-date-azuresql: *dbt-context + - integration-dbt-expectations-azuresql: *dbt-context + - integration-dbt-date-azuresql: *dbt-context - integration-dbt-utils-synapse: &syn-step <<: *dbt-context requires: @@ -156,12 +156,12 @@ workflows: <<: *dbt-context requires: - start-synapse - # - 
integration-dbt-expectations-synapse: *syn-step + - integration-dbt-date-synapse: *syn-step - pause-synapse: <<: *dbt-context requires: - integration-dbt-utils-synapse - integration-dbt-audit-helper-synapse - # - integration-dbt-expectations-synapse - # - integration-dbt-date-synapse + - integration-dbt-expectations-synapse + - integration-dbt-date-synapse From 98653610bc2141020ed1615d8c08d87bdf7b9e39 Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Wed, 2 Jun 2021 18:54:45 -0400 Subject: [PATCH 08/16] move to correct location --- integration_tests/dbt_expectations/dbt_project.yml | 9 ++++++++- integration_tests/dbt_utils/dbt_project.yml | 5 ----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/integration_tests/dbt_expectations/dbt_project.yml b/integration_tests/dbt_expectations/dbt_project.yml index 8abd016..caefe44 100644 --- a/integration_tests/dbt_expectations/dbt_project.yml +++ b/integration_tests/dbt_expectations/dbt_project.yml @@ -25,4 +25,11 @@ vars: models: dbt_expectations_integration_tests: data_test_factored: - +materialized: table \ No newline at end of file + +materialized: table + emails: &disabled #TODO + +enabled: false + timeseries_data: *disabled + # Need to implement for synapse + data_test: &not-synapse + +enabled: "{{ target.name != 'synapse' }}" + data_test_factored: *not-synapse \ No newline at end of file diff --git a/integration_tests/dbt_utils/dbt_project.yml b/integration_tests/dbt_utils/dbt_project.yml index 97954f9..d3e25c8 100644 --- a/integration_tests/dbt_utils/dbt_project.yml +++ b/integration_tests/dbt_utils/dbt_project.yml @@ -57,11 +57,6 @@ models: dates: *disabled dim_week: *disabled dim_hour: *disabled - dbt_expectations_integration_tests: - emails: *disabled #TODO - timeseries_data: *disabled #TODO - data_test: *not-synapse # Need to implement for synapse - data_test_factored: *not-synapse # Need to implement 
for synapse seeds: +quote_columns: false From 78a7bf6aacf9ff681297684fad791be8020899b3 Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Wed, 2 Jun 2021 19:02:10 -0400 Subject: [PATCH 09/16] correct folder hierarchy --- .../dbt_expectations/dbt_project.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/integration_tests/dbt_expectations/dbt_project.yml b/integration_tests/dbt_expectations/dbt_project.yml index caefe44..2904503 100644 --- a/integration_tests/dbt_expectations/dbt_project.yml +++ b/integration_tests/dbt_expectations/dbt_project.yml @@ -24,12 +24,13 @@ vars: models: dbt_expectations_integration_tests: - data_test_factored: - +materialized: table - emails: &disabled #TODO - +enabled: false - timeseries_data: *disabled - # Need to implement for synapse - data_test: &not-synapse - +enabled: "{{ target.name != 'synapse' }}" - data_test_factored: *not-synapse \ No newline at end of file + schema_tests: + data_test_factored: + +materialized: table + emails: &disabled #TODO + +enabled: false + timeseries_data: *disabled + # Need to implement for synapse + data_test: &not-synapse + +enabled: "{{ target.name != 'synapse' }}" + data_test_factored: *not-synapse \ No newline at end of file From 2bf161d0d70be74ecbabba5bd60d1dea87b0b11e Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Wed, 2 Jun 2021 19:35:13 -0400 Subject: [PATCH 10/16] issue w/ equal_expression macro --- integration_tests/dbt_expectations/dbt_project.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/integration_tests/dbt_expectations/dbt_project.yml b/integration_tests/dbt_expectations/dbt_project.yml index 2904503..27393bd 100644 --- a/integration_tests/dbt_expectations/dbt_project.yml +++ b/integration_tests/dbt_expectations/dbt_project.yml @@ -30,7 +30,11 @@ models: emails: &disabled #TODO +enabled: false timeseries_data: *disabled - # Need to implement for synapse - data_test: &not-synapse - +enabled: "{{ target.name != 
'synapse' }}" - data_test_factored: *not-synapse \ No newline at end of file + # anders's weird NoneType error + # (see https://github.com/calogica/dbt-expectations/pull/63) + data_test: *disabled + # # Need to implement for synapse + # data_test: &not-synapse + # +enabled: "{{ target.name != 'synapse' }}" + data_test_factored: &not-synapse + +enabled: "{{ target.name != 'synapse' }}" \ No newline at end of file From c2f3b30af0cc62f8ea0ff07b30c14288c351e5cf Mon Sep 17 00:00:00 2001 From: Alieu Date: Thu, 3 Jun 2021 08:35:56 -0700 Subject: [PATCH 11/16] thanks b-per? --- dbt-audit-helper | 2 +- dbt-date | 2 +- dbt-expectations | 2 +- dbt-utils | 2 +- .../_generalized/equal_expression.sql | 38 +++++++++++-------- 5 files changed, 27 insertions(+), 19 deletions(-) diff --git a/dbt-audit-helper b/dbt-audit-helper index a46aeb1..144c38f 160000 --- a/dbt-audit-helper +++ b/dbt-audit-helper @@ -1 +1 @@ -Subproject commit a46aeb19fb56f5917cd6a2aea5ad7fe1645eb829 +Subproject commit 144c38fede73be5817708ed8f5398a7e663aec25 diff --git a/dbt-date b/dbt-date index f877c86..e58387d 160000 --- a/dbt-date +++ b/dbt-date @@ -1 +1 @@ -Subproject commit f877c860c253a97a11408fc78be510732933349d +Subproject commit e58387dd6e85b116e693e2675193d71547a09a35 diff --git a/dbt-expectations b/dbt-expectations index cd3ff97..2a58ec6 160000 --- a/dbt-expectations +++ b/dbt-expectations @@ -1 +1 @@ -Subproject commit cd3ff974a21d5d98d26ab43ce1599fd529dbba22 +Subproject commit 2a58ec6969307330ca97d8cec18b80d488c24624 diff --git a/dbt-utils b/dbt-utils index 4fbaab8..bbba960 160000 --- a/dbt-utils +++ b/dbt-utils @@ -1 +1 @@ -Subproject commit 4fbaab83c8dc79e6c8d225c4880fb796db97001e +Subproject commit bbba960726667abc66b42624f0d36bbb62c37593 diff --git a/macros/dbt_expectations/schema_tests/_generalized/equal_expression.sql b/macros/dbt_expectations/schema_tests/_generalized/equal_expression.sql index 2a55af8..9cc7aaa 100644 --- a/macros/dbt_expectations/schema_tests/_generalized/equal_expression.sql 
+++ b/macros/dbt_expectations/schema_tests/_generalized/equal_expression.sql @@ -1,8 +1,10 @@ {%- macro sqlserver__get_select(model, expression, row_condition, group_by) %} select - {% for g in group_by -%} - {{ g }} as col_{{ loop.index }}, - {% endfor -%} + {# {%- if group_by %} #} + {% for g in group_by or [] -%} + {{ g }} as col_{{ loop.index }}, + {% endfor -%} + {# {% endif %} #} {{ expression }} as expression from {{ model }} @@ -10,11 +12,13 @@ where {{ row_condition }} {% endif %} - {%- if group_by|length > 1 or group_by[0] != "'col'" %} - group by - {% for g in group_by -%} - {{ g }}{% if not loop.last %},{% endif %} - {% endfor %} + {%- if group_by %} + {%- if group_by|length > 1 or group_by[0] != "'col'" %} + group by + {% for g in group_by -%} + {{ g }}{% if not loop.last %},{% endif %} + {% endfor %} + {% endif %} {% endif %} {% endmacro -%} @@ -34,7 +38,7 @@ {%- set compare_row_condition = row_condition if not compare_row_condition else compare_row_condition -%} {%- set compare_group_by = group_by if not compare_group_by else compare_group_by -%} - {%- set n_cols = group_by|length %} + {%- set n_cols = group_by|length if group_by else 0 %} with a as ( {{ dbt_expectations.get_select(model, expression, row_condition, group_by) }} ), @@ -53,12 +57,16 @@ abs(coalesce(a.expression, 0) - coalesce(b.expression, 0))/ nullif(a.expression, 0) as expression_difference_percent from - a - full outer join - b on - {% for i in range(1, n_cols + 1) -%} - a.col_{{ i }} = b.col_{{ i }} {% if not loop.last %}and{% endif %} - {% endfor -%} + {% if n_cols > 0 %} + a + full outer join + b on + {% for i in range(1, n_cols + 1) -%} + a.col_{{ i }} = b.col_{{ i }} {% if not loop.last %}and{% endif %} + {% endfor -%} + {% else %} + a cross join b + {% endif %} ) -- DEBUG: -- select * from final From cf559444b9422818ee1fc154ce9a36bee0e7b7f0 Mon Sep 17 00:00:00 2001 From: Alieu Date: Thu, 3 Jun 2021 15:11:09 -0700 Subject: [PATCH 12/16] updating git submodules --- .gitmodules | 
4 ++-- dbt-audit-helper | 2 +- dbt-expectations | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index a8a62df..9891d2a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -12,5 +12,5 @@ branch = main [submodule "dbt-audit-helper"] path = dbt-audit-helper - url = https://github.com/alieus/dbt-audit-helper - branch = adapter_dispatch + url = https://github.com/fishtown-analytics/dbt-audit-helper + branch = master diff --git a/dbt-audit-helper b/dbt-audit-helper index 144c38f..c6b917c 160000 --- a/dbt-audit-helper +++ b/dbt-audit-helper @@ -1 +1 @@ -Subproject commit 144c38fede73be5817708ed8f5398a7e663aec25 +Subproject commit c6b917c62aa18e43b8e73d72cd17a9a8e3721116 diff --git a/dbt-expectations b/dbt-expectations index 2a58ec6..cd3ff97 160000 --- a/dbt-expectations +++ b/dbt-expectations @@ -1 +1 @@ -Subproject commit 2a58ec6969307330ca97d8cec18b80d488c24624 +Subproject commit cd3ff974a21d5d98d26ab43ce1599fd529dbba22 From 9c60f78d8783cbf5607ff67d6d6ed238e1dcc524 Mon Sep 17 00:00:00 2001 From: Alieu Date: Thu, 3 Jun 2021 15:56:02 -0700 Subject: [PATCH 13/16] enable data tests & disable syn.. 
timeseries ext --- integration_tests/dbt_expectations/dbt_project.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integration_tests/dbt_expectations/dbt_project.yml b/integration_tests/dbt_expectations/dbt_project.yml index 27393bd..68434e9 100644 --- a/integration_tests/dbt_expectations/dbt_project.yml +++ b/integration_tests/dbt_expectations/dbt_project.yml @@ -33,8 +33,8 @@ models: # anders's weird NoneType error # (see https://github.com/calogica/dbt-expectations/pull/63) data_test: *disabled - # # Need to implement for synapse - # data_test: &not-synapse - # +enabled: "{{ target.name != 'synapse' }}" + # Need to implement for synapse + timeseries_data_extended: &not-synapse + +enabled: "{{ target.name != 'synapse' }}" data_test_factored: &not-synapse +enabled: "{{ target.name != 'synapse' }}" \ No newline at end of file From 4802c68da40b3786c88100c6ce4d0ea3bfe222da Mon Sep 17 00:00:00 2001 From: Alieu Date: Thu, 3 Jun 2021 16:04:37 -0700 Subject: [PATCH 14/16] fix yaml anchor --- integration_tests/dbt_expectations/dbt_project.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integration_tests/dbt_expectations/dbt_project.yml b/integration_tests/dbt_expectations/dbt_project.yml index 68434e9..f2cd4bf 100644 --- a/integration_tests/dbt_expectations/dbt_project.yml +++ b/integration_tests/dbt_expectations/dbt_project.yml @@ -36,5 +36,4 @@ models: # Need to implement for synapse timeseries_data_extended: &not-synapse +enabled: "{{ target.name != 'synapse' }}" - data_test_factored: &not-synapse - +enabled: "{{ target.name != 'synapse' }}" \ No newline at end of file + data_test_factored: *not-synapse From a539c970c8a0273d23b20d8785c559932eb71e70 Mon Sep 17 00:00:00 2001 From: Alieu Date: Thu, 3 Jun 2021 16:35:28 -0700 Subject: [PATCH 15/16] enabe data test --- integration_tests/dbt_expectations/dbt_project.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/dbt_expectations/dbt_project.yml
b/integration_tests/dbt_expectations/dbt_project.yml index f2cd4bf..91302f6 100644 --- a/integration_tests/dbt_expectations/dbt_project.yml +++ b/integration_tests/dbt_expectations/dbt_project.yml @@ -32,7 +32,7 @@ models: timeseries_data: *disabled # anders's weird NoneType error # (see https://github.com/calogica/dbt-expectations/pull/63) - data_test: *disabled + # data_test: *disabled # Need to implement for synapse timeseries_data_extended: &not-synapse +enabled: "{{ target.name != 'synapse' }}" From bc7fe8ffa0d8ff42182364de783a01ef5be6db1a Mon Sep 17 00:00:00 2001 From: Alieu Date: Thu, 3 Jun 2021 16:47:41 -0700 Subject: [PATCH 16/16] disable failing timeseries tests --- integration_tests/dbt_expectations/dbt_project.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/integration_tests/dbt_expectations/dbt_project.yml b/integration_tests/dbt_expectations/dbt_project.yml index 91302f6..f025f41 100644 --- a/integration_tests/dbt_expectations/dbt_project.yml +++ b/integration_tests/dbt_expectations/dbt_project.yml @@ -32,8 +32,7 @@ models: timeseries_data: *disabled # anders's weird NoneType error # (see https://github.com/calogica/dbt-expectations/pull/63) - # data_test: *disabled + data_test: *disabled # Need to implement for synapse - timeseries_data_extended: &not-synapse - +enabled: "{{ target.name != 'synapse' }}" - data_test_factored: *not-synapse + data_test_factored: *disabled + timeseries_data_extended: *disabled