Skip to content

Commit

Permalink
Merge pull request #47 from dbt-msft/shim_dbt-expectations_macros
Browse files Browse the repository at this point in the history
Shim dbt-expectations macros
  • Loading branch information
alieus authored Jun 4, 2021
2 parents c162fba + bc7fe8f commit 28822f1
Show file tree
Hide file tree
Showing 15 changed files with 620 additions and 20 deletions.
6 changes: 3 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ workflows:
context: DBT_SYNAPSE_PROFILE
- integration-dbt-utils-azuresql: *dbt-context
- integration-dbt-audit-helper-azuresql: *dbt-context
# - integration-dbt-expectations-azuresql: *dbt-context
- integration-dbt-expectations-azuresql: *dbt-context
- integration-dbt-date-azuresql: *dbt-context
- integration-dbt-utils-synapse: &syn-step
<<: *dbt-context
Expand All @@ -156,12 +156,12 @@ workflows:
<<: *dbt-context
requires:
- start-synapse
# - integration-dbt-expectations-synapse: *syn-step
- integration-dbt-expectations-synapse: *syn-step
- integration-dbt-date-synapse: *syn-step
- pause-synapse:
<<: *dbt-context
requires:
- integration-dbt-utils-synapse
- integration-dbt-audit-helper-synapse
# - integration-dbt-expectations-synapse
- integration-dbt-expectations-synapse
- integration-dbt-date-synapse
5 changes: 2 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,5 @@
branch = main
[submodule "dbt-audit-helper"]
path = dbt-audit-helper
url = https://github.com/alieus/dbt-audit-helper
branch = adapter_dispatch

url = https://github.com/fishtown-analytics/dbt-audit-helper
branch = master
2 changes: 1 addition & 1 deletion dbt-audit-helper
2 changes: 1 addition & 1 deletion dbt-expectations
Submodule dbt-expectations updated 44 files
+15 −0 CHANGELOG.md
+184 −99 README.md
+1 −1 integration_tests/dbt_project.yml
+23 −1 integration_tests/models/schema_tests/schema.yml
+1 −1 integration_tests/models/schema_tests/timeseries_data.sql
+1 −1 integration_tests/models/schema_tests/timeseries_data_extended.sql
+1 −1 integration_tests/models/schema_tests/timeseries_hourly_data_extended.sql
+10 −2 macros/schema_tests/_generalized/equal_expression.sql
+17 −7 macros/schema_tests/_generalized/expression_between.sql
+12 −6 macros/schema_tests/_generalized/expression_is_true.sql
+2 −0 macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_be_greater_than.sql
+2 −0 macros/schema_tests/aggregate_functions/expect_column_distinct_count_to_equal.sql
+4 −2 macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql
+4 −2 macros/schema_tests/aggregate_functions/expect_column_mean_to_be_between.sql
+4 −2 macros/schema_tests/aggregate_functions/expect_column_median_to_be_between.sql
+4 −2 macros/schema_tests/aggregate_functions/expect_column_min_to_be_between.sql
+7 −2 macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql
+4 −2 macros/schema_tests/aggregate_functions/expect_column_proportion_of_unique_values_to_be_between.sql
+4 −2 macros/schema_tests/aggregate_functions/expect_column_quantile_values_to_be_between.sql
+6 −2 macros/schema_tests/aggregate_functions/expect_column_stdev_to_be_between.sql
+4 −2 macros/schema_tests/aggregate_functions/expect_column_sum_to_be_between.sql
+4 −2 macros/schema_tests/aggregate_functions/expect_column_unique_value_count_to_be_between.sql
+3 −2 macros/schema_tests/column_values_basic/expect_column_values_to_be_between.sql
+1 −0 macros/schema_tests/column_values_basic/expect_column_values_to_be_null.sql
+1 −0 macros/schema_tests/column_values_basic/expect_column_values_to_not_be_null.sql
+1 −0 macros/schema_tests/multi-column/expect_column_pair_values_A_to_be_greater_than_B.sql
+1 −0 macros/schema_tests/multi-column/expect_column_pair_values_to_be_equal.sql
+1 −0 macros/schema_tests/multi-column/expect_column_pair_values_to_be_in_set.sql
+2 −0 macros/schema_tests/multi-column/expect_multicolumn_sum_to_equal.sql
+8 −8 macros/schema_tests/string_matching/expect_column_value_lengths_to_be_between.sql
+1 −0 macros/schema_tests/string_matching/expect_column_value_lengths_to_equal.sql
+1 −0 macros/schema_tests/string_matching/expect_column_values_to_match_like_pattern.sql
+1 −0 macros/schema_tests/string_matching/expect_column_values_to_match_like_pattern_list.sql
+1 −0 macros/schema_tests/string_matching/expect_column_values_to_match_regex.sql
+1 −0 macros/schema_tests/string_matching/expect_column_values_to_match_regex_list.sql
+1 −0 macros/schema_tests/string_matching/expect_column_values_to_not_match_like_pattern.sql
+1 −0 macros/schema_tests/string_matching/expect_column_values_to_not_match_like_pattern_list.sql
+1 −0 macros/schema_tests/string_matching/expect_column_values_to_not_match_regex.sql
+1 −0 macros/schema_tests/string_matching/expect_column_values_to_not_match_regex_list.sql
+6 −0 macros/schema_tests/table_shape/expect_grouped_row_values_to_have_recent_data.sql
+19 −2 macros/schema_tests/table_shape/expect_table_column_count_to_be_between.sql
+5 −3 macros/schema_tests/table_shape/expect_table_row_count_to_be_between.sql
+9 −1 macros/schema_tests/table_shape/expect_table_row_count_to_equal.sql
+0 −2 packages.yml
2 changes: 1 addition & 1 deletion dbt-utils
14 changes: 12 additions & 2 deletions integration_tests/dbt_expectations/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,15 @@ vars:

models:
dbt_expectations_integration_tests:
data_test_factored:
+materialized: table
schema_tests:
data_test_factored:
+materialized: table
emails: &disabled #TODO
+enabled: false
timeseries_data: *disabled
# anders's weird NoneType error
# (see https://github.com/calogica/dbt-expectations/pull/63)
data_test: *disabled
# Need to implement for synapse
data_test_factored: *disabled
timeseries_data_extended: *disabled
Original file line number Diff line number Diff line change
@@ -1,17 +1,108 @@
{%- macro sqlserver__get_select(model, expression, row_condition, group_by) %}
select
{% for g in group_by -%}
{{ g }} as col_{{ loop.index }},
{% endfor -%}
{# {%- if group_by %} #}
{% for g in group_by or [] -%}
{{ g }} as col_{{ loop.index }},
{% endfor -%}
{# {% endif %} #}
{{ expression }} as expression
from
{{ model }}
{%- if row_condition %}
where
{{ row_condition }}
{% endif %}
group by
{% for g in group_by -%}
{{ g }}{% if not loop.last %}, {% endif %}
{% endfor %}
{% endmacro -%}
{%- if group_by %}
{%- if group_by|length > 1 or group_by[0] != "'col'" %}
group by
{% for g in group_by -%}
{{ g }}{% if not loop.last %},{% endif %}
{% endfor %}
{% endif %}
{% endif %}
{% endmacro -%}

{#-
    T-SQL implementation of the dbt_expectations equal_expression test.

    Renders one select for the model and one for the comparison side through
    dbt_expectations.get_select, joins them on the col_N group-by aliases
    (or cross joins them when no group_by is given) and compares the two
    expression values.

    Arguments:
        model, expression, group_by, row_condition: primary side.
        compare_model, compare_expression, compare_group_by,
        compare_row_condition: comparison side; each one falls back to its
            primary counterpart when it is empty.
        tolerance: absolute difference allowed between both expressions.
        tolerance_percent: relative difference allowed; when truthy it is
            used instead of tolerance.
        return_difference: when truthy the query returns the summed absolute
            difference of the offending rows instead of their count.
-#}
{%- macro sqlserver__test_equal_expression(model, expression,
                                           compare_model,
                                           compare_expression,
                                           group_by,
                                           compare_group_by,
                                           row_condition,
                                           compare_row_condition,
                                           tolerance,
                                           tolerance_percent,
                                           return_difference) -%}

    {%- set compare_model = model if not compare_model else compare_model -%}
    {%- set compare_expression = expression if not compare_expression else compare_expression -%}
    {%- set compare_row_condition = row_condition if not compare_row_condition else compare_row_condition -%}
    {%- set compare_group_by = group_by if not compare_group_by else compare_group_by -%}

    {#- number of col_N aliases produced by get_select for the primary side -#}
    {%- set n_cols = group_by|length if group_by else 0 %}
    with a as (
        {{ dbt_expectations.get_select(model, expression, row_condition, group_by) }}
    ),
    b as (
        {{ dbt_expectations.get_select(compare_model, compare_expression, compare_row_condition, compare_group_by) }}
    ),
    final as (

        select
            {% for i in range(1, n_cols + 1) -%}
            coalesce(a.col_{{ i }}, b.col_{{ i }}) as col_{{ i }},
            {% endfor %}
            a.expression,
            b.expression as compare_expression,
            abs(coalesce(a.expression, 0) - coalesce(b.expression, 0)) as expression_difference,
            {#- "* 1.0" forces decimal division: T-SQL truncates int / int, which
                would turn every relative difference below 100% into 0 -#}
            abs(coalesce(a.expression, 0) - coalesce(b.expression, 0))/
                nullif(a.expression * 1.0, 0) as expression_difference_percent
        from
        {% if n_cols > 0 %}
            a
            full outer join
            b on
            {% for i in range(1, n_cols + 1) -%}
                a.col_{{ i }} = b.col_{{ i }} {% if not loop.last %}and{% endif %}
            {% endfor -%}
        {% else %}
            a cross join b
        {% endif %}
    )
    -- DEBUG:
    -- select * from final
    select
        {% if return_difference %}
        coalesce(sum(expression_difference), 0)
        {% else %}
        count(*)
        {% endif %}
    from final
    where
        {% if tolerance_percent %}
        expression_difference_percent > {{ tolerance_percent }}
        {% else %}
        expression_difference > {{ tolerance }}
        {% endif %}
{%- endmacro -%}

{#-
    Synapse shim for the equal_expression test: hands every argument,
    unchanged, to the SQL Server implementation in tsql_utils.
-#}
{%- macro synapse__test_equal_expression(model, expression,
                                         compare_model,
                                         compare_expression,
                                         group_by,
                                         compare_group_by,
                                         row_condition,
                                         compare_row_condition,
                                         tolerance,
                                         tolerance_percent,
                                         return_difference) -%}
    {% do return(tsql_utils.sqlserver__test_equal_expression(
        model=model,
        expression=expression,
        compare_model=compare_model,
        compare_expression=compare_expression,
        group_by=group_by,
        compare_group_by=compare_group_by,
        row_condition=row_condition,
        compare_row_condition=compare_row_condition,
        tolerance=tolerance,
        tolerance_percent=tolerance_percent,
        return_difference=return_difference
    )) -%}
{%- endmacro -%}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{#-
    T-SQL implementation of the expression_is_true test helper.

    The expression is evaluated inside a CASE so that it becomes a 1/0
    integer column, which is then checked against test_condition. The query
    returns the number of rows that do not satisfy the condition.

    Arguments:
        model: relation to select from.
        expression: SQL predicate to evaluate.
        test_condition: comparison applied to the 1/0 result. The boolean
            forms "= true" and "= false" are translated to "= 1" and "= 0";
            any other value is rendered as given.
        group_by_columns: optional list of columns to select (as col_N) and
            group by.
        row_condition: optional SQL filter applied before evaluation.
-#}
{% macro sqlserver__expression_is_true(model, expression, test_condition, group_by_columns, row_condition) %}

    {#- T-SQL has no boolean literals, so map both boolean comparisons onto
        the 1/0 values produced by the CASE below -#}
    {% set boolean_conditions = {"= true": "= 1", "= false": "= 0"} %}
    {% set test_condition = boolean_conditions.get(test_condition, test_condition) %}


    with grouped_expression as (

        select
            {% if group_by_columns %}
            {% for group_by_column in group_by_columns -%}
            {{ group_by_column }} as col_{{ loop.index }},
            {% endfor -%}
            {% endif %}
            case when {{ expression }} then 1 else 0 end as expression
        from {{ model }}
        {%- if row_condition %}
        where
            {{ row_condition }}
        {% endif %}
        {% if group_by_columns %}
        group by
            {% for group_by_column in group_by_columns -%}
            {{ group_by_column }}{% if not loop.last %},{% endif %}
            {% endfor %}
        {% endif %}

    ),
    validation_errors as (

        select
            *
        from
            grouped_expression
        where
            not(expression {{ test_condition }})

    )

    select count(*)
    from validation_errors

{% endmacro %}

{#- Synapse shim: reuses the SQL Server expression_is_true implementation as is. -#}
{% macro synapse__expression_is_true(model, expression, test_condition, group_by_columns, row_condition) %}
    {% do return(tsql_utils.sqlserver__expression_is_true(
        model=model,
        expression=expression,
        test_condition=test_condition,
        group_by_columns=group_by_columns,
        row_condition=row_condition
    )) %}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{#-
    T-SQL implementation of expect_column_most_common_value_to_be_in_set.

    Counts the occurrences of each value of column_name (optionally filtered
    by row_condition), ranks the values by descending count, picks the value
    whose rank equals top_n and returns how many of the picked values are
    absent from value_set.

    When quote_values is falsy, both the column and the set members are cast
    to data_type; otherwise the column is used as is and the set members are
    rendered as quoted string literals.
-#}
{% macro sqlserver__test_expect_column_most_common_value_to_be_in_set(model, column_name,
                                                                      value_set,
                                                                      top_n,
                                                                      quote_values=False,
                                                                      data_type="decimal",
                                                                      row_condition=None
                                                                      ) -%}

    {#- single definition of the expression that is selected and grouped on -#}
    {%- set counted_expression = column_name if quote_values else "cast(" ~ column_name ~ " as " ~ data_type ~ ")" %}

    with value_counts as (

        select
            {{ counted_expression }} as value_field,
            count(*) as value_count

        from {{ model }}
        {% if row_condition %}
        where {{ row_condition }}
        {% endif %}

        group by {{ counted_expression }}

    ),
    value_counts_ranked as (

        select
            *,
            row_number() over(order by value_count desc) as value_count_rank
        from
            value_counts

    ),
    value_count_top_n as (

        select
            value_field
        from
            value_counts_ranked
        where
            value_count_rank = {{ top_n }}

    ),
    set_values as (

        {#- one single-row select per allowed value, glued with union all -#}
        {% for allowed_value in value_set -%}
        {% if not loop.first %}union all{% endif %}
        select
            {% if quote_values -%}
            '{{ allowed_value }}'
            {%- else -%}
            cast({{ allowed_value }} as {{ data_type }})
            {%- endif %} as value_field
        {% endfor %}

    ),
    unique_set_values as (

        select distinct value_field
        from
            set_values

    ),
    validation_errors as (
        -- values from the model that are not in the set
        select
            value_field
        from
            value_count_top_n
        where
            value_field not in (select value_field from unique_set_values)

    )

    select count(*) as validation_errors
    from validation_errors

{% endmacro %}

{#-
    Synapse shim for expect_column_most_common_value_to_be_in_set: delegates
    to the SQL Server implementation in tsql_utils.

    quote_values, data_type and row_condition are forwarded exactly as
    received, so the options chosen by the caller reach the delegated macro
    instead of being replaced by fixed values.
-#}
{% macro synapse__test_expect_column_most_common_value_to_be_in_set(model, column_name,
                                                                    value_set,
                                                                    top_n,
                                                                    quote_values,
                                                                    data_type,
                                                                    row_condition
                                                                    ) -%}
    {% do return( tsql_utils.sqlserver__test_expect_column_most_common_value_to_be_in_set(model, column_name,
                                                                    value_set,
                                                                    top_n,
                                                                    quote_values=quote_values,
                                                                    data_type=data_type,
                                                                    row_condition=row_condition
                                                                    )) -%}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{#-
    T-SQL implementation of expect_column_stdev_to_be_between.

    Builds a stdev(...) aggregate over column_name and passes it, together
    with the bounds and the optional row filter, to
    dbt_expectations.expression_between.
-#}
{% macro sqlserver__test_expect_column_stdev_to_be_between(model, column_name,
                                                           min_value,
                                                           max_value,
                                                           row_condition=None
                                                           ) -%}
    {#- aggregate whose result is range-checked -#}
    {% set expression = "stdev(" ~ column_name ~ ")" %}
    {{ dbt_expectations.expression_between(
        model,
        expression=expression,
        min_value=min_value,
        max_value=max_value,
        row_condition=row_condition
    ) }}
{% endmacro %}

{#-
    Synapse shim for expect_column_stdev_to_be_between: delegates to the
    SQL Server implementation in tsql_utils.

    row_condition is forwarded as received so that a row filter supplied by
    the caller is applied by the delegated macro.
-#}
{% macro synapse__test_expect_column_stdev_to_be_between(model, column_name,
                                                         min_value,
                                                         max_value,
                                                         row_condition
                                                         ) -%}
    {% do return( tsql_utils.sqlserver__test_expect_column_stdev_to_be_between(model, column_name,
                                                         min_value,
                                                         max_value,
                                                         row_condition=row_condition
                                                         )) -%}
{% endmacro %}
Loading

0 comments on commit 28822f1

Please sign in to comment.