Skip to content

Commit 2fb2c1f

Browse files
committed
Refactor microbatch tests to improve structure and add query constants
- Updated test structure to use class-based fixtures for better organization.
- Introduced constants for SQL queries to enhance readability and maintainability.
- Adjusted test cases for microbatch processing with varying batch sizes (1h, 8h, 24h).
- Ensured consistent ClickHouse client setup across tests.
1 parent a6d88d8 commit 2fb2c1f

File tree

4 files changed

+140
-36
lines changed

4 files changed

+140
-36
lines changed

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@ services:
2323

2424
networks:
2525
app:
26-
driver: bridge
26+
driver: bridge

tests/pytest/constants.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
11
# dbt settings
22
MICROBATCH_INPUT_MODEL = 'microbatch_test_input'
33
MICROBATCH_TEST_MODEL = 'microbatch_test'
4+
5+
# queries
6+
QUERY_COUNT_ROWS = """
7+
select
8+
count() as rows_count
9+
from
10+
default.{table_name}
11+
"""
12+
13+
QUERY_TIMESTAMP = """
14+
select
15+
min({timestamp_column}) as min_timestamp,
16+
max({timestamp_column}) as max_timestamp
17+
from
18+
default.{table_name}
19+
"""
Lines changed: 122 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime, timedelta
1+
from datetime import timedelta
22
from os import environ
33

44
import clickhouse_connect
@@ -8,47 +8,135 @@
88
from tests.pytest.constants import (
99
MICROBATCH_INPUT_MODEL,
1010
MICROBATCH_TEST_MODEL,
11+
QUERY_COUNT_ROWS,
12+
QUERY_TIMESTAMP,
1113
)
1214

1315

14-
@pytest.fixture(scope="session")
15-
def ch_client():
16-
client = clickhouse_connect.get_client(
17-
host=environ['CLICKHOUSE_HOST'],
18-
port=environ['CLICKHOUSE_PORT'],
19-
user=environ['CLICKHOUSE_USER'],
20-
password=environ['CLICKHOUSE_PASSWORD'],
21-
database=environ['CLICKHOUSE_DATABASE'],
22-
)
23-
return client
16+
class TestMicrobatch:
    """Integration tests for the microbatch dbt model at several batch sizes.

    Each test fully refreshes ``MICROBATCH_TEST_MODEL`` from a chosen start
    date and compares its row count with the matching slice of
    ``MICROBATCH_INPUT_MODEL``.
    """

    # Timestamp column of the input model used to bound and filter the data.
    TIMESTAMP_COLUMN = 'event_datetime'

    @pytest.fixture(scope="class")
    def ch_client(self):
        """ClickHouse client setup fixture.

        Connection settings are read from the ``CLICKHOUSE_*`` environment
        variables (a missing variable raises ``KeyError``). The client is
        closed once every test in the class has finished.
        """
        client = clickhouse_connect.get_client(
            host=environ['CLICKHOUSE_HOST'],
            # Environment values are strings; the port is expected as an int.
            port=int(environ['CLICKHOUSE_PORT']),
            user=environ['CLICKHOUSE_USER'],
            password=environ['CLICKHOUSE_PASSWORD'],
            database=environ['CLICKHOUSE_DATABASE'],
        )
        yield client
        client.close()

    @pytest.fixture(scope="class")
    def setup_test_environment(self, ch_client):
        """Pretest setup fixture.

        Builds the input model (and its upstream models) and returns a dict
        with the ``min_timestamp`` and ``max_timestamp`` found in it.
        """
        run_dbt(['run', '--select', f'+{MICROBATCH_INPUT_MODEL}'])

        # Read the bounds from the INPUT model: the `+input` selection above
        # does not build the microbatch test model, so that table may not
        # exist yet (or may hold data left over from a previous run).
        timestamps = ch_client.query_df(
            QUERY_TIMESTAMP.format(
                table_name=MICROBATCH_INPUT_MODEL,
                timestamp_column=self.TIMESTAMP_COLUMN,
            )
        )

        return {
            'min_timestamp': timestamps['min_timestamp'][0],
            'max_timestamp': timestamps['max_timestamp'][0],
        }

    @staticmethod
    def _run_microbatch(start_timestamp, batch_size=None):
        """Full-refresh the microbatch model starting at ``start_timestamp``'s date.

        ``batch_size`` is passed to dbt as a var only when given; otherwise
        the model's own default applies.
        """
        import json  # local import: only needed to serialise the --vars payload

        dbt_vars = {
            'materialization_start_date': start_timestamp.strftime('%Y-%m-%d'),
        }
        if batch_size is not None:
            dbt_vars['batch_size'] = batch_size

        run_dbt(
            [
                'run',
                '--select',
                MICROBATCH_TEST_MODEL,
                '--vars',
                json.dumps(dbt_vars),
                '--full-refresh',
            ]
        )

    @staticmethod
    def _count_rows(client, table_name, where_clause=''):
        """Return the row count of ``default.<table_name>``, optionally filtered."""
        query = QUERY_COUNT_ROWS.format(table_name=table_name)
        if where_clause:
            # Explicit newline so the clause never fuses with the table name.
            query = f'{query}\n{where_clause}'
        return client.query_df(query)['rows_count'][0]

    def _check_recent_window(self, client, max_timestamp, offset_hours, batch_size):
        """Refresh the last ``offset_hours`` of data and compare row counts.

        The start date is the calendar date of ``max_timestamp - offset_hours``;
        the expected count applies the same date cut-off to the input model.
        """
        self._run_microbatch(
            max_timestamp - timedelta(hours=offset_hours),
            batch_size=batch_size,
        )

        actual_rows = self._count_rows(client, MICROBATCH_TEST_MODEL)
        expected_rows = self._count_rows(
            client,
            MICROBATCH_INPUT_MODEL,
            f"where {self.TIMESTAMP_COLUMN} >= "
            f"toDate('{max_timestamp}' - interval {offset_hours} hour)",
        )

        assert expected_rows == actual_rows

    def test_batching_1h(self, ch_client, setup_test_environment):
        """Microbatch test with 1h batch size"""
        self._check_recent_window(
            ch_client,
            setup_test_environment['max_timestamp'],
            offset_hours=50,
            batch_size=1,
        )

    def test_batching_8h(self, ch_client, setup_test_environment):
        """Microbatch test with 8h batch size"""
        self._check_recent_window(
            ch_client,
            setup_test_environment['max_timestamp'],
            offset_hours=100,
            batch_size=8,
        )

    def test_batching_24h(self, ch_client, setup_test_environment):
        """Microbatch test with 24h batch size"""
        # No batch_size var is passed here, matching the original test; the
        # model's default is presumably 24h -- confirm in the dbt project.
        self._run_microbatch(setup_test_environment['min_timestamp'])

        actual_rows = self._count_rows(ch_client, MICROBATCH_TEST_MODEL)
        expected_rows = self._count_rows(ch_client, MICROBATCH_INPUT_MODEL)

        assert expected_rows == actual_rows

tests/pytest/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
addopts = "--capture=no"
33
filterwarnings = [
44
"ignore::Warning"
5-
]
5+
]

0 commit comments

Comments
 (0)