 from dlt.common.destination.dataset import SupportsReadableDataset

 from tests.pipeline.utils import load_table_counts
-
 from dlt.extract.hints import make_hints


-def test_sql_job() -> None:
+def test_simple_model_jobs() -> None:
     # populate a table with 10 items and retrieve dataset
     pipeline = dlt.pipeline(
         pipeline_name="example_pipeline", destination="duckdb", dataset_name="example_dataset"
     )
     pipeline.run([{"a": i} for i in range(10)], table_name="example_table")
     dataset = pipeline.dataset()

+    example_table_columns = dataset.schema.tables["example_table"]["columns"]
+
     # create a resource that generates sql statements to create 2 new tables
+    # we also need to supply all hints so the table can be created
     @dlt.resource()
     def copied_table() -> Any:
         query = dataset["example_table"].limit(5).query()
         yield dlt.mark.with_hints(
-            f"CREATE OR REPLACE TABLE copied_table AS {query}",
-            make_hints(file_format="sql"),
+            query, hints=make_hints(columns=example_table_columns), data_item_format="model"
         )

+    @dlt.resource()
+    def copied_table_2() -> Any:
         query = dataset["example_table"].limit(7).query()
         yield dlt.mark.with_hints(
-            f"CREATE OR REPLACE TABLE copied_table2 AS {query}",
-            make_hints(file_format="sql"),
+            query, hints=make_hints(columns=example_table_columns), data_item_format="model"
         )

     # run sql jobs
-    pipeline.run(copied_table())
+    pipeline.run([copied_table(), copied_table_2()])

     # the two tables were created
-    assert load_table_counts(pipeline, "example_table", "copied_table", "copied_table2") == {
-        "example_table": 10,
+    assert load_table_counts(pipeline, "copied_table", "copied_table_2", "example_table") == {
         "copied_table": 5,
-        "copied_table2": 7,
+        "copied_table_2": 7,
+        "example_table": 10,
     }

     # we have a table entry for the main table "copied_table"
     assert "copied_table" in pipeline.default_schema.tables
-    # but no columns, it's up to the user to provide a schema
-    assert len(pipeline.default_schema.tables["copied_table"]["columns"]) == 0
+    # and we only have the three columns from the original table
+    assert set(pipeline.default_schema.tables["copied_table"]["columns"].keys()) == {
+        "a",
+        "_dlt_id",
+        "_dlt_load_id",
+    }
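
The change replaces SQL DDL jobs with "model" data items: instead of yielding a CREATE OR REPLACE TABLE ... AS <query> statement hinted with file_format="sql", each resource now yields the SELECT query itself as a model item, together with column hints copied from the source table so the destination can create the derived table with a known schema. Below is a minimal standalone sketch of that pattern; it is a restatement of what the test does, using only the dlt calls the test itself exercises (pipeline.dataset(), .limit(...).query(), dlt.mark.with_hints, make_hints, data_item_format="model"), and the pipeline and dataset names are placeholders.

from typing import Any

import dlt
from dlt.extract.hints import make_hints

# seed a source table with a few rows (placeholder pipeline and dataset names)
pipeline = dlt.pipeline(
    pipeline_name="model_jobs_demo", destination="duckdb", dataset_name="demo_dataset"
)
pipeline.run([{"a": i} for i in range(10)], table_name="example_table")

dataset = pipeline.dataset()
# reuse the source table's column schema as hints for the derived table
example_table_columns = dataset.schema.tables["example_table"]["columns"]


@dlt.resource()
def copied_table() -> Any:
    # render a plain SELECT over the existing table as SQL text
    query = dataset["example_table"].limit(5).query()
    # yield the query as a "model" item; the column hints let dlt create the table
    yield dlt.mark.with_hints(
        query, hints=make_hints(columns=example_table_columns), data_item_format="model"
    )


pipeline.run(copied_table())
# the derived table's schema is known up front thanks to the supplied hints
print(pipeline.default_schema.tables["copied_table"]["columns"].keys())

Supplying the source table's columns as hints is what the updated final assertion relies on: with model items the derived table carries the "a", "_dlt_id" and "_dlt_load_id" columns, whereas the old file_format="sql" jobs left the table entry without any columns.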