5
5
import dlt
6
6
from dlt .common import json
7
7
from dlt .common .data_types .typing import TDataType
8
+ from dlt .common .schema .utils import is_nested_table , may_be_nested
8
9
from dlt .common .storages import (
9
10
SchemaStorage ,
10
11
SchemaStorageConfiguration ,
23
24
from tests .extract .utils import expect_extracted_file
24
25
25
26
27
# Single root row shared by the nested-hints tests: two list fields (`outer1`, `outer2`),
# each holding one item that itself contains a further list (`innerfoo` / `innerbar`).
NESTED_DATA = [
    {
        "id": 1,
        "outer1": [{"outer1_id": "2", "innerfoo": [{"innerfoo_id": "3"}]}],
        "outer2": [{"outer2_id": "4", "innerbar": [{"innerbar_id": "5"}]}],
    },
]
38
+
39
+
26
40
@pytest .fixture
27
41
def extract_step () -> Extract :
28
42
clean_test_storage (init_normalize = True )
@@ -279,19 +293,8 @@ def with_table_hints():
279
293
280
294
281
295
def test_extract_nested_hints (extract_step : Extract ) -> None :
282
- data = [
283
- {
284
- "id" : 1 ,
285
- "outer1" : [
286
- {"outer1_id" : "2" , "innerfoo" : [{"innerfoo_id" : "3" }]},
287
- ],
288
- "outer2" : [
289
- {"outer2_id" : "4" , "innerbar" : [{"innerbar_id" : "5" }]},
290
- ],
291
- }
292
- ]
293
296
resource_name = "with_nested_hints"
294
- nested_resource = DltResource .from_data (data , name = resource_name )
297
+ nested_resource = DltResource .from_data (NESTED_DATA , name = resource_name )
295
298
296
299
# Check 1: apply nested hints
297
300
outer1_id_new_type : TDataType = "double"
@@ -318,21 +321,21 @@ def test_extract_nested_hints(extract_step: Extract) -> None:
318
321
319
322
# root table exists even though there are no explicit hints
320
323
assert pre_extract_schema .get_table (resource_name )
321
- assert (
322
- pre_extract_schema . get_table ( "with_nested_hints__outer1" ) ["parent" ] == "with_nested_hints"
323
- )
324
- assert (
325
- pre_extract_schema . get_table ( "with_nested_hints__outer1" )[ "columns" ]
326
- == nested_hints [ "outer1" ][ "columns" ]
327
- )
328
- assert (
329
- pre_extract_schema .get_table ("with_nested_hints__outer2__innerbar" )[ "parent" ]
330
- == "with_nested_hints__outer2"
331
- )
332
- assert (
333
- pre_extract_schema . get_table ( "with_nested_hints__outer2__innerbar" )[ "columns" ]
334
- == nested_hints [( "outer2" , "innerbar" )][ "columns" ]
335
- )
324
+ outer1_tab = pre_extract_schema . get_table ( "with_nested_hints__outer1" )
325
+ assert outer1_tab ["parent" ] == "with_nested_hints"
326
+ assert outer1_tab [ "columns" ] == nested_hints [ "outer1" ][ "columns" ]
327
+ # no resource on nested table
328
+ assert "resource" not in outer1_tab
329
+ assert is_nested_table ( outer1_tab ) is True
330
+ assert may_be_nested ( outer1_tab ) is True
331
+
332
+ outer2_innerbar_tab = pre_extract_schema .get_table ("with_nested_hints__outer2__innerbar" )
333
+ assert outer2_innerbar_tab [ "parent" ] == "with_nested_hints__outer2"
334
+ assert outer2_innerbar_tab [ "columns" ] == nested_hints [( "outer2" , "innerbar" )][ "columns" ]
335
+ assert "resource" not in outer2_innerbar_tab
336
+ assert is_nested_table ( outer2_innerbar_tab ) is True
337
+ assert may_be_nested ( outer2_innerbar_tab ) is True
338
+
336
339
# this table is generated to ensure `innerbar` has a parent that links it to the root table
337
340
# NOTE: nested tables do not have parent set
338
341
assert pre_extract_schema .get_table (implicit_parent ) == {
@@ -341,19 +344,88 @@ def test_extract_nested_hints(extract_step: Extract) -> None:
341
344
"columns" : {},
342
345
}
343
346
344
- source = DltSource (dlt .Schema ("hintable" ), "module" , [nested_resource ])
345
347
extract_step .extract (source , 20 , 1 )
346
348
# schema after extractions must be same as discovered schema
347
349
assert source .schema ._schema_tables == pre_extract_schema ._schema_tables
348
350
349
351
350
- def test_break_nesting_with_primary_key () -> None :
352
def test_break_nesting_with_primary_key(extract_step: Extract) -> None:
    """Check that a `primary_key` nested hint breaks nesting of the hinted table.

    Applies nested hints to a resource built from `NESTED_DATA`, discovers the schema
    and asserts that the table hinted with a primary key carries a `resource`, has no
    `parent` and is not reported as nested. The source is then extracted and the
    resulting schema tables are compared with the discovered ones.
    """
    resource_name = "with_nested_hints"
    nested_resource = DltResource.from_data(NESTED_DATA, name=resource_name)
    # NOTE(review): NESTED_DATA nests `innerfoo` (not `innerbar`) under `outer1`, so the
    # second hint path does not match any data - confirm this is intended
    nested_hints: Dict[TTableNames, TResourceNestedHints] = {
        "outer1": {"columns": {"outer1_id": {"name": "outer1_id", "data_type": "bigint"}}},
        ("outer1", "innerbar"): {"primary_key": "innerfoo_id"},
    }
    nested_resource.apply_hints(nested_hints=nested_hints)
    assert nested_resource.nested_hints == nested_hints

    source = DltSource(dlt.Schema("hintable"), "module", [nested_resource])
    pre_extract_schema = source.discover_schema()

    # primary key will break nesting
    innerfoo_tab = pre_extract_schema.tables["with_nested_hints__outer1__innerbar"]
    assert innerfoo_tab["columns"]["innerfoo_id"]["primary_key"] is True
    # resource must be present
    assert innerfoo_tab["resource"] == "with_nested_hints"
    # parent cannot be present: `parent` is a table-level hint, so the table itself
    # is checked (looking it up in a column schema would pass vacuously)
    assert "parent" not in innerfoo_tab
    # is_nested_table must be false
    assert is_nested_table(innerfoo_tab) is False
    assert may_be_nested(innerfoo_tab) is False

    extract_step.extract(source, 20, 1)
    # schema after extractions must be same as discovered schema
    assert source.schema._schema_tables == pre_extract_schema._schema_tables
378
+
379
+
380
def test_nested_hints_dynamic_table_names(extract_step: Extract) -> None:
    """Nested hints are applied when the root table name is computed per data item.

    The resource dispatches each item to a table named after its `Event` value, and a
    nested hint types the `Date` column of the `DataBlob` list as `date`.
    """
    rows = [
        {"Event": "issue", "DataBlob": [{"ID": 1, "Name": "first", "Date": "2024-01-01"}]},
        {"Event": "purchase", "DataBlob": [{"PID": "20-1", "Name": "first", "Date": "2024-01-01"}]},
    ]
    data_blob_hints = dlt.mark.make_nested_hints(columns=[{"name": "Date", "data_type": "date"}])
    resource_hints = dlt.mark.make_hints(
        table_name=lambda e: e["Event"],
        nested_hints={"DataBlob": data_blob_hints},
    )
    events = DltResource.from_data(rows, name="events", hints=resource_hints)

    source = DltSource(dlt.Schema("hintable"), "module", [events])
    extract_step.extract(source, 20, 1)

    # make sure that the tables exist and the types are applied
    tables = source.schema.tables
    for root_table in ("issue", "purchase"):
        assert root_table in tables
    for nested_table in ("issue__data_blob", "purchase__data_blob"):
        assert tables[nested_table]["columns"]["date"]["data_type"] == "date"
405
+
406
+
407
def test_nested_hints_table_name(extract_step: Extract) -> None:
    """Nested hints are applied when the resource uses a fixed, explicit table name.

    All items go to `events_table`, and a nested hint types the `Date` column of the
    `DataBlob` list as `date`.
    """
    rows = [
        {"Event": "issue", "DataBlob": [{"ID": 1, "Name": "first", "Date": "2024-01-01"}]},
        {"Event": "purchase", "DataBlob": [{"PID": "20-1", "Name": "first", "Date": "2024-01-01"}]},
    ]
    data_blob_hints = dlt.mark.make_nested_hints(columns=[{"name": "Date", "data_type": "date"}])
    resource_hints = dlt.mark.make_hints(
        table_name="events_table",
        nested_hints={"DataBlob": data_blob_hints},
    )
    events = DltResource.from_data(rows, name="events", hints=resource_hints)

    source = DltSource(dlt.Schema("hintable"), "module", [events])
    extract_step.extract(source, 20, 1)

    # the root table takes the explicit name and the nested table is derived from it
    tables = source.schema.tables
    assert "events_table" in tables
    date_column = tables["events_table__data_blob"]["columns"]["date"]
    assert date_column["data_type"] == "date"
357
429
358
430
359
431
def test_extract_metrics_on_exception_no_flush (extract_step : Extract ) -> None :
0 commit comments