Commit 5d59485

add correct insert job

1 parent 1daad9b commit 5d59485

File tree

2 files changed: +33 -16 lines changed

dlt/destinations/job_client_impl.py

Lines changed: 15 additions & 4 deletions
@@ -115,18 +115,29 @@ def is_sql_job(file_path: str) -> bool:
 
 class ModelLoadJob(RunnableLoadJob):
     """
-    A job to insert rows into a table from a model file which contains a list of select statements
+    A job to insert rows into a table from a model file which contains a single select statement
     """
 
     def __init__(self, file_path: str) -> None:
         super().__init__(file_path)
         self._job_client: "SqlJobClientBase" = None
+        self._sql_client = self._job_client.sql_client
 
     def run(self) -> None:
         with FileStorage.open_zipsafe_ro(self._file_path, "r", encoding="utf-8") as f:
-            sql = f.read()
-            self._sql_client = self._job_client.sql_client
-            self._sql_client.execute_sql(sql)
+            select_statement = f.read()
+
+        insert_statement = self._insert_statement_from_select_statement(select_statement)
+        self._sql_client.execute_sql(insert_statement)
+
+    def _insert_statement_from_select_statement(self, select_statement: str) -> str:
+        """
+        NOTE: Here we generate an insert statement from a select statement, this is the duckdb
+        dialect, we may be able to transpile with sqlglot for each destination or we need
+        to subclass and override this method.
+        """
+        name = self._sql_client.make_qualified_table_name(self._load_table["name"])
+        return f"INSERT INTO {name} {select_statement};"
 
     @staticmethod
     def is_model_job(file_path: str) -> bool:
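
The NOTE in _insert_statement_from_select_statement above suggests transpiling the generated statement per destination with sqlglot. A minimal sketch of what that could look like, assuming sqlglot as the transpiler (the build_insert_statement helper and target_dialect parameter are illustrative names, not part of this commit):

import sqlglot


def build_insert_statement(
    qualified_table_name: str, select_statement: str, target_dialect: str = "duckdb"
) -> str:
    # compose the duckdb-dialect INSERT the same way ModelLoadJob does
    insert_statement = f"INSERT INTO {qualified_table_name} {select_statement}"
    # sqlglot.transpile returns one rewritten statement string per input statement
    return sqlglot.transpile(insert_statement, read="duckdb", write=target_dialect)[0]


# e.g. target_dialect="snowflake" would rewrite duckdb-specific syntax for Snowflake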

tests/load/test_sql_resource.py renamed to tests/load/test_model_item_format.py

Lines changed: 18 additions & 12 deletions
@@ -7,44 +7,50 @@
 from dlt.common.destination.dataset import SupportsReadableDataset
 
 from tests.pipeline.utils import load_table_counts
-
 from dlt.extract.hints import make_hints
 
 
-def test_sql_job() -> None:
+def test_simple_model_jobs() -> None:
     # populate a table with 10 items and retrieve dataset
     pipeline = dlt.pipeline(
         pipeline_name="example_pipeline", destination="duckdb", dataset_name="example_dataset"
     )
     pipeline.run([{"a": i} for i in range(10)], table_name="example_table")
     dataset = pipeline.dataset()
 
+    example_table_columns = dataset.schema.tables["example_table"]["columns"]
+
     # create a resource that generates sql statements to create 2 new tables
+    # we also need to supply all hints so the table can be created
     @dlt.resource()
     def copied_table() -> Any:
         query = dataset["example_table"].limit(5).query()
         yield dlt.mark.with_hints(
-            f"CREATE OR REPLACE TABLE copied_table AS {query}",
-            make_hints(file_format="sql"),
+            query, hints=make_hints(columns=example_table_columns), data_item_format="model"
         )
 
+    @dlt.resource()
+    def copied_table_2() -> Any:
         query = dataset["example_table"].limit(7).query()
         yield dlt.mark.with_hints(
-            f"CREATE OR REPLACE TABLE copied_table2 AS {query}",
-            make_hints(file_format="sql"),
+            query, hints=make_hints(columns=example_table_columns), data_item_format="model"
         )
 
     # run sql jobs
-    pipeline.run(copied_table())
+    pipeline.run([copied_table(), copied_table_2()])
 
     # the two tables where created
-    assert load_table_counts(pipeline, "example_table", "copied_table", "copied_table2") == {
-        "example_table": 10,
+    assert load_table_counts(pipeline, "copied_table", "copied_table_2", "example_table") == {
         "copied_table": 5,
-        "copied_table2": 7,
+        "copied_table_2": 7,
+        "example_table": 10,
     }
 
     # we have a table entry for the main table "copied_table"
     assert "copied_table" in pipeline.default_schema.tables
-    # but no columns, it's up to the user to provide a schema
-    assert len(pipeline.default_schema.tables["copied_table"]["columns"]) == 0
+    # and we only have the three columns from the original table
+    assert set(pipeline.default_schema.tables["copied_table"]["columns"].keys()) == {
+        "a",
+        "_dlt_id",
+        "_dlt_load_id",
+    }
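
For context, each model file written by the test above contains the single SELECT statement returned by dataset["example_table"].limit(5).query(), which ModelLoadJob then wraps into an INSERT against the qualified target table. A rough sketch of the resulting statement (the literal SELECT text depends on how the dataset renders the query, so this is an assumption, not the exact output):

# hypothetical contents of the model file for copied_table
select_statement = 'SELECT * FROM "example_dataset"."example_table" LIMIT 5'
# ModelLoadJob qualifies the target table name and prepends the INSERT clause
qualified_name = '"example_dataset"."copied_table"'
insert_statement = f"INSERT INTO {qualified_name} {select_statement};"
# -> INSERT INTO "example_dataset"."copied_table" SELECT * FROM "example_dataset"."example_table" LIMIT 5;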
