@pytest.fixture(scope="session")
def warehouse(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Provide one temporary warehouse directory shared by the whole test session.

    The directory is created through pytest's ``tmp_path_factory`` using the
    base name ``test_sql``.
    """
    warehouse_dir = tmp_path_factory.mktemp("test_sql")
    return warehouse_dir
@pytest.mark.integration
def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> None:
    """Write a table via a SQLite-backed ``SqlCatalog`` and read it back with DuckDB.

    The table is created from ``arrow_table_with_null`` and then scanned with
    DuckDB's ``iceberg_scan`` using the table's metadata location. The rows
    returned by DuckDB are compared against fixed expected tuples.

    The process timezone is forced to ``Etc/UTC`` for the duration of the test
    and restored afterwards, so the change does not leak into other tests
    running in the same session.
    """
    # Remember the caller's timezone so it can be restored in ``finally``.
    previous_tz = os.environ.get('TZ')

    # NOTE(review): the timezone is pinned presumably so the timestamptz values
    # DuckDB returns are deterministic across machines - confirm.
    # ``time.tzset`` is only available on Unix.
    os.environ['TZ'] = 'Etc/UTC'
    time.tzset()
    try:
        tz = pytz.timezone(os.environ['TZ'])

        catalog = SqlCatalog("test_sql_catalog", uri="sqlite:///:memory:", warehouse=f"/{warehouse}")
        catalog.create_namespace("default")

        identifier = "default.arrow_table_v1_with_null"
        tbl = _create_table(catalog, identifier, {}, [arrow_table_with_null])
        location = tbl.metadata_location

        # Imported lazily so that merely importing this module does not require duckdb.
        import duckdb

        duckdb.sql('INSTALL iceberg; LOAD iceberg;')
        result = duckdb.sql(
            f"""
        SELECT *
        FROM iceberg_scan('{location}')
        """
        ).fetchall()

        assert result == [
            (
                False,
                'a',
                'aaaaaaaaaaaaaaaaaaaaaa',
                1,
                1,
                0.0,
                0.0,
                datetime(2023, 1, 1, 19, 25),
                datetime(2023, 1, 1, 19, 25, tzinfo=tz),
                date(2023, 1, 1),
                b'\x01',
                b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
            ),
            (None, None, None, None, None, None, None, None, None, None, None, None),
            (
                True,
                'z',
                'zzzzzzzzzzzzzzzzzzzzzz',
                9,
                9,
                0.8999999761581421,
                0.9,
                datetime(2023, 3, 1, 19, 25),
                datetime(2023, 3, 1, 19, 25, tzinfo=tz),
                date(2023, 3, 1),
                b'\x12',
                b'\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11',
            ),
        ]
    finally:
        # Undo the process-wide timezone override regardless of the test outcome.
        if previous_tz is None:
            os.environ.pop('TZ', None)
        else:
            os.environ['TZ'] = previous_tz
        time.tzset()