Skip to content

Commit

Permalink
Linting errors fixed
Browse files (browse the repository at this point in the history)
  • Loading branch information
lloyd-EA committed Jan 27, 2025
1 parent 6a70c91 commit bd1d5ad
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 9 deletions.
7 changes: 2 additions & 5 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2423,6 +2423,7 @@ def _check_pyarrow_schema_compatible(

def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_paths: Iterator[str]) -> Iterator[DataFile]:
from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE

for file_path in file_paths:
input_file = io.new_input(file_path)
with input_file.open() as input_stream:
Expand All @@ -2434,11 +2435,7 @@ def parquet_files_to_data_files(io: FileIO, table_metadata: TableMetadata, file_
)
schema = table_metadata.schema()
downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
_check_pyarrow_schema_compatible(
schema,
parquet_metadata.schema.to_arrow_schema(),
downcast_ns_timestamp_to_us
)
_check_pyarrow_schema_compatible(schema, parquet_metadata.schema.to_arrow_schema(), downcast_ns_timestamp_to_us)

statistics = data_file_statistics_from_parquet_metadata(
parquet_metadata=parquet_metadata,
Expand Down
8 changes: 4 additions & 4 deletions tests/integration/test_add_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ def test_add_files_with_timestamp_tz_ns_fails(session_catalog: Catalog, format_v

@pytest.mark.integration
def test_add_files_with_automatic_downcast_of_timestamp_to_us(
session_catalog: Catalog, format_version: int, mocker: MockerFixture
session_catalog: Catalog, format_version: int, mocker: MockerFixture
) -> None:
nanoseconds_schema_iceberg = Schema(NestedField(1, "quux", TimestamptzType()))

Expand Down Expand Up @@ -663,9 +663,9 @@ def test_add_files_with_automatic_downcast_of_timestamp_to_us(

# add the parquet files as data files
tbl.add_files(file_paths=[file_path])
data_scan = tbl.scan(selected_fields=('quux',)).to_arrow()
assert data_scan['quux'].type == pa.timestamp(unit='us', tz='UTC') # timestamp unit check
assert data_scan['quux'][0].value == 1615967687249846 # down-casted value of the timestamp must be 'us' long
data_scan = tbl.scan(selected_fields=("quux",)).to_arrow()
assert data_scan["quux"].type == pa.timestamp(unit="us", tz="UTC") # timestamp unit check
assert data_scan["quux"][0].value == 1615967687249846 # down-casted value of the timestamp must be 'us' long


@pytest.mark.integration
Expand Down

0 comments on commit bd1d5ad

Please sign in to comment.