Skip to content

Commit

Permalink
Python: Add checks after building the wheels (#8532)
Browse files Browse the repository at this point in the history
* Python: Add checks after building the wheels

* Improvements
  • Loading branch information
Fokko committed Sep 30, 2023
1 parent 76b6517 commit 9b193fa
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 27 deletions.
34 changes: 13 additions & 21 deletions build-module.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@


def build_cython_extensions() -> None:
import Cython.Compiler.Options # pyright: ignore [reportMissingImports]
from Cython.Build import build_ext, cythonize # pyright: ignore [reportMissingImports]
import Cython.Compiler.Options
from Cython.Build import build_ext, cythonize
from setuptools import Extension
from setuptools.dist import Distribution

Expand All @@ -40,28 +40,20 @@ def build_cython_extensions() -> None:
extra_compile_args = [
"-O3",
]
# Relative to project root directory
include_dirs = {
"pyiceberg/",
}

extensions = [
Extension(
# Your .pyx file will be available to cpython at this location.
"pyiceberg.avro.decoder_fast",
[
"pyiceberg/avro/decoder_fast.pyx",
],
include_dirs=list(include_dirs),
extra_compile_args=extra_compile_args,
language="c",
),
]
package_path = "pyiceberg"

for extension in extensions:
include_dirs.update(extension.include_dirs)
extension = Extension(
# Your .pyx file will be available to cpython at this location.
name="pyiceberg.avro.decoder_fast",
sources=[
os.path.join(package_path, "avro", "decoder_fast.pyx"),
],
extra_compile_args=extra_compile_args,
language="c",
)

ext_modules = cythonize(extensions, include_path=list(include_dirs), language_level=3, annotate=True)
# include_path must be a list of directories. package_path is a plain string,
# so list(package_path) would split it into single characters
# (['p', 'y', 'i', ...]); wrap it in a one-element list instead.
ext_modules = cythonize([extension], include_path=[package_path], language_level=3, annotate=True)
dist = Distribution({"ext_modules": ext_modules})
cmd = build_ext(dist)
cmd.ensure_finalized()
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ packages = [
]
include = [
{ path = "dev", format = "sdist" },
{ path = "pyiceberg/**/*.so", format = "wheel" }
{ path = "pyiceberg/**/*.so", format = "wheel" },
{ path = "pyiceberg/**/*.pyd", format = "wheel" },
]

[tool.poetry.dependencies]
Expand Down
22 changes: 17 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from random import choice
from tempfile import TemporaryDirectory
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
Expand All @@ -50,10 +51,8 @@
import boto3
import botocore.awsrequest
import botocore.model
import pyarrow as pa
import pytest
from moto import mock_dynamodb, mock_glue, mock_s3
from pyarrow import parquet as pq

from pyiceberg import schema
from pyiceberg.catalog import Catalog
Expand All @@ -69,7 +68,6 @@
load_file_io,
)
from pyiceberg.io.fsspec import FsspecFileIO
from pyiceberg.io.pyarrow import PyArrowFile, PyArrowFileIO
from pyiceberg.manifest import DataFile, FileFormat
from pyiceberg.schema import Schema
from pyiceberg.serializers import ToOutputFile
Expand All @@ -91,6 +89,9 @@
)
from pyiceberg.utils.datetime import datetime_to_millis

if TYPE_CHECKING:
from pyiceberg.io.pyarrow import PyArrowFile, PyArrowFileIO


def pytest_collection_modifyitems(items: List[pytest.Item]) -> None:
for item in items:
Expand Down Expand Up @@ -421,6 +422,8 @@ def example_table_metadata_v2() -> Dict[str, Any]:

@pytest.fixture(scope="session")
def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:
from pyiceberg.io.pyarrow import PyArrowFileIO

metadata_location = str(tmp_path_factory.mktemp("metadata") / f"{uuid.uuid4()}.metadata.json")
metadata = TableMetadataV2(**EXAMPLE_TABLE_METADATA_V2)
ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True)
Expand All @@ -429,6 +432,8 @@ def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:

@pytest.fixture(scope="session")
def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
from pyiceberg.io.pyarrow import PyArrowFileIO

metadata_location = str(tmp_path_factory.mktemp("metadata") / f"{uuid.uuid4()}.gz.metadata.json")
metadata = TableMetadataV2(**EXAMPLE_TABLE_METADATA_V2)
ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True)
Expand Down Expand Up @@ -1146,7 +1151,9 @@ def __len__(self) -> int:
def exists(self) -> bool:
return os.path.exists(self._path)

def to_input_file(self) -> PyArrowFile:
def to_input_file(self) -> "PyArrowFile":
from pyiceberg.io.pyarrow import PyArrowFileIO

return PyArrowFileIO().new_input(location=self.location)

def create(self, overwrite: bool = False) -> OutputStream:
Expand Down Expand Up @@ -1399,7 +1406,9 @@ def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> FsspecFileIO:


@pytest.fixture
def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> PyArrowFileIO:
def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> "PyArrowFileIO":
from pyiceberg.io.pyarrow import PyArrowFileIO

properties = {
GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
Expand Down Expand Up @@ -1620,6 +1629,9 @@ def clean_up(test_catalog: Catalog) -> None:

@pytest.fixture
def data_file(table_schema_simple: Schema, tmp_path: str) -> str:
import pyarrow as pa
from pyarrow import parquet as pq

table = pa.table(
{"foo": ["a", "b", "c"], "bar": [1, 2, 3], "baz": [True, False, None]},
metadata={"iceberg.schema": table_schema_simple.model_dump_json()},
Expand Down

0 comments on commit 9b193fa

Please sign in to comment.