Skip to content

Commit e8f1c9c

Browse files
committed
register table integration tests
1 parent 5cedb9a commit e8f1c9c

File tree

3 files changed

+100
-92
lines changed

3 files changed

+100
-92
lines changed

tests/catalog/test_hive.py

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -204,87 +204,6 @@ def test_check_number_of_namespaces(table_schema_simple: Schema) -> None:
204204
catalog.create_table("table", schema=table_schema_simple)
205205

206206

207-
@pytest.mark.parametrize("hive2_compatible", [True, False])
@patch("time.time", MagicMock(return_value=12345))
def test_register_table(
    table_schema_with_all_types: Schema,
    hive_database: HiveDatabase,
    hive_table: HiveTable,
    hive2_compatible: bool,
    metadata_with_owner_location: str,
) -> None:
    """Check that ``register_table`` sends the expected ``HiveTable`` to the metastore.

    The Thrift client is fully mocked, so the assertion pins the exact payload:
    the ``owner`` read from the metadata file, the metadata columns/location,
    and both timestamps frozen at 12345 by the patched ``time.time``.
    """
    catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
    if hive2_compatible:
        catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL, **{"hive.hive2-compatible": "true"})

    # Stub out every Thrift round-trip the register path touches.
    catalog._client = MagicMock()
    thrift_client = catalog._client.__enter__()
    thrift_client.create_table.return_value = None
    thrift_client.register_table.return_value = None
    thrift_client.get_table.return_value = hive_table
    thrift_client.get_database.return_value = hive_database

    catalog.register_table(("default", "table"), metadata_location=metadata_with_owner_location)

    expected_table = HiveTable(
        tableName="table",
        dbName="default",
        owner="test",
        createTime=12345,
        lastAccessTime=12345,
        retention=None,
        sd=StorageDescriptor(
            cols=[
                # Columns and location come from the registered metadata file.
                FieldSchema(name="x", type="bigint", comment=None),
                FieldSchema(name="y", type="bigint", comment="comment"),
                FieldSchema(name="z", type="bigint", comment=None),
            ],
            location="s3://bucket/test/location",
            inputFormat="org.apache.hadoop.mapred.FileInputFormat",
            outputFormat="org.apache.hadoop.mapred.FileOutputFormat",
            compressed=None,
            numBuckets=None,
            serdeInfo=SerDeInfo(
                name=None,
                serializationLib="org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
                parameters=None,
                description=None,
                serializerClass=None,
                deserializerClass=None,
                serdeType=None,
            ),
            bucketCols=None,
            sortCols=None,
            parameters=None,
            skewedInfo=None,
            storedAsSubDirectories=None,
        ),
        partitionKeys=None,
        parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_with_owner_location},
        viewOriginalText=None,
        viewExpandedText=None,
        tableType="EXTERNAL_TABLE",
        privileges=None,
        temporary=False,
        rewriteEnabled=None,
        creationMetadata=None,
        catName=None,
        ownerType=1,
        writeId=-1,
        isStatsCompliant=None,
        colStats=None,
        accessType=None,
        requiredReadCapabilities=None,
        requiredWriteCapabilities=None,
        id=None,
        fileMetadata=None,
        dictionary=None,
        txnId=None,
    )
    thrift_client.create_table.assert_called_with(expected_table)
    assert catalog.table_exists(identifier="default.table")
286-
287-
288207
@pytest.mark.parametrize("hive2_compatible", [True, False])
289208
@patch("time.time", MagicMock(return_value=12345))
290209
def test_create_table(

tests/conftest.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,17 +1133,6 @@ def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:
11331133
return metadata_location
11341134

11351135

1136-
@pytest.fixture(scope="session")
def metadata_with_owner_location(tmp_path_factory: pytest.TempPathFactory) -> str:
    """Write V2 table metadata that carries an ``owner`` property; return its path."""
    from pyiceberg.io.pyarrow import PyArrowFileIO

    target = str(tmp_path_factory.mktemp("metadata") / f"{uuid.uuid4()}.metadata.json")
    table_metadata = TableMetadataV2(**EXAMPLE_TABLE_METADATA_V2)
    # The owner property is what distinguishes this fixture from metadata_location.
    table_metadata.properties["owner"] = "test"
    output_file = PyArrowFileIO().new_output(location=target)
    ToOutputFile.table_metadata(table_metadata, output_file, overwrite=True)
    return target
1145-
1146-
11471136
@pytest.fixture(scope="session")
11481137
def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
11491138
from pyiceberg.io.pyarrow import PyArrowFileIO
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import pytest
2+
3+
from pyiceberg.catalog import Catalog
4+
from pyiceberg.catalog.hive import (
5+
HiveCatalog,
6+
)
7+
from pyiceberg.exceptions import NoSuchTableError, TableAlreadyExistsError
8+
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
9+
from pyiceberg.schema import Schema
10+
from pyiceberg.table import Table
11+
from pyiceberg.types import (
12+
BooleanType,
13+
DateType,
14+
IntegerType,
15+
NestedField,
16+
StringType,
17+
)
18+
19+
# Column set shared by every register-table integration test in this module.
# NOTE(review): field ids are non-contiguous (1, 2, 4, 10) — presumably to
# mimic a schema that has been evolved; confirm before relying on it.
TABLE_SCHEMA = Schema(
20+
    NestedField(field_id=1, name="foo", field_type=BooleanType(), required=False),
21+
    NestedField(field_id=2, name="bar", field_type=StringType(), required=False),
22+
    NestedField(field_id=4, name="baz", field_type=IntegerType(), required=False),
23+
    NestedField(field_id=10, name="qux", field_type=DateType(), required=False),
24+
)
25+
26+
27+
def _create_table(
    session_catalog: Catalog,
    identifier: str,
    format_version: int,
    location: str,
    partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
    schema: Schema = TABLE_SCHEMA,
) -> Table:
    """Drop ``identifier`` if it already exists, then create it fresh and return it."""
    # Best-effort cleanup so reruns of the integration suite start from scratch.
    try:
        session_catalog.drop_table(identifier=identifier)
    except NoSuchTableError:
        pass  # nothing to clean up

    table_properties = {"format-version": str(format_version)}
    return session_catalog.create_table(
        identifier=identifier,
        schema=schema,
        location=location,
        properties=table_properties,
        partition_spec=partition_spec,
    )
47+
48+
49+
@pytest.mark.integration
def test_hive_register_table(
    session_catalog: HiveCatalog,
) -> None:
    """Round-trip on Hive: create a table, drop it, then re-register it from its metadata file."""
    identifier = "default.hive_register_table"
    location = "s3a://warehouse/default/hive_register_table"

    tbl = _create_table(session_catalog, identifier, 2, location)
    assert session_catalog.table_exists(identifier=identifier)

    # Dropping removes the catalog entry but not the metadata file, so the
    # table can be re-attached afterwards via register_table.
    session_catalog.drop_table(identifier=identifier)
    assert not session_catalog.table_exists(identifier=identifier)

    namespace, name = identifier.split(".")
    session_catalog.register_table((namespace, name), metadata_location=tbl.metadata_location)
    assert session_catalog.table_exists(identifier=identifier)
61+
62+
63+
@pytest.mark.integration
def test_hive_register_table_existing(
    session_catalog: HiveCatalog,
) -> None:
    """Registering on top of an existing Hive table must raise TableAlreadyExistsError."""
    identifier = "default.hive_register_table_existing"
    location = "s3a://warehouse/default/hive_register_table_existing"

    tbl = _create_table(session_catalog, identifier, 2, location)
    assert session_catalog.table_exists(identifier=identifier)

    # The table was never dropped, so a second registration must be rejected.
    namespace, name = identifier.split(".")
    with pytest.raises(TableAlreadyExistsError):
        session_catalog.register_table((namespace, name), metadata_location=tbl.metadata_location)
74+
75+
76+
@pytest.mark.integration
def test_rest_register_table(
    session_catalog: Catalog,
) -> None:
    """Round-trip on REST: create a table, drop it, then re-register it from its metadata file."""
    identifier = "default.rest_register_table"
    location = "s3a://warehouse/default/rest_register_table"

    tbl = _create_table(session_catalog, identifier, 2, location)
    assert session_catalog.table_exists(identifier=identifier)

    # Dropping removes the catalog entry but not the metadata file, so the
    # table can be re-attached afterwards via register_table.
    session_catalog.drop_table(identifier=identifier)
    assert not session_catalog.table_exists(identifier=identifier)

    session_catalog.register_table(identifier=identifier, metadata_location=tbl.metadata_location)
    assert session_catalog.table_exists(identifier=identifier)
88+
89+
90+
@pytest.mark.integration
def test_rest_register_table_existing(
    session_catalog: Catalog,
) -> None:
    """Registering on top of an existing REST table must raise TableAlreadyExistsError."""
    identifier = "default.rest_register_table_existing"
    location = "s3a://warehouse/default/rest_register_table_existing"

    tbl = _create_table(session_catalog, identifier, 2, location)
    assert session_catalog.table_exists(identifier=identifier)

    # The table was never dropped, so a second registration must be rejected.
    with pytest.raises(TableAlreadyExistsError):
        session_catalog.register_table(identifier=identifier, metadata_location=tbl.metadata_location)

0 commit comments

Comments
 (0)