From 2d8ae0b40bdfb75bcceff0c73d41a52d4bffb5dc Mon Sep 17 00:00:00 2001 From: Alec Koumjian Date: Tue, 14 Jan 2025 11:57:48 -0500 Subject: [PATCH] Assume attributes from first value when concatenating only empty tables --- quivr/concat.py | 12 ++++++++++-- test/test_concat.py | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/quivr/concat.py b/quivr/concat.py index 2aad710..34d73cd 100644 --- a/quivr/concat.py +++ b/quivr/concat.py @@ -29,6 +29,9 @@ def concatenate( if len(values_list) == 0: raise ValueError("No values to concatenate") + # Note, we don't return immediately if there is only one table, + # because we still want to optionally defragment the result. + batches = [] first_full = False @@ -58,10 +61,15 @@ def concatenate( ) if len(batches) == 0: - return first_cls.empty() + # Return the first table, to preserve the attributes + table = first_val.table + else: + table = pa.Table.from_batches(batches) - table = pa.Table.from_batches(batches) + # We re-initialize the table to optionally validate and create + # a unique object result = first_cls.from_pyarrow(table=table, validate=validate) + if defrag: result = defragment.defragment(result) return result diff --git a/test/test_concat.py b/test/test_concat.py index d4a9160..dd999b1 100644 --- a/test/test_concat.py +++ b/test/test_concat.py @@ -147,3 +147,30 @@ def test_concatenate_same_attrs(): def test_concatenate_no_values(): with pytest.raises(ValueError, match="No values to concatenate"): qv.concatenate([]) + + +def test_concatenate_empty_tables_preserve_attributes(): + class TableWithAttrs(qv.Table): + x = qv.Int64Column() + y = qv.Int64Column() + name = qv.StringAttribute(default="default") + id = qv.IntAttribute(default=0) + + # Create two empty tables with non-default attributes + t1 = TableWithAttrs.from_kwargs(x=[], y=[], name="foo", id=1) + t2 = TableWithAttrs.from_kwargs(x=[], y=[], name="bat", id=3) + + # Concatenate them and verify we get an empty table with the same attributes + have = qv.concatenate([t1, t2]) + assert len(have) == 0 + assert have.name == "foo" # Not "default" + assert have.id == 1 # Not 0 + + # Also verify it works when concatenating with a non-empty table + # Attributes should be preserved from the non-empty tables + t3 = TableWithAttrs.from_kwargs(x=[1], y=[2], name="bar", id=2) + have = qv.concatenate([t1, t2, t3]) + assert len(have) == 1 + assert have.name == "bar" + assert have.id == 2 +