Skip to content

Commit

Permalink
Assume attributes from first value when concatenating only empty tables
Browse files Browse the repository at this point in the history
  • Loading branch information
akoumjian committed Jan 14, 2025
1 parent ba6d18b commit 2d8ae0b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 2 deletions.
12 changes: 10 additions & 2 deletions quivr/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def concatenate(
if len(values_list) == 0:
raise ValueError("No values to concatenate")

# Note, we don't return immediately if there is only one table,
# because we still want to optionally defragment the result.

batches = []
first_full = False

Expand Down Expand Up @@ -58,10 +61,15 @@ def concatenate(
)

if len(batches) == 0:
return first_cls.empty()
# Return the first table, to preserve the attributes
table = first_val.table
else:
table = pa.Table.from_batches(batches)

table = pa.Table.from_batches(batches)
# We re-initialize the table to optionally validate and create
# a unique object
result = first_cls.from_pyarrow(table=table, validate=validate)

if defrag:
result = defragment.defragment(result)
return result
27 changes: 27 additions & 0 deletions test/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,30 @@ def test_concatenate_same_attrs():
def test_concatenate_no_values():
    """Concatenating an empty list of tables must raise ``ValueError``."""
    no_tables = []
    expected_message = "No values to concatenate"

    # The message is checked as a regex match against the raised error text.
    with pytest.raises(ValueError, match=expected_message):
        qv.concatenate(no_tables)


def test_concatenate_empty_tables_preserve_attributes():
    """Check which input's attributes the concatenated table carries.

    Two scenarios are exercised:

    * every input is empty: the result is empty and has the attributes of
      the first input rather than the class defaults;
    * empty inputs followed by one non-empty input: the result has the
      attributes of the non-empty input.
    """

    class TableWithAttrs(qv.Table):
        x = qv.Int64Column()
        y = qv.Int64Column()
        name = qv.StringAttribute(default="default")
        id = qv.IntAttribute(default=0)

    # Two zero-row tables whose attributes differ from the defaults and
    # from each other, so the source of the result's attributes is
    # unambiguous.
    empty_first = TableWithAttrs.from_kwargs(x=[], y=[], name="foo", id=1)
    empty_second = TableWithAttrs.from_kwargs(x=[], y=[], name="bat", id=3)

    # Scenario 1: only empty inputs.
    all_empty = qv.concatenate([empty_first, empty_second])
    assert len(all_empty) == 0
    assert all_empty.name == "foo"  # taken from the first input, not "default"
    assert all_empty.id == 1  # taken from the first input, not 0

    # Scenario 2: a single-row table joins the two empty ones.
    populated = TableWithAttrs.from_kwargs(x=[1], y=[2], name="bar", id=2)
    with_rows = qv.concatenate([empty_first, empty_second, populated])
    assert len(with_rows) == 1
    assert with_rows.name == "bar"
    assert with_rows.id == 2

0 comments on commit 2d8ae0b

Please sign in to comment.