Skip to content

Commit

Permalink
Merge pull request astropy#937 from taldcroft/table/stack
Browse files Browse the repository at this point in the history
Stack tables horizontally (by columns) or vertically (by rows, i.e. append)
  • Loading branch information
taldcroft committed May 10, 2013
2 parents e029253 + a71fc79 commit e07f417
Show file tree
Hide file tree
Showing 10 changed files with 1,515 additions and 541 deletions.
2 changes: 2 additions & 0 deletions astropy/table/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from .table import Column, Table, TableColumns, Row, MaskedColumn
from .np_utils import TableMergeError
from .operations import join, hstack, vstack

# Import routines that connect readers/writers to astropy.table
from ..io.ascii import connect
Expand Down
406 changes: 334 additions & 72 deletions astropy/table/np_utils.py

Large diffs are not rendered by default.

250 changes: 250 additions & 0 deletions astropy/table/operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
"""
High-level table operations:
- join()
- hstack()
- vstack()
"""
# Licensed under a 3-clause BSD style license - see LICENSE.rst

from copy import deepcopy
import warnings
import collections

from ..utils import OrderedDict, metadata
from . import np_utils

__all__ = ['join', 'hstack', 'vstack']


def _merge_col_meta(out, tables, col_name_map, idx_left=0, idx_right=1):
"""
Merge column meta data for the ``out`` table.
This merges column meta, which includes attributes units, format,
and description, as well as the actual `meta` atttribute. It is
assumed that the ``out`` table was created by merging ``tables``.
The ``col_name_map`` provides the mapping from col name in ``out``
back to the original name (which may be different).
"""
# Set column meta
attrs = ('units', 'format', 'description')
for out_col in out.columns.values():
for idx_table, table in enumerate(tables):
left_col = out_col
right_name = col_name_map[out_col.name][idx_table]

if right_name:
right_col = table[right_name]
out_col.meta = metadata.merge(left_col.meta, right_col.meta)
for attr in attrs:
left_attr = getattr(left_col, attr)
right_attr = getattr(right_col, attr)
merge_attr = left_attr or right_attr
setattr(out_col, attr, merge_attr)
if left_attr and right_attr and left_attr != right_attr:
warnings.warn('In merged column {0!r} the {1!r} attribute does not match '
'({2} != {3}). Using {2} for merged output'
.format(out_col.name, attr, left_attr, right_attr),
metadata.MergeConflictWarning)


def _merge_table_meta(out, tables):
out_meta = deepcopy(tables[0].meta)
for table in tables[1:]:
out_meta = metadata.merge(out_meta, table.meta)
out.meta.update(out_meta)


def _get_list_of_tables(tables):
"""
Check that tables is a Table or sequence of Tables. Returns the
corresponding list of Tables.
"""
from .table import Table
err = '`tables` arg must be a Table or sequence of Tables'
if isinstance(tables, Table):
tables = [tables]
elif isinstance(tables, collections.Sequence):
if any(not isinstance(x, Table) for x in tables):
raise TypeError(err)
else:
raise TypeError(err)

return list(tables)


def join(left, right, keys=None, join_type='inner',
uniq_col_name='{col_name}_{table_name}',
table_names=['1', '2']):
"""
Perform a join of the left table with the right table on specified keys.
Parameters
----------
left : Table object or a value that will initialize a Table object
Left side table in the join
right : Table object or a value that will initialize a Table object
Right side table in the join
keys : str or list of str
Column(s) used to match rows of left and right tables. Default
is to use all columns which are common to both tables.
join_type : str
Join type ('inner' | 'outer' | 'left' | 'right'), default is 'inner'
uniq_col_name : str or None
String generate a unique output column name in case of a conflict.
The default is '{col_name}_{table_name}'.
table_names : list of str or None
Two-element list of table names used when generating unique output
column names. The default is ['1', '2'].
"""
from .table import Table

# Try converting inputs to Table as needed
if not isinstance(left, Table):
left = Table(left)
if not isinstance(right, Table):
right = Table(right)

col_name_map = OrderedDict()
out_data = np_utils.join(left._data, right._data, keys, join_type,
uniq_col_name, table_names, col_name_map)
# Create the output (Table or subclass of Table)
out = Table(out_data)

# Merge the column and table meta data. Table subclasses might override
# these methods for custom merge behavior.
_merge_col_meta(out, [left, right], col_name_map)
_merge_table_meta(out, [left, right])

return out


def vstack(tables, join_type='outer'):
"""
Stack tables vertically (along rows)
A ``join_type`` of 'exact' means that the tables must all
have exactly the same column names (though the order can vary). If
``join_type`` is 'inner' then the intersection of common columns will
be output. A value of 'outer' (default) means the output will have the union of
all columns, with table values being masked where no common values are
available.
Parameters
----------
tables : Table or list of Table objects
Table(s) to stack along rows (vertically) with the current table
join_type : str
Join type ('inner' | 'exact' | 'outer'), default is 'exact'
Examples
--------
To stack two tables along rows do::
>>> from astropy.table import vstack, Table
>>> t1 = Table({'a': [1, 2], 'b': [3, 4]}, names=('a', 'b'))
>>> t2 = Table({'a': [5, 6], 'b': [7, 8]}, names=('a', 'b'))
>>> print t1
a b
--- ---
1 3
2 4
>>> print t2
a b
--- ---
5 7
6 8
>>> print vstack([t1, t2])
a b
--- ---
1 3
2 4
5 7
6 8
"""
from .table import Table

tables = _get_list_of_tables(tables) # validates input
arrays = [table._data for table in tables]
col_name_map = OrderedDict()

out_data = np_utils.vstack(arrays, join_type, col_name_map)
out = Table(out_data)

# Merge column and table metadata
_merge_col_meta(out, tables, col_name_map)
_merge_table_meta(out, tables)

return out


def hstack(tables, join_type='outer',
uniq_col_name='{col_name}_{table_name}', table_names=None):
"""
Stack tables along columns (horizontally)
A ``join_type`` of 'exact' means that the tables must all
have exactly the same number of row. If ``join_type`` is 'inner' then
the intersection of rows will be output. A value of 'outer' (default) means
the output will have the union of all rows, with table values being
masked where no common values are available.
Parameters
----------
tables : List of Table objects
Tables to stack along columns (horizontally) with the current table
join_type : str
Join type ('inner' | 'exact' | 'outer'), default is 'outer'
uniq_col_name : str or None
String generate a unique output column name in case of a conflict.
The default is '{col_name}_{table_name}'.
table_names : list of str or None
Two-element list of table names used when generating unique output
column names. The default is ['1', '2', ..].
col_name_map : empty dict or None
If passed as a dict then it will be updated in-place with the
mapping of output to input column names.
Examples
--------
To stack two tables horizontally (along columns) do::
>>> from astropy.table import Table, hstack
>>> t1 = Table({'a': [1, 2], 'b': [3, 4]}, names=('a', 'b'))
>>> t2 = Table({'c': [5, 6], 'd': [7, 8]}, names=('c', 'd'))
>>> print t1
a b
--- ---
1 3
2 4
>>> print t2
c d
--- ---
5 7
6 8
>>> print hstack([t1, t2])
a b c d
--- --- --- ---
1 3 5 7
2 4 6 8
"""
from .table import Table

tables = _get_list_of_tables(tables) # validates input
arrays = [table._data for table in tables]
col_name_map = OrderedDict()

out_data = np_utils.hstack(arrays, join_type, uniq_col_name, table_names,
col_name_map)
out = Table(out_data)

_merge_col_meta(out, tables, col_name_map)
_merge_table_meta(out, tables)

return out
105 changes: 4 additions & 101 deletions astropy/table/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,19 @@
import sys
from copy import deepcopy
import functools
import warnings

import numpy as np
from numpy import ma

from ..units import Unit
from .. import log
from ..utils import OrderedDict, isiterable, meta
from ..utils import OrderedDict, isiterable
from .structhelper import _drop_fields
from .pprint import _pformat_table, _pformat_col, _pformat_col_iter, _more_tabcol
from ..utils.console import color_print
from ..config import ConfigurationItem
from ..io import registry as io_registry
from . import np_utils
from . import operations

# Python 2 and 3 source compatibility
try:
Expand All @@ -27,7 +26,8 @@

NUMPY_LT_1P5 = [int(x) for x in np.__version__.split('.')[:2]] < [1, 5]

AUTO_COLNAME = ConfigurationItem('auto_colname', 'col{0}',
AUTO_COLNAME = ConfigurationItem(
'auto_colname', 'col{0}',
'The template that determines the name of a column if it cannot be '
'determined. Uses new-style (format method) string formatting')

Expand Down Expand Up @@ -57,59 +57,6 @@ def _auto_names(n_cols):
return [AUTO_COLNAME().format(i) for i in range(n_cols)]


def _merge_col_meta(out, left, right, col_name_map):
"""
Merge column meta data for the ``out`` table.
This merges column meta, which includes attributes units, format,
and description, as well as the actual `meta` atttribute. It is
assumed that the ``out`` table was created by merging ``left``
and ``right`` (e.g. by joining). The ``col_name_map`` provides
the mapping from col name in ``out`` back to the original name
(which may be different).
If a column was derived from both left and right tables (e.g.
a key column in a join) then ``meta`` values are merged. The
attributes (units, format, description) are set from the logical
"or" of the left and right values, e.g. (left.units or right.units).
This selects the first non-trivial value, and does not check for
conflicts.
"""
# Set column meta
attrs = ('units', 'format', 'description')
for out_col in out.columns.values():
left_name, right_name = col_name_map[out_col.name]
left_col = (left[left_name] if left_name else None)
right_col = (right[right_name] if right_name else None)

if left_name and right_name:
out_col.meta = meta.merge(left_col.meta, right_col.meta)
for attr in attrs:
left_attr = getattr(left_col, attr)
right_attr = getattr(right_col, attr)
merge_attr = left_attr or right_attr
setattr(out_col, attr, merge_attr)
if left_attr and right_attr and left_attr != right_attr:
warnings.warn('Left and right column {0} attributes do not match '
'({1} != {2}) using left for merged output'
.format(attr, left_attr, right_attr),
meta.MergeConflictWarning)
elif left_name:
out_col.meta = deepcopy(left_col.meta)
for attr in attrs:
setattr(out_col, attr, getattr(left_col, attr))
elif right_name:
out_col.meta = deepcopy(right_col.meta)
for attr in attrs:
setattr(out_col, attr, getattr(right_col, attr))
else:
raise ValueError('Unexpected column names')


def _merge_table_meta(out, left, right):
out.meta = meta.merge(left.meta, right.meta)


class TableColumns(OrderedDict):
"""OrderedDict subclass for a set of columns.
Expand Down Expand Up @@ -1989,47 +1936,3 @@ def reverse(self):

read = classmethod(io_registry.read)
write = io_registry.write

def join(self, right, keys=None, join_type='inner',
uniq_col_name='{col_name}_{table_name}',
table_names=['1', '2']):
"""
Perform a join of this (left) table with the right table on specified keys.
Parameters
----------
right : Table object or a value that will initialize a Table object
Right side table in the join
keys : str or list of str
Column(s) used to match rows of left and right tables. Default
is to use all columns which are common to both tables.
join_type : str
Join type ('inner' | 'outer' | 'left' | 'right'), default is 'inner'
uniq_col_name : str or None
String generate a unique output column name in case of a conflict.
The default is '{col_name}_{table_name}'.
table_names : list of str or None
Two-element list of table names used when generating unique output
column names. The default is ['1', '2'].
"""
# In "t1.join(t2)" self (i.e. t1) is the left table.
left = self

# If right isn't a table, use right as the data for a new Table
# that has the same class as left.
if not isinstance(right, Table):
right = left.__class__(right)
col_name_map = {}
out_data = np_utils.join(left._data, right._data, keys, join_type,
uniq_col_name, table_names, col_name_map)
# Create the output (Table or subclass of Table)
out = left.__class__(out_data)

# Merge the column and table meta data. In this context 'self' is
# just the left input table. Table subclasses might override these
# methods for custom merge behavior.
_merge_col_meta(out, left, right, col_name_map)
_merge_table_meta(out, left, right)

return out
Loading

0 comments on commit e07f417

Please sign in to comment.