diff --git a/CHANGELOG.md b/CHANGELOG.md index bf99cf8f6..2a2df0cd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] None yet. +## v0.6.0 +### Changed +- Added `make_array` to make arrays without lists. (#224) +- `Table.select`, `drop`, and `with_columns` now accept variable arguments in addition to lists. (#224) + ## v0.5.3 ### Changed - Allow charting methods to select particular columns and default to diff --git a/datascience/tables.py b/datascience/tables.py index b7db15621..07634177a 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -527,13 +527,15 @@ def copy(self, *, shallow=False): self._add_column_and_format(table, label, column) return table - def select(self, column_label_or_labels): + def select(self, *column_label_or_labels): """Return a Table with selected column or columns by label or index. Args: - ``column_label_or_labels`` (string or list of strings): The header - names or indices of the columns to be selected. ``column_label_or_labels`` must - be an existing header name, or a valid column index. + ``column_label_or_labels`` (string, list of strings, or several + separate argument strings): The header names or indices of the + columns to be selected. ``column_label_or_labels`` must + be an existing header name, or a valid column index, or a list + thereof. Returns: An instance of ``Table`` containing only selected columns. 
@@ -557,6 +559,11 @@ def select(self, column_label_or_labels): 6 5 5 + >>> t.select('burgers', 'calories') + burgers | calories + cheeseburger | 743 + hamburger | 651 + veggie burger | 582 >>> t.select(1) prices 6 @@ -567,8 +574,13 @@ def select(self, column_label_or_labels): 743 | cheeseburger 651 | hamburger 582 | veggie burger + >>> t.select(2, 0) + calories | burgers + 743 | cheeseburger + 651 | hamburger + 582 | veggie burger """ - labels = self._as_labels(column_label_or_labels) + labels = self._varargs_as_labels(column_label_or_labels) table = Table() for label in labels: self._add_column_and_format(table, label, np.copy(self[label])) @@ -583,7 +595,7 @@ def take(self): def exclude(self): raise NotImplementedError() - def drop(self, column_label_or_labels): + def drop(self, *column_label_or_labels): """Return a Table with only columns other than selected label or labels. Args: @@ -613,18 +625,28 @@ def drop(self, column_label_or_labels): 6 5 5 + >>> t.drop('burgers', 'calories') + prices + 6 + 5 + 5 >>> t.drop([0, 2]) prices 6 5 5 + >>> t.drop(0, 2) + prices + 6 + 5 + 5 >>> t.drop(1) burgers | calories cheeseburger | 743 hamburger | 651 veggie burger | 582 """ - exclude = _as_labels(column_label_or_labels) + exclude = _varargs_labels_as_list(column_label_or_labels) return self.select([c for (i, c) in enumerate(self.labels) if i not in exclude and c not in exclude]) def where(self, column_or_label, value_or_predicate=None, other=None): @@ -1068,6 +1090,11 @@ def _as_labels(self, label_or_labels): """Convert single label to list and convert indices to labels.""" return [self._as_label(s) for s in _as_labels(label_or_labels)] + def _varargs_as_labels(self, label_list): + """Converts a list of labels or singleton list of list of labels into + a list of labels. 
Useful when labels are passed as varargs.""" + return self._as_labels(_varargs_labels_as_list(label_list)) + def _unused_label(self, label): """Generate an unused label.""" original = label @@ -1327,7 +1354,7 @@ def with_column(self, label, values): new_table.append_column(label, values) return new_table - def with_columns(self, labels_and_values): + def with_columns(self, *labels_and_values): """Return a table with additional or replaced columns. Args: @@ -1341,6 +1368,13 @@ def with_columns(self, labels_and_values): letter | count c | 2 d | 4 + >>> Table().with_columns( + ... 'letter', ['c', 'd'], + ... 'count', [2, 4], + ... ) + letter | count + c | 2 + d | 4 >>> Table().with_columns([ ... ['letter', ['c', 'd']], ... ['count', [2, 4]], @@ -1348,11 +1382,38 @@ def with_columns(self, labels_and_values): letter | count c | 2 d | 4 + >>> Table().with_columns( + ... ['letter', ['c', 'd']], + ... ['count', [2, 4]], + ... ) + letter | count + c | 2 + d | 4 + >>> Table().with_columns([ + ... ['letter', ['c', 'd']], + ... ]) + letter + c + d + >>> Table().with_columns( + ... 'letter', ['c', 'd'], + ... ) + letter + c + d + >>> Table().with_columns( + ... ['letter', ['c', 'd']], + ... ) + letter + c + d >>> Table().with_columns({'letter': ['c', 'd']}) letter c d """ + if len(labels_and_values) == 1: + labels_and_values = labels_and_values[0] if isinstance(labels_and_values, collections.abc.Mapping): labels_and_values = list(labels_and_values.items()) if not isinstance(labels_and_values, collections.abc.Sequence): @@ -2176,6 +2237,20 @@ def _as_labels(column_label_or_labels): else: return column_label_or_labels +def _varargs_labels_as_list(label_list): + """Return a list of labels for a list of labels or singleton list of list + of labels.""" + if len(label_list) == 0: + return [] + elif not _is_non_string_iterable(label_list[0]): + # Assume everything is a label. If not, it'll be caught later. 
+ return label_list + elif len(label_list) == 1: + return label_list[0] + else: + raise ValueError("Labels {} contain more than list.".format(label_list), + "Pass just one list of labels.") + def _assert_same(values): """Assert that all values are identical and return the unique value.""" assert len(values) > 0 diff --git a/datascience/util.py b/datascience/util.py index 11fe540fd..49b4f557b 100644 --- a/datascience/util.py +++ b/datascience/util.py @@ -1,7 +1,7 @@ """Utility functions""" -__all__ = ['percentile', 'plot_cdf_area', 'plot_normal_cdf', 'table_apply', - 'minimize'] +__all__ = ['make_array', 'percentile', 'plot_cdf_area', 'plot_normal_cdf', + 'table_apply', 'minimize'] import numpy as np import pandas as pd @@ -14,6 +14,25 @@ import math +def make_array(*elements): + """Returns an array containing all the arguments passed to this function. + A simple way to make an array with a few elements. + + As with any array, all arguments should have the same type. + + >>> make_array(0) + array([0]) + >>> make_array(2, 3, 4) + array([2, 3, 4]) + >>> make_array("foo", "bar") + array(['foo', 'bar'], + dtype='<U3') + >>> make_array() + array([], dtype=float64) + """ + return np.array(elements) + + def percentile(p, arr=None): """Returns the pth percentile of the input array (the value that is at least as great as p% of the values in the array). diff --git a/datascience/version.py b/datascience/version.py index e7a8889bf..ef7eb44d9 100644 --- a/datascience/version.py +++ b/datascience/version.py @@ -1 +1 @@ -__version__ = '0.5.20' +__version__ = '0.6.0'