diff --git a/.gitignore b/.gitignore
index 379075b3..3f08a73d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,7 @@ profile
 # vi noise
 *.swp
+*~
 docs/_build/*
 coverage.xml
 nosetests.xml
diff --git a/tablib/core.py b/tablib/core.py
index 93caded5..a3b9a41c 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -10,7 +10,7 @@
 """
 
 from collections import OrderedDict
-from copy import copy
+from copy import deepcopy
 from operator import itemgetter
 
 from tablib import formats
@@ -28,13 +28,16 @@
 
 class Row(object):
-    """Internal Row object. Mainly used for filtering."""
+    """Internal Row object. Mainly used for filtering. Note: to allow
+    label-based indexing, a Row needs to be aware of the Dataset it belongs
+    to; this is passed in via the constructor's `dset` argument."""
 
-    __slots__ = ['_row', 'tags']
+    __slots__ = ['_row', 'tags', '_dset']
 
-    def __init__(self, row=list(), tags=list()):
+    def __init__(self, row=list(), tags=list(), dset=None):
         self._row = list(row)
         self.tags = list(tags)
+        self._dset = dset
 
     def __iter__(self):
         return (col for col in self._row)
@@ -48,14 +51,47 @@ def __repr__(self):
         return repr(self._row)
 
     def __getslice__(self, i, j):
         return self._row[i:j]
 
-    def __getitem__(self, i):
-        return self._row[i]
+    def _index(self, key):
+        """Returns the index for ``key`` (string or int). Raises TypeError
+        if ``key`` is a string but the Dataset has no unique headers set,
+        and IndexError if ``key`` is not in the headers."""
 
-    def __setitem__(self, i, value):
-        self._row[i] = value
+        if isinstance(key, (str, unicode)):
+            if not self._dset._lblidx:
+                raise TypeError("Cannot access element by key '{0}' - Dataset"
+                                " headers not suitable for indexing".format(key))
+            try:
+                i = self._dset.headers.index(key)
+            except ValueError:
+                raise IndexError("'{0}' not in Dataset headers".format(key))
+        else:
+            i = key
+
+        return i
+
+    def __getitem__(self, key):
+        return self._row[self._index(key)]
+
+    def __setitem__(self, key, value):
+        self._row[self._index(key)] = value
+
+    def __delitem__(self, key):
+        del self._row[self._index(key)]
+
+    def __add__(self, other):
+        """Returns the concatenation as a plain list; ``other`` may be a Row
+        or any sequence type."""
+        return self._row + list(other)
 
-    def __delitem__(self, i):
-        del self._row[i]
+    def __eq__(self, other):
+        """Rows compare equal when their ``_row`` and ``tags`` attributes
+        match; the headers of the respective owning Datasets are not
+        compared."""
+        if not isinstance(other, Row):
+            raise TypeError("Can't compare Row to %s" % type(other))
+        return self._row == other._row and self.tags == other.tags
+
+    def __ne__(self, other):
+        return not self == other
 
     def __getstate__(self):
@@ -100,12 +136,8 @@ def has_tag(self, tag):
 
         if tag == None:
             return False
-        elif isinstance(tag, str):
-            return (tag in self.tags)
         else:
-            return bool(len(set(tag) & set(self.tags)))
-
-
+            return (tag in self.tags)
 
 
 class Dataset(object):
@@ -158,8 +190,9 @@ class Dataset(object):
     _formats = {}
 
     def __init__(self, *args, **kwargs):
-        self._data = list(Row(arg) for arg in args)
+        self._data = list(Row(arg, dset=self) for arg in args)
         self.__headers = None
+        self._lblidx = False
 
         # ('title', index) tuples
         self._separators = []
@@ -173,11 +206,9 @@ def __init__(self, *args, **kwargs):
 
         self._register_formats()
 
-
     def __len__(self):
         return self.height
 
-
     def __getitem__(self, key):
         if isinstance(key, (str, unicode)):
             if key in self.headers:
@@ -188,13 +219,13 @@ def __getitem__(self, key):
         else:
             _results = self._data[key]
             if isinstance(_results, Row):
-                return _results.tuple
+                return _results
             else:
-                return [result.tuple for result in _results]
+                return [result for result in _results]
 
     def __setitem__(self, key, value):
         self._validate(value)
-        self._data[key] = Row(value)
+        self._data[key] = Row(value, dset=self)
 
 
     def __delitem__(self, key):
@@ -340,10 +371,13 @@ def _set_headers(self, collection):
         if collection:
             try:
                 self.__headers = list(collection)
+                self._lblidx = (len(set(collection)) == len(collection))
             except TypeError:
+                self._lblidx = False
                 raise TypeError
         else:
             self.__headers = None
+            self._lblidx = False
 
     headers = property(_get_headers, _set_headers)
@@ -381,14 +415,14 @@ def _set_dict(self, pickle):
         if isinstance(pickle[0], list):
             self.wipe()
             for row in pickle:
-                self.append(Row(row))
+                self.append(Row(row, dset=self))
 
         # if list of objects
         elif isinstance(pickle[0], dict):
             self.wipe()
             self.headers = list(pickle[0].keys())
             for row in pickle:
-                self.append(Row(list(row.values())))
+                self.append(Row(list(row.values()), dset=self))
 
         else:
             raise UnsupportedFormat
@@ -675,7 +709,7 @@ def insert(self, index, row, tags=list()):
         """
 
         self._validate(row)
-        self._data.insert(index, Row(row, tags=tags))
+        self._data.insert(index, Row(row, tags=tags, dset=self))
 
 
     def rpush(self, row, tags=list()):
@@ -796,8 +830,7 @@ def insert_col(self, index, col=None, header=None):
                 row.insert(index, col[i])
                 self._data[i] = row
         else:
-            self._data = [Row([row]) for row in col]
-
+            self._data = [Row([row], dset=self) for row in col]
 
     def rpush_col(self, col, header=None):
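
To make the new label-based access concrete, here is a minimal usage sketch. It is illustrative only and not part of the patch; it assumes the patch is applied to a local tablib checkout:

    import tablib

    data = tablib.Dataset()
    data.headers = ('first_name', 'last_name')
    data.append(('John', 'Adams'))

    row = data[0]                      # a Row now, no longer a plain tuple
    print(row['last_name'])            # 'Adams', resolved via Row._index()
    row['last_name'] = 'Quincy Adams'  # label-based assignment also works
    print(row[1])                      # positional access is unchanged

    data.headers = ('name', 'name')    # duplicate labels disable label indexing
    try:
        data[0]['name']
    except TypeError as err:           # raised by Row._index()
        print(err)
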
""" - _dset = copy(self) + _dset = self.copy() _dset._data = [row for row in _dset._data if row.has_tag(tag)] return _dset @@ -949,11 +982,22 @@ def transpose(self): # Adding the column name as now they're a regular column # Use `get_col(index)` in case there are repeated values row_data = [column] + self.get_col(index) - row_data = Row(row_data) + row_data = Row(row_data, dset=self) _dset.append(row=row_data) return _dset + def copy(self): + """Return copy with each Row's Dataset reference set to the new + object""" + + _dset = deepcopy(self) + for row in _dset._data: + row._dset = _dset + + return _dset + + def stack(self, other): """Stack two :class:`Dataset` instances together by joining at the row level, and return new combined @@ -965,14 +1009,17 @@ def stack(self, other): if self.width != other.width: raise InvalidDimensions - # Copy the source data - _dset = copy(self) + # Copy the source data (updates Dataset reference in Rows) + _dset = self.copy() + _dset.extend(other._data) + """ rows_to_stack = [row for row in _dset._data] other_rows = [row for row in other._data] rows_to_stack.extend(other_rows) _dset._data = rows_to_stack + """ return _dset @@ -1022,6 +1069,7 @@ def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() self.__headers = None + self._lblidx = None def subset(self, rows=None, cols=None): @@ -1059,12 +1107,11 @@ def subset(self, rows=None, cols=None): raise KeyError if row_no in rows: - _dset.append(row=Row(data_row)) + _dset.append(row=Row(data_row, dset=_dset)) return _dset - class Databook(object): """A book of :class:`Dataset` objects. """ diff --git a/test_tablib.py b/test_tablib.py index e7b7233c..d8d2edea 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -48,7 +48,7 @@ def test_empty_append(self): # Verify width/data self.assertTrue(data.width == len(new_row)) - self.assertTrue(data[0] == new_row) + self.assertTrue(data[0].tuple == new_row) def test_empty_append_with_headers(self): """Verify append() correctly detects mismatch of number of @@ -81,7 +81,7 @@ def test_add_column(self): data.append_col(new_col) - self.assertEqual(data[0], ('kenneth', 'reitz')) + self.assertEqual(data[0].tuple, ('kenneth', 'reitz')) self.assertEqual(data.width, 2) # With Headers @@ -98,7 +98,7 @@ def test_add_column_no_data_no_headers(self): data.append_col(new_col) - self.assertEqual(data[0], tuple([new_col[0]])) + self.assertEqual(tuple(data[0]), tuple([new_col[0]])) self.assertEqual(data.width, 1) self.assertEqual(data.height, len(new_col)) @@ -111,7 +111,7 @@ def test_add_column_with_header_ignored(self): data.append_col(new_col, header='first_name') - self.assertEqual(data[0], tuple([new_col[0]])) + self.assertEqual(tuple(data[0]), tuple([new_col[0]])) self.assertEqual(data.width, 1) self.assertEqual(data.height, len(new_col)) self.assertEqual(data.headers, None) @@ -165,6 +165,42 @@ def test_header_slicing(self): self.assertEqual(self.founders['gpa'], [self.john[2], self.george[2], self.tom[2]]) + def test_lblidx_valid_update(self): + """Verify Dataset's _lblidx attribute is updated on header setting""" + + self.assertFalse(data._lblidx) + data.append(self.john) + self.assertFalse(data._lblidx) + data.headers = self.headers + self.assertTrue(data._lblidx) + + def test_lblidx_non_unique(self): + """Verify Dataset's _lblidx is set to ``False`` if headers has + duplicate labels""" + + self.assertTrue(self.founders._lblidx) + self.founders.headers = ('one', 'one', 'three') + self.assertFalse(self.founders._lblidx) + + def 
diff --git a/test_tablib.py b/test_tablib.py
index e7b7233c..d8d2edea 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -48,7 +48,7 @@ def test_empty_append(self):
 
         # Verify width/data
         self.assertTrue(data.width == len(new_row))
-        self.assertTrue(data[0] == new_row)
+        self.assertTrue(data[0].tuple == new_row)
 
     def test_empty_append_with_headers(self):
         """Verify append() correctly detects mismatch of number of
@@ -81,7 +81,7 @@ def test_add_column(self):
 
         data.append_col(new_col)
 
-        self.assertEqual(data[0], ('kenneth', 'reitz'))
+        self.assertEqual(data[0].tuple, ('kenneth', 'reitz'))
         self.assertEqual(data.width, 2)
 
         # With Headers
@@ -98,7 +98,7 @@ def test_add_column_no_data_no_headers(self):
 
         data.append_col(new_col)
 
-        self.assertEqual(data[0], tuple([new_col[0]]))
+        self.assertEqual(tuple(data[0]), tuple([new_col[0]]))
         self.assertEqual(data.width, 1)
         self.assertEqual(data.height, len(new_col))
 
@@ -111,7 +111,7 @@ def test_add_column_with_header_ignored(self):
 
         data.append_col(new_col, header='first_name')
 
-        self.assertEqual(data[0], tuple([new_col[0]]))
+        self.assertEqual(tuple(data[0]), tuple([new_col[0]]))
         self.assertEqual(data.width, 1)
         self.assertEqual(data.height, len(new_col))
         self.assertEqual(data.headers, None)
@@ -165,6 +165,42 @@ def test_header_slicing(self):
         self.assertEqual(self.founders['gpa'],
                          [self.john[2], self.george[2], self.tom[2]])
 
+    def test_lblidx_valid_update(self):
+        """Verify Dataset's _lblidx attribute is updated on header setting"""
+
+        self.assertFalse(data._lblidx)
+        data.append(self.john)
+        self.assertFalse(data._lblidx)
+        data.headers = self.headers
+        self.assertTrue(data._lblidx)
+
+    def test_lblidx_non_unique(self):
+        """Verify Dataset's _lblidx is set to ``False`` if the headers
+        contain duplicate labels"""
+
+        self.assertTrue(self.founders._lblidx)
+        self.founders.headers = ('one', 'one', 'three')
+        self.assertFalse(self.founders._lblidx)
+
+    def test_label_based_row_item_access(self):
+        """Verify label-based indexing for Rows works"""
+
+        def label_index_callable(dataset, row_index, col_label):
+            return dataset[row_index][col_label]
+
+        self.founders[0]['last_name'] = 'Jay'
+        self.assertEqual(self.founders[0]['last_name'], 'Jay')
+        self.assertEqual(self.founders[0]['last_name'], self.founders[0][1])
+
+        self.assertRaises(IndexError, label_index_callable, self.founders, 0,
+                          'middle name')
+
+        # non-unique headers, missing headers:
+        for headers in [('same', 'same', 'different'), None]:
+            self.founders.headers = headers
+            self.assertRaises(TypeError, label_index_callable, self.founders,
+                              0, 'same')
+
     def test_get_col(self):
         """Verify getting columns by index"""
 
@@ -184,17 +220,20 @@ def test_data_slicing(self):
         """Verify slicing by data."""
 
         # Slice individual rows
-        self.assertEqual(self.founders[0], self.john)
-        self.assertEqual(self.founders[:1], [self.john])
-        self.assertEqual(self.founders[1:2], [self.george])
-        self.assertEqual(self.founders[-1], self.tom)
+        self.assertEqual(self.founders[0].tuple, self.john)
+        self.assertEqual([r.tuple for r in self.founders[:1]], [self.john])
+        self.assertEqual([r.tuple for r in self.founders[1:2]], [self.george])
+        self.assertEqual(self.founders[-1].tuple, self.tom)
         self.assertEqual(self.founders[3:], [])
 
         # Slice multiple rows
-        self.assertEqual(self.founders[:], [self.john, self.george, self.tom])
-        self.assertEqual(self.founders[0:2], [self.john, self.george])
-        self.assertEqual(self.founders[1:3], [self.george, self.tom])
-        self.assertEqual(self.founders[2:], [self.tom])
+        self.assertEqual([r.tuple for r in self.founders[:]],
+                         [self.john, self.george, self.tom])
+        self.assertEqual([r.tuple for r in self.founders[0:2]],
+                         [self.john, self.george])
+        self.assertEqual([r.tuple for r in self.founders[1:3]],
+                         [self.george, self.tom])
+        self.assertEqual([r.tuple for r in self.founders[2:]], [self.tom])
 
     def test_row_slicing(self):
         """Verify Row's __getslice__ method.
        Issue #184."""
@@ -212,7 +251,8 @@ def test_delete(self):
 
         # Delete from front of object
         del self.founders[0]
-        self.assertEqual(self.founders[:], [self.george, self.tom])
+        self.assertEqual([r.tuple for r in self.founders[:]],
+                         [self.george, self.tom])
 
         # Verify dimensions, width should NOT change
         self.assertEqual(self.founders.height, 2)
@@ -220,7 +260,7 @@
 
         # Delete from back of object
         del self.founders[1]
-        self.assertEqual(self.founders[:], [self.george])
+        self.assertEqual([r.tuple for r in self.founders[:]], [self.george])
 
         # Verify dimensions, width should NOT change
         self.assertEqual(self.founders.height, 1)
@@ -764,6 +804,42 @@ def test_auto_format_detect(self):
         self.assertEqual(tablib.detect_format(_json), 'json')
         self.assertEqual(tablib.detect_format(_bunk), None)
 
+    def test_row_cmp(self):
+        """Test Row's ``==`` and ``!=``"""
+
+        data.append(self.founders[1])
+        self.assertEqual(data[0], self.founders[1])
+        self.assertNotEqual(data[0], self.founders[2])
+        data[0].tags.append('tagged')
+        self.assertNotEqual(data[0], self.founders[1])
+        self.founders[1].tags.append('tagged')
+        self.assertEqual(data[0], self.founders[1])
+
+    def test_row_add(self):
+        """Test Row's ``+``"""
+        data.append('abc')
+        data.append('def')
+        expected = list('abcdef')
+        self.assertEqual(data[0] + data[1], expected)
+        self.assertEqual(data[0] + 'def', expected)
+        self.assertEqual(data[0] + list('def'), expected)
+
+    def test_copy(self):
+        """Test Dataset's copy() method"""
+
+        self.founders[0].tags.append("Sam's cousin")
+        copied = self.founders.copy()
+
+        self.assertEqual(self.founders.headers, copied.headers)
+        self.assertEqual(self.founders.title, copied.title)
+        for orig_row, copy_row in zip(self.founders, copied):
+            self.assertEqual(orig_row, copy_row)
+
+        self.assertTrue(all([r._dset is copied for r in copied]))
+        self.assertFalse(copied is self.founders)
+        # ensure new dataset is not a shallow copy:
+        self.assertFalse(copied._data is self.founders._data)
+
     def test_transpose(self):
         """Transpose a dataset."""
 
@@ -773,11 +849,17 @@ def test_transpose(self):
         self.assertEqual(transposed_founders.headers,
                          ["first_name", "John", "George", "Thomas"])
-        self.assertEqual(first_row,
+        self.assertEqual(first_row.tuple,
                          ("last_name", "Adams", "Washington", "Jefferson"))
-        self.assertEqual(second_row,
+        self.assertEqual(second_row.tuple,
                          ("gpa", 90, 67, 50))
 
+        self.assertTrue(all([r._dset is transposed_founders
+                             for r in transposed_founders]))
+        self.assertFalse(transposed_founders is self.founders)
+        # ensure new dataset is not a shallow copy:
+        self.assertFalse(transposed_founders._data is self.founders._data)
+
     def test_transpose_multiple_headers(self):
 
         data = tablib.Dataset()
@@ -802,6 +884,11 @@ def test_row_stacking(self):
             expected_data = original_data + original_data
             self.assertEqual(row_stacked[column], expected_data)
 
+        self.assertTrue(all([r._dset is row_stacked for r in row_stacked]))
+        self.assertFalse(row_stacked is self.founders)
+        # ensure new dataset is not a shallow copy:
+        self.assertFalse(row_stacked._data is self.founders._data)
+
     def test_column_stacking(self):
         """Column stacking"""
 
@@ -815,11 +902,17 @@ def test_column_stacking(self):
         for index, row in enumerate(column_stacked):
             original_data = self.founders[index]
             expected_data = original_data + original_data
-            self.assertEqual(row, expected_data)
+            self.assertEqual(row.list, expected_data)
 
-        self.assertEqual(column_stacked[0],
+        self.assertEqual(column_stacked[0].tuple,
                          ("John", "Adams", 90, "John", "Adams", 90))
 
+        self.assertTrue(all([r._dset is column_stacked
+                             for r in column_stacked]))
+        self.assertFalse(column_stacked is self.founders)
+        # ensure new dataset is not a shallow copy:
+        self.assertFalse(column_stacked._data is self.founders._data)
+
     def test_sorting(self):
         """Sort columns."""
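
The comparison and concatenation semantics exercised by test_row_cmp and test_row_add above, in one small sketch (illustrative only, assuming the patch is applied):

    import tablib
    from tablib.core import Row

    a = tablib.Dataset(('x', 'y'))
    b = tablib.Dataset(('x', 'y'))

    assert a[0] == b[0]      # equal _row and tags; owning Datasets may differ
    a[0].tags.append('t')
    assert a[0] != b[0]      # tags now differ

    combined = a[0] + b[0]   # concatenation yields a plain list, not a Row
    assert combined == ['x', 'y', 'x', 'y']
    assert not isinstance(combined, Row)
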
@@ -837,24 +930,46 @@ def test_sorting(self):
         self.assertEqual(second_row, expected_second)
         self.assertEqual(third_row, expected_third)
 
+        # check that sorted_data rows reference correct Dataset object:
+        self.assertTrue(all([r._dset is sorted_data for r in sorted_data]))
+        # ensure new dataset is not a shallow copy:
+        self.assertFalse(sorted_data._data is self.founders._data)
+
     def test_remove_duplicates(self):
         """Unique Rows."""
 
         self.founders.append(self.john)
         self.founders.append(self.george)
         self.founders.append(self.tom)
-        self.assertEqual(self.founders[0], self.founders[3])
-        self.assertEqual(self.founders[1], self.founders[4])
-        self.assertEqual(self.founders[2], self.founders[5])
+        self.assertEqual(self.founders[0].tuple, self.founders[3].tuple)
+        self.assertEqual(self.founders[1].tuple, self.founders[4].tuple)
+        self.assertEqual(self.founders[2].tuple, self.founders[5].tuple)
         self.assertEqual(self.founders.height, 6)
         self.founders.remove_duplicates()
-        self.assertEqual(self.founders[0], self.john)
-        self.assertEqual(self.founders[1], self.george)
-        self.assertEqual(self.founders[2], self.tom)
+        self.assertEqual(self.founders[0].tuple, self.john)
+        self.assertEqual(self.founders[1].tuple, self.george)
+        self.assertEqual(self.founders[2].tuple, self.tom)
         self.assertEqual(self.founders.height, 3)
 
+    def test_filter(self):
+        """Test ``filter`` method"""
+
+        self.founders[0].tags.append("sam's cousin")
+        self.founders[0].tags.append('president')
+        self.founders[2].tags.append('president')
+
+        filtered = self.founders.filter('president')
+
+        self.assertEqual(filtered.height, 2)
+        self.assertEqual(filtered[0], self.founders[0])
+        self.assertEqual(filtered[1], self.founders[2])
+
+        self.assertTrue(all([r._dset is filtered for r in filtered]))
+        self.assertFalse(filtered is self.founders)
+        # ensure new dataset is not a shallow copy:
+        self.assertFalse(filtered._data is self.founders._data)
+
     def test_wipe(self):
         """Purge a dataset."""
 
@@ -863,13 +978,13 @@ def test_wipe(self):
 
         # Verify width/data
         self.assertTrue(data.width == len(new_row))
-        self.assertTrue(data[0] == new_row)
+        self.assertTrue(data[0].tuple == new_row)
 
         data.wipe()
         new_row = (1, 2, 3, 4)
         data.append(new_row)
         self.assertTrue(data.width == len(new_row))
-        self.assertTrue(data[0] == new_row)
+        self.assertTrue(data[0].tuple == new_row)
 
     def test_subset(self):
         """Create a subset of a dataset"""
@@ -890,6 +1005,11 @@ def test_subset(self):
         self.assertEqual(subset._data[0].list, ['John', 90])
         self.assertEqual(subset._data[1].list, ['Thomas', 50])
 
+        self.assertTrue(all([r._dset is subset for r in subset]))
+        self.assertFalse(subset is data)
+        # ensure new dataset is not a shallow copy:
+        self.assertFalse(subset._data is data._data)
+
     def test_formatters(self):
         """Confirm formatters are being triggered."""
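
Finally, a note on wipe(): it resets _lblidx along with the data, so label-based access raises TypeError until headers are assigned again. A small sketch (illustrative only, assuming the patch is applied):

    import tablib

    data = tablib.Dataset(('a', 'b'), headers=('x', 'y'))
    assert data._lblidx                 # unique headers enable label indexing

    data.wipe()
    data.append(('c', 'd'))
    try:
        data[0]['x']
    except TypeError:
        print('label indexing is disabled after wipe()')

    data.headers = ('x', 'y')           # re-assigning headers re-enables it
    print(data[0]['x'])                 # 'c'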