From 204ecbe7f1876c7dc7f1d5f428294cc943ea1a8a Mon Sep 17 00:00:00 2001 From: markus Date: Sun, 1 Jan 2017 23:06:48 +0100 Subject: [PATCH 1/5] Allow label based indexing in Rows (incl. test updates) --- .gitignore | 1 + tablib/core.py | 109 ++++++++++++++++++++++--------- test_tablib.py | 171 +++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 225 insertions(+), 56 deletions(-) diff --git a/.gitignore b/.gitignore index 379075b3..3f08a73d 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ profile # vi noise *.swp +*~ docs/_build/* coverage.xml nosetests.xml diff --git a/tablib/core.py b/tablib/core.py index c5faad3a..a8536288 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -9,7 +9,7 @@ :license: MIT, see LICENSE for more details. """ -from copy import copy +from copy import deepcopy from operator import itemgetter from tablib import formats @@ -27,13 +27,16 @@ class Row(object): - """Internal Row object. Mainly used for filtering.""" + """Internal Row object. Mainly used for filtering. Note: To allow label + based indexing Row needs to be aware of the Dataset it belongs to. This is + passed to the constructor's `dset` argument.""" - __slots__ = ['_row', 'tags'] + __slots__ = ['_row', 'tags', '_dset'] - def __init__(self, row=list(), tags=list()): + def __init__(self, row=list(), tags=list(), dset=None): self._row = list(row) self.tags = list(tags) + self._dset = dset def __iter__(self): return (col for col in self._row) @@ -47,14 +50,47 @@ def __repr__(self): def __getslice__(self, i, j): return self._row[i:j] - def __getitem__(self, i): - return self._row[i] + def _index(self, key): + """Returns index for ``key`` (string or int). 
Raises TypeError if
+        ``key`` is a string but Dataset has no unique headers set and IndexError
+        if ``key`` is not in headers."""
 
-    def __setitem__(self, i, value):
-        self._row[i] = value
+        if isinstance(key, (str, unicode)):
+            if not self._dset._lblidx:
+                raise TypeError("Cannot access element by key '{}' - Dataset"
+                                " headers not suitable for indexing".format(key))
+            try:
+                i = self._dset.headers.index(key)
+            except ValueError:
+                raise IndexError("'{}' not in Dataset headers".format(key))
+        else:
+            i = key
 
-    def __delitem__(self, i):
-        del self._row[i]
+        return i
+
+    def __getitem__(self, key):
+        return self._row[self._index(key)]
+
+    def __setitem__(self, key, value):
+        self._row[self._index(key)] = value
+
+    def __delitem__(self, key):
+        del self._row[self._index(key)]
+
+    def __add__(self, other):
+        """Returns concatenation as plain list. ``other`` can be Row or a
+        sequence type"""
+        return self._row + list(other)
+
+    def __eq__(self, other):
+        """Requires ``_row`` and ``tags`` attributes to be equal but not
+        headers of respective owning Datasets"""
+        if not isinstance(other, Row):
+            raise TypeError("Can't compare Row to %s" % type(other))
+        return self._row == other._row and self.tags == other.tags
+
+    def __ne__(self, other):
+        return not self == other
 
     def __getstate__(self):
 
@@ -105,8 +141,6 @@ def has_tag(self, tag):
 
         return bool(len(set(tag) & set(self.tags)))
 
-
-
 class Dataset(object):
     """The :class:`Dataset` object is the heart of Tablib. It provides all core
     functionality.
@@ -157,8 +191,9 @@ class Dataset(object): _formats = {} def __init__(self, *args, **kwargs): - self._data = list(Row(arg) for arg in args) + self._data = list(Row(arg, dset=self) for arg in args) self.__headers = None + self._lblidx = False # ('title', index) tuples self._separators = [] @@ -172,13 +207,11 @@ def __init__(self, *args, **kwargs): self._register_formats() - def __len__(self): return self.height - def __getitem__(self, key): - if isinstance(key, str) or isinstance(key, unicode): + if isinstance(key, (str, unicode)): if key in self.headers: pos = self.headers.index(key) # get 'key' index from each data return [row[pos] for row in self._data] @@ -187,13 +220,13 @@ def __getitem__(self, key): else: _results = self._data[key] if isinstance(_results, Row): - return _results.tuple + return _results else: - return [result.tuple for result in _results] + return [result for result in _results] def __setitem__(self, key, value): self._validate(value) - self._data[key] = Row(value) + self._data[key] = Row(value, dset=self) def __delitem__(self, key): @@ -339,10 +372,13 @@ def _set_headers(self, collection): if collection: try: self.__headers = list(collection) + self._lblidx = (len(set(collection)) == len(collection)) except TypeError: + self._lblidx = False raise TypeError else: self.__headers = None + self._lblidx = False headers = property(_get_headers, _set_headers) @@ -380,14 +416,14 @@ def _set_dict(self, pickle): if isinstance(pickle[0], list): self.wipe() for row in pickle: - self.append(Row(row)) + self.append(Row(row, dset=self)) # if list of objects elif isinstance(pickle[0], dict): self.wipe() self.headers = list(pickle[0].keys()) for row in pickle: - self.append(Row(list(row.values()))) + self.append(Row(list(row.values()), dset=self)) else: raise UnsupportedFormat @@ -644,7 +680,7 @@ def insert(self, index, row, tags=list()): """ self._validate(row) - self._data.insert(index, Row(row, tags=tags)) + self._data.insert(index, Row(row, tags=tags, 
dset=self)) def rpush(self, row, tags=list()): @@ -765,8 +801,7 @@ def insert_col(self, index, col=None, header=None): row.insert(index, col[i]) self._data[i] = row else: - self._data = [Row([row]) for row in col] - + self._data = [Row([row], dset=self) for row in col] def rpush_col(self, col, header=None): @@ -849,7 +884,7 @@ def filter(self, tag): """Returns a new instance of the :class:`Dataset`, excluding any rows that do not contain the given :ref:`tags `. """ - _dset = copy(self) + _dset = self.copy() _dset._data = [row for row in _dset._data if row.has_tag(tag)] return _dset @@ -918,11 +953,22 @@ def transpose(self): # Adding the column name as now they're a regular column # Use `get_col(index)` in case there are repeated values row_data = [column] + self.get_col(index) - row_data = Row(row_data) + row_data = Row(row_data, dset=self) _dset.append(row=row_data) return _dset + def copy(self): + """Return copy with each Row's Dataset reference set to the new + object""" + + _dset = deepcopy(self) + for row in _dset._data: + row._dset = _dset + + return _dset + + def stack(self, other): """Stack two :class:`Dataset` instances together by joining at the row level, and return new combined @@ -934,14 +980,17 @@ def stack(self, other): if self.width != other.width: raise InvalidDimensions - # Copy the source data - _dset = copy(self) + # Copy the source data (updates Dataset reference in Rows) + _dset = self.copy() + _dset.extend(other._data) + """ rows_to_stack = [row for row in _dset._data] other_rows = [row for row in other._data] rows_to_stack.extend(other_rows) _dset._data = rows_to_stack + """ return _dset @@ -991,6 +1040,7 @@ def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() self.__headers = None + self._lblidx = None def subset(self, rows=None, cols=None): @@ -1028,12 +1078,11 @@ def subset(self, rows=None, cols=None): raise KeyError if row_no in rows: - _dset.append(row=Row(data_row)) + 
_dset.append(row=Row(data_row, dset=_dset)) return _dset - class Databook(object): """A book of :class:`Dataset` objects. """ diff --git a/test_tablib.py b/test_tablib.py index 6aa4be43..16193bba 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -46,7 +46,7 @@ def test_empty_append(self): # Verify width/data self.assertTrue(data.width == len(new_row)) - self.assertTrue(data[0] == new_row) + self.assertTrue(data[0].tuple == new_row) def test_empty_append_with_headers(self): """Verify append() correctly detects mismatch of number of @@ -79,7 +79,7 @@ def test_add_column(self): data.append_col(new_col) - self.assertEqual(data[0], ('kenneth', 'reitz')) + self.assertEqual(tuple(data[0]), ('kenneth', 'reitz')) self.assertEqual(data.width, 2) # With Headers @@ -96,7 +96,7 @@ def test_add_column_no_data_no_headers(self): data.append_col(new_col) - self.assertEqual(data[0], tuple([new_col[0]])) + self.assertEqual(tuple(data[0]), tuple([new_col[0]])) self.assertEqual(data.width, 1) self.assertEqual(data.height, len(new_col)) @@ -109,7 +109,7 @@ def test_add_column_with_header_ignored(self): data.append_col(new_col, header='first_name') - self.assertEqual(data[0], tuple([new_col[0]])) + self.assertEqual(tuple(data[0]), tuple([new_col[0]])) self.assertEqual(data.width, 1) self.assertEqual(data.height, len(new_col)) self.assertEqual(data.headers, None) @@ -163,6 +163,41 @@ def test_header_slicing(self): self.assertEqual(self.founders['gpa'], [self.john[2], self.george[2], self.tom[2]]) + def test_lblidx_valid_update(self): + """Verify Dataset's _lblidx attribute is updated on header setting""" + + self.assertFalse(data._lblidx) + data.append(self.john) + self.assertFalse(data._lblidx) + data.headers = self.headers + self.assertTrue(data._lblidx) + + def test_lblidx_non_unique(self): + """Verify Dataset's _lblidx is set to ``False`` if headers has + duplicate labels""" + + self.assertTrue(self.founders._lblidx) + self.founders.headers = ('one', 'one', 'three') + 
self.assertFalse(self.founders._lblidx) + + def test_label_based_row_item_access(self): + """Verify label based indexing for Rows works""" + + self.founders[0]['last_name'] = 'Jay' + self.assertEqual(self.founders[0]['last_name'], 'Jay') + self.assertEqual(self.founders[0]['last_name'], self.founders[0][1]) + + with self.assertRaises(IndexError, + msg="'middle_name' not in Dataset headers"): + self.founders[0]['middle_name'] = 'Quincy' + + # non-unique headers, missing headers: + for headers in [('same', 'same', 'different'), None]: + self.founders.headers = headers + with self.assertRaises(TypeError, msg="Cannot access element by" + " key '{}' - Dataset headers not suitable for indexing"): + self.founders[0]['same'] + def test_get_col(self): """Verify getting columns by index""" @@ -182,17 +217,20 @@ def test_data_slicing(self): """Verify slicing by data.""" # Slice individual rows - self.assertEqual(self.founders[0], self.john) - self.assertEqual(self.founders[:1], [self.john]) - self.assertEqual(self.founders[1:2], [self.george]) - self.assertEqual(self.founders[-1], self.tom) + self.assertEqual(self.founders[0].tuple, self.john) + self.assertEqual([r.tuple for r in self.founders[:1]], [self.john]) + self.assertEqual([r.tuple for r in self.founders[1:2]], [self.george]) + self.assertEqual(self.founders[-1].tuple, self.tom) self.assertEqual(self.founders[3:], []) # Slice multiple rows - self.assertEqual(self.founders[:], [self.john, self.george, self.tom]) - self.assertEqual(self.founders[0:2], [self.john, self.george]) - self.assertEqual(self.founders[1:3], [self.george, self.tom]) - self.assertEqual(self.founders[2:], [self.tom]) + self.assertEqual([r.tuple for r in self.founders[:]], + [self.john, self.george, self.tom]) + self.assertEqual([r.tuple for r in self.founders[0:2]], + [self.john, self.george]) + self.assertEqual([r.tuple for r in self.founders[1:3]], + [self.george, self.tom]) + self.assertEqual([r.tuple for r in self.founders[2:]], [self.tom]) def 
test_row_slicing(self): """Verify Row's __getslice__ method. Issue #184.""" @@ -210,7 +248,8 @@ def test_delete(self): # Delete from front of object del self.founders[0] - self.assertEqual(self.founders[:], [self.george, self.tom]) + self.assertEqual([r.tuple for r in self.founders[:]], + [self.george, self.tom]) # Verify dimensions, width should NOT change self.assertEqual(self.founders.height, 2) @@ -218,7 +257,7 @@ def test_delete(self): # Delete from back of object del self.founders[1] - self.assertEqual(self.founders[:], [self.george]) + self.assertEqual([r.tuple for r in self.founders[:]], [self.george]) # Verify dimensions, width should NOT change self.assertEqual(self.founders.height, 1) @@ -714,6 +753,42 @@ def test_auto_format_detect(self): self.assertEqual(tablib.detect_format(_json), 'json') self.assertEqual(tablib.detect_format(_bunk), None) + def test_row_cmp(self): + """Test Row's ``==`` and ``!=``""" + + data.append(self.founders[1]) + self.assertEqual(data[0], self.founders[1]) + self.assertNotEqual(data[0], self.founders[2]) + data[0].tags.append('tagged') + self.assertNotEqual(data[0], self.founders[1]) + self.founders[1].tags.append('tagged') + self.assertEqual(data[0], self.founders[1]) + + def test_row_add(self): + """ Test Row's ``+``""" + data.append('abc') + data.append('def') + expected = list('abcdef') + self.assertEqual(data[0] + data[1], expected) + self.assertEqual(data[0] + 'def', expected) + self.assertEqual(data[0] + list('def'), expected) + + def test_copy(self): + """Test Dataset's copy() method""" + + self.founders[0].tags.append("Sam's cousin") + copied = self.founders.copy() + + self.assertEqual(self.founders.headers, copied.headers) + self.assertEqual(self.founders.title, copied.title) + for orig_row, copy_row in zip(self.founders, copied): + self.assertEqual(orig_row, copy_row) + + self.assertTrue(all([r._dset is copied for r in copied])) + self.assertFalse(copied is self.founders) + # ensure new dataset is not a shallow 
copy: + self.assertFalse(copied._data is self.founders._data) + def test_transpose(self): """Transpose a dataset.""" @@ -723,11 +798,17 @@ def test_transpose(self): self.assertEqual(transposed_founders.headers, ["first_name", "John", "George", "Thomas"]) - self.assertEqual(first_row, + self.assertEqual(first_row.tuple, ("last_name", "Adams", "Washington", "Jefferson")) - self.assertEqual(second_row, + self.assertEqual(second_row.tuple, ("gpa", 90, 67, 50)) + self.assertTrue(all([r._dset is transposed_founders + for r in transposed_founders])) + self.assertFalse(transposed_founders is self.founders) + # ensure new dataset is not a shallow copy: + self.assertFalse(transposed_founders._data is self.founders._data) + def test_transpose_multiple_headers(self): data = tablib.Dataset() @@ -752,6 +833,11 @@ def test_row_stacking(self): expected_data = original_data + original_data self.assertEqual(row_stacked[column], expected_data) + self.assertTrue(all([r._dset is row_stacked for r in row_stacked])) + self.assertFalse(row_stacked is self.founders) + # ensure new dataset is not a shallow copy: + self.assertFalse(row_stacked._data is self.founders._data) + def test_column_stacking(self): """Column stacking""" @@ -765,11 +851,17 @@ def test_column_stacking(self): for index, row in enumerate(column_stacked): original_data = self.founders[index] expected_data = original_data + original_data - self.assertEqual(row, expected_data) + self.assertEqual(row.list, expected_data) - self.assertEqual(column_stacked[0], + self.assertEqual(tuple(column_stacked[0]), ("John", "Adams", 90, "John", "Adams", 90)) + self.assertTrue(all([r._dset is column_stacked + for r in column_stacked])) + self.assertFalse(column_stacked is self.founders) + # ensure new dataset is not a shallow copy: + self.assertFalse(column_stacked._data is self.founders._data) + def test_sorting(self): """Sort columns.""" @@ -787,24 +879,46 @@ def test_sorting(self): self.assertEqual(second_row, expected_second) 
self.assertEqual(third_row, expected_third) + # check that sorted_data rows reference correct Dataset object: + self.assertTrue(all([r._dset is sorted_data for r in sorted_data])) + # ensure new dataset is not a shallow copy: + self.assertFalse(sorted_data._data is self.founders._data) + def test_remove_duplicates(self): """Unique Rows.""" self.founders.append(self.john) self.founders.append(self.george) self.founders.append(self.tom) - self.assertEqual(self.founders[0], self.founders[3]) - self.assertEqual(self.founders[1], self.founders[4]) - self.assertEqual(self.founders[2], self.founders[5]) + self.assertEqual(self.founders[0].tuple, self.founders[3].tuple) + self.assertEqual(self.founders[1].tuple, self.founders[4].tuple) + self.assertEqual(self.founders[2].tuple, self.founders[5].tuple) self.assertEqual(self.founders.height, 6) self.founders.remove_duplicates() - self.assertEqual(self.founders[0], self.john) - self.assertEqual(self.founders[1], self.george) - self.assertEqual(self.founders[2], self.tom) + self.assertEqual(self.founders[0].tuple, self.john) + self.assertEqual(self.founders[1].tuple, self.george) + self.assertEqual(self.founders[2].tuple, self.tom) self.assertEqual(self.founders.height, 3) + def test_filter(self): + """Test ``filter`` method""" + self.founders[0].tags.append("sam's cousin") + self.founders[0].tags.append('president') + self.founders[2].tags.append('president') + + filtered = self.founders.filter('president') + + self.assertEqual(filtered.height, 2) + self.assertEqual(filtered[0], self.founders[0]) + self.assertEqual(filtered[1], self.founders[2]) + + self.assertTrue(all([r._dset is filtered for r in filtered])) + self.assertFalse(filtered is self.founders) + # ensure new dataset is not a shallow copy: + self.assertFalse(filtered._data is self.founders._data) + def test_wipe(self): """Purge a dataset.""" @@ -813,13 +927,13 @@ def test_wipe(self): # Verify width/data self.assertTrue(data.width == len(new_row)) - 
self.assertTrue(data[0] == new_row) + self.assertTrue(data[0].tuple == new_row) data.wipe() new_row = (1, 2, 3, 4) data.append(new_row) self.assertTrue(data.width == len(new_row)) - self.assertTrue(data[0] == new_row) + self.assertTrue(data[0].tuple == new_row) def test_subset(self): """Create a subset of a dataset""" @@ -840,6 +954,11 @@ def test_subset(self): self.assertEqual(subset._data[0].list, ['John', 90]) self.assertEqual(subset._data[1].list, ['Thomas', 50]) + self.assertTrue(all([r._dset is subset for r in subset])) + self.assertFalse(subset is data) + # ensure new dataset is not a shallow copy: + self.assertFalse(subset._data is data._data) + def test_formatters(self): """Confirm formatters are being triggered.""" From a79f6386044c02332d43aef24dcfdb415fef3b53 Mon Sep 17 00:00:00 2001 From: markus Date: Mon, 2 Jan 2017 10:38:33 +0100 Subject: [PATCH 2/5] Change assertRaises for Python 2.6 compatibility --- test_tablib.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test_tablib.py b/test_tablib.py index 16193bba..6ff786e7 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -79,7 +79,7 @@ def test_add_column(self): data.append_col(new_col) - self.assertEqual(tuple(data[0]), ('kenneth', 'reitz')) + self.assertEqual(data[0].tuple, ('kenneth', 'reitz')) self.assertEqual(data.width, 2) # With Headers @@ -183,20 +183,21 @@ def test_lblidx_non_unique(self): def test_label_based_row_item_access(self): """Verify label based indexing for Rows works""" + def label_index_callable(dataset, row_index, col_label): + return dataset[row_index][col_label] + self.founders[0]['last_name'] = 'Jay' self.assertEqual(self.founders[0]['last_name'], 'Jay') self.assertEqual(self.founders[0]['last_name'], self.founders[0][1]) - with self.assertRaises(IndexError, - msg="'middle_name' not in Dataset headers"): - self.founders[0]['middle_name'] = 'Quincy' + self.assertRaises(IndexError, label_index_callable, self.founders, 0, + 'middle name') # 
non-unique headers, missing headers: for headers in [('same', 'same', 'different'), None]: self.founders.headers = headers - with self.assertRaises(TypeError, msg="Cannot access element by" - " key '{}' - Dataset headers not suitable for indexing"): - self.founders[0]['same'] + self.assertRaises(TypeError, label_index_callable, self.founders, + 0, 'same') def test_get_col(self): """Verify getting columns by index""" @@ -853,7 +854,7 @@ def test_column_stacking(self): expected_data = original_data + original_data self.assertEqual(row.list, expected_data) - self.assertEqual(tuple(column_stacked[0]), + self.assertEqual(column_stacked[0].tuple, ("John", "Adams", 90, "John", "Adams", 90)) self.assertTrue(all([r._dset is column_stacked From 7fba34669d9702ab80e04f4c97b95284d15dff0f Mon Sep 17 00:00:00 2001 From: markus Date: Mon, 2 Jan 2017 10:45:19 +0100 Subject: [PATCH 3/5] Change format strings for Python 2.6 compatibility --- tablib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index a8536288..a370b37b 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -57,12 +57,12 @@ def _index(self, key): if isinstance(key, (str, unicode)): if not self._dset._lblidx: - raise TypeError("Cannot access element by key '{}' - Dataset" + raise TypeError("Cannot access element by key '{0}' - Dataset" " headers not suitable for indexing".format(key)) try: i = self._dset.headers.index(key) except ValueError: - raise IndexError("'{}' not in Dataset headers".format(key)) + raise IndexError("'{0}' not in Dataset headers".format(key)) else: i = key From c94658e2c937763366ec57dc0f4790114f471af6 Mon Sep 17 00:00:00 2001 From: markus Date: Sat, 16 Mar 2019 14:39:06 +0100 Subject: [PATCH 4/5] Resolve merge conflict with master --- tablib/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index a370b37b..bb70b919 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -9,12 +9,13 @@ 
:license: MIT, see LICENSE for more details. """ +from collections import OrderedDict from copy import deepcopy from operator import itemgetter from tablib import formats -from tablib.compat import OrderedDict, unicode +from tablib.compat import unicode __title__ = 'tablib' From 80ae28c7c0ae408ff995839713c478e4ceebdcf2 Mon Sep 17 00:00:00 2001 From: markus Date: Sun, 17 Mar 2019 11:07:41 +0100 Subject: [PATCH 5/5] fix `has_tag` method for correct unicode handling --- tablib/core.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 4f3affa5..a3b9a41c 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -136,10 +136,8 @@ def has_tag(self, tag): if tag == None: return False - elif isinstance(tag, str): - return (tag in self.tags) else: - return bool(len(set(tag) & set(self.tags))) + return (tag in self.tags) class Dataset(object):