Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support column_width in xlsx format #516

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Here is a list of past and present much-appreciated contributors:
Bruno Soares
Claude Paroz
Daniel Santos
Egor Osokin
Erik Youngren
Hugo van Kemenade
Iuri de Silvio
Expand Down
13 changes: 13 additions & 0 deletions docs/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,19 @@ The ``import_set()`` method also supports a ``skip_lines`` parameter that you
can set to a number of lines that should be skipped before starting to read
data.

The ``export_set()`` method supports a ``column_width`` parameter that controls the
width of the exported columns. It can be ``None``, an integer, or ``"adaptive"`` (the default).
andrewgy8 marked this conversation as resolved.
Show resolved Hide resolved
If ``"adaptive"`` is passed, the width of each column is calculated separately,
based on the length of the longest value in that column. For example::

data = tablib.Dataset()
data.export('xlsx', column_width='adaptive')



andrewgy8 marked this conversation as resolved.
Show resolved Hide resolved
.. versionchanged:: 3.8.0
The ``column_width`` parameter for ``export_set()`` was added.

.. versionchanged:: 3.1.0

The ``skip_lines`` parameter for ``import_set()`` was added.
Expand Down
40 changes: 38 additions & 2 deletions src/tablib/formats/_xlsx.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
""" Tablib - XLSX Support.
"""

import re
from io import BytesIO

Expand Down Expand Up @@ -35,7 +34,8 @@ def detect(cls, stream):
return False

@classmethod
def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False):
def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-",
escape=False, column_width="adaptive"):
"""Returns XLSX representation of Dataset.

If ``freeze_panes`` is True, Export will freeze panes only after first line.
Expand All @@ -48,6 +48,12 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=F
If ``escape`` is True, formulae will have the leading '=' character removed.
This is a security measure to prevent formulae from executing by default
in exported XLSX files.

If ``column_width`` is set to "adaptive", the column width will be set to the maximum
width of the content in each column. If it is set to an integer, the column width will be
set to that integer value. If it is set to None, the column width will be set as the
default openpyxl.Worksheet width value.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about accepting a list of column widths to be able to set widths per column? It may also be a follow-up improvement PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think that would be a valid iteration on this change 👍


"""
wb = Workbook()
ws = wb.worksheets[0]
Expand All @@ -59,6 +65,8 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=F

cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes, escape=escape)

cls._adapt_column_width(ws, column_width)

stream = BytesIO()
wb.save(stream)
return stream.getvalue()
Expand Down Expand Up @@ -166,3 +174,31 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False):

if escape and cell.data_type == 'f' and cell.value.startswith('='):
cell.value = cell.value.replace("=", "")

@classmethod
def _adapt_column_width(cls, worksheet, width):
    """Apply the ``column_width`` export option to ``worksheet``.

    ``width`` may be:

    * ``None`` -- leave the column widths untouched;
    * ``"adaptive"`` -- size every column to its longest stringified value;
    * a number -- apply that same width to every column.

    Raises ``ValueError`` for any other value.
    """
    if width is None:
        return

    adaptive = isinstance(width, str) and width == "adaptive"
    # bool is a subclass of int, so it has to be excluded explicitly. Floats
    # are allowed alongside integers to stay backward-compatible.
    numeric = isinstance(width, (int, float)) and not isinstance(width, bool)
    if not adaptive and not numeric:
        msg = (
            f"Invalid value for column_width: {width}. "
            f"Must be 'adaptive' or an integer."
        )
        raise ValueError(msg)

    if adaptive:
        column_widths = []
        for row in worksheet.values:
            for index, value in enumerate(row):
                # Empty cells are None; they must not be measured as the
                # four characters of the string "None".
                length = 0 if value is None else len(str(value))
                if index < len(column_widths):
                    column_widths[index] = max(column_widths[index], length)
                else:
                    column_widths.append(length)
    else:
        column_widths = [width] * worksheet.max_column

    for index, column_width in enumerate(column_widths, 1):  # columns are 1-based
        if adaptive and not column_width:
            # A column without any content keeps its current width instead
            # of being collapsed to zero.
            continue
        worksheet.column_dimensions[get_column_letter(index)].width = column_width
42 changes: 42 additions & 0 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,6 +1339,25 @@ def get_format_str(cell):


class XLSXTests(BaseTestCase):
def _helper_export_column_width(self, column_width):
    """Export the sample workbook twice and report the width of column A.

    Returns ``(width_before, width_after)``: the width read back after
    exporting the loaded data with ``column_width``, and the width read back
    after appending a very long value and exporting again.
    """
    def _exported_width(dataset, width_arg):
        # Round-trip through openpyxl to read the width stored for column A.
        payload = dataset.export('xlsx', column_width=width_arg)
        workbook = load_workbook(filename=BytesIO(payload))
        return workbook.active.column_dimensions['A'].width

    source_path = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx'
    with source_path.open('rb') as stream:
        data = tablib.Dataset().load(stream)

    width_before = _exported_width(data, column_width)

    long_row = [
        'verylongvalue-verylongvalue-verylongvalue-verylongvalue-'
        'verylongvalue-verylongvalue-verylongvalue-verylongvalue',
    ]
    data.append(long_row)

    # NOTE(review): the second export is given ``width_before`` rather than
    # ``column_width`` -- confirm this is intended.
    width_after = _exported_width(data, width_before)
    return width_before, width_after

def test_xlsx_format_detect(self):
"""Test the XLSX format detection."""
in_stream = self.founders.xlsx
Expand Down Expand Up @@ -1483,6 +1502,29 @@ def test_xlsx_raise_ValueError_on_cell_write_during_export(self):
wb = load_workbook(filename=BytesIO(_xlsx))
self.assertEqual('[1]', wb.active['A1'].value)

def test_xlsx_column_width_adaptive(self):
    """Exporting with column_width="adaptive" gives column A the expected width."""
    initial_width, final_width = self._helper_export_column_width("adaptive")
    self.assertEqual(initial_width, 11)
    self.assertEqual(final_width, 11)

def test_xlsx_column_width_integer(self):
    """Exporting with an integer column_width applies that width to column A."""
    initial_width, final_width = self._helper_export_column_width(10)
    self.assertEqual(initial_width, 10)
    self.assertEqual(final_width, 10)

def test_xlsx_column_width_none(self):
    """Exporting with column_width=None leaves column A at the expected width."""
    initial_width, final_width = self._helper_export_column_width(None)
    self.assertEqual(initial_width, 13)
    self.assertEqual(final_width, 13)

def test_xlsx_column_width_value_error(self):
    """An unsupported string passed as column_width must raise ValueError."""
    unsupported = "invalid input"
    with self.assertRaises(ValueError):
        self._helper_export_column_width(unsupported)


class JSONTests(BaseTestCase):
def test_json_format_detect(self):
Expand Down
Loading