|
4 | 4 | from time import sleep |
5 | 5 | from typing import Final, Literal, Self |
6 | 6 |
|
7 | | -import pyexcel |
| 7 | +from pyexcel_io.io import load_data as pyexcel_load_data |
8 | 8 |
|
9 | 9 |
|
10 | 10 | class Spreadsheet: |
@@ -108,27 +108,39 @@ def from_file( |
108 | 108 | if extension == "tsv": |
109 | 109 | file_content = StringIO(Spreadsheet.normalise_newlines(file_content)) |
110 | 110 |
|
111 | | - column_limit = -1 |
112 | | - if column_limit_from_header and extension not in ("xlsx", "xlsm"): |
| 111 | + # why not just use pyexcel.iget_array? this allows us to reuse the same reader |
| 112 | + # from the initial header read for the data reading, which should save some |
| 113 | + # startup cost |
| 114 | + row_iter_dict, reader = pyexcel_load_data( |
| 115 | + file_stream=file_content, |
| 116 | + file_type=extension, |
| 117 | + streaming=True, |
| 118 | + row_limit=(1 if column_limit_from_header else -1), |
| 119 | + ) |
| 120 | + row_iter = next(iter(row_iter_dict.values()), iter(())) |
| 121 | + |
| 122 | + if column_limit_from_header: |
113 | 123 | original_offset = file_content.tell() |
114 | | - header = next(pyexcel.iget_array(file_type=extension, file_stream=file_content, row_limit=1), ()) |
| 124 | + last_nonempty_column = next( |
| 125 | + (i for i, x in reversed(tuple(enumerate(next(row_iter, ())))) if str(x).strip()), None |
| 126 | + ) |
| 127 | + |
| 128 | + # some plugins just read things directly from the file each time, so we need to reset |
| 129 | + # the offset for them to have them read from the beginning again |
115 | 130 | file_content.seek(original_offset) |
116 | 131 |
|
117 | | - last_nonempty_column = next((i for i, x in reversed(tuple(enumerate(header))) if str(x).strip()), None) |
118 | 132 | if last_nonempty_column is not None: |
119 | 133 | if last_nonempty_column >= absolute_column_limit: |
120 | 134 | raise cls.TooManyColumnsError( |
121 | 135 | f"Last non-empty header column ({last_nonempty_column}) " |
122 | 136 | f"is beyond absolute limit of {absolute_column_limit}" |
123 | 137 | ) |
124 | 138 |
|
125 | | - column_limit = max(last_nonempty_column + 1, min_column_limit) |
| 139 | + reader.keywords["column_limit"] = max(last_nonempty_column + 1, min_column_limit) |
| 140 | + reader.keywords["row_limit"] = -1 |
| 141 | + row_iter = next(iter(reader.read_sheet_by_index(0).values())) |
126 | 142 |
|
127 | | - return cls.from_rows( |
128 | | - pyexcel.iget_array(file_type=extension, file_stream=file_content, column_limit=column_limit), |
129 | | - filename, |
130 | | - row_limit=row_limit, |
131 | | - ) |
| 143 | + return cls.from_rows(row_iter, filename, row_limit=row_limit) |
132 | 144 |
|
133 | 145 | @classmethod |
134 | 146 | def from_file_form( |
|
0 commit comments