Skip to content

Commit 2911acc

Browse files
No public description
PiperOrigin-RevId: 741205239
1 parent 009446f commit 2911acc

File tree

1 file changed

+102
-10
lines changed

1 file changed

+102
-10
lines changed

google/colab/sheets.py

Lines changed: 102 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import abc
44
import datetime
5+
import importlib
6+
import operator
57
import google.auth
68
from google.colab import auth
79
import gspread
@@ -12,6 +14,10 @@
1214
_gspread_client = None
1315

1416

17+
_PANDAS = 'pandas'
18+
_POLARS = 'polars'
19+
20+
1521
def _clean_val(val):
1622
if isinstance(val, pd.Timestamp):
1723
return val.isoformat()
@@ -56,6 +62,7 @@ class InteractiveSheet:
5662
url: a string with the url to the sheet
5763
embedded_url: a string with the url to the embedded sheet
5864
storage_strategy: an instance of InteractiveSheetStorageStrategy
65+
backend: A string indicating the backend the interactive sheet uses
5966
"""
6067

6168
def __init__(
@@ -70,6 +77,7 @@ def __init__(
7077
credentials=None,
7178
include_column_headers=True,
7279
display=True,
80+
backend=_PANDAS,
7381
):
7482
"""Initialize a new InteractiveSheet.
7583
@@ -94,6 +102,12 @@ def __init__(
94102
include_column_headers: If True, assume the first row of the sheet is a
95103
header column for both reads and writes.
96104
display: If True, displays the embedded sheet in the cell output.
105+
backend: The dataframe library to use, must be one of `'pandas'` or
106+
`'polars'`. To use polars it must actually be installed.
107+
108+
Raises:
109+
ValueError: When an incompatible `backend` is supplied.
110+
ModuleNotFoundError: When `backend='polars'` but polars is not installed.
97111
"""
98112
if sum([bool(url), bool(sheet_id), bool(title)]) > 1:
99113
raise ValueError(
@@ -125,12 +139,23 @@ def __init__(
125139
self.embedded_url = (
126140
f'{self.sheet.url}/edit?rm=embedded#gid={self.worksheet.id}'
127141
)
128-
129-
if include_column_headers:
130-
self.storage_strategy = HeaderStorageStrategy()
142+
self.backend = backend
143+
if backend == _POLARS:
144+
self.storage_strategy = (
145+
PolarsHeaderStorageStrategy()
146+
if include_column_headers
147+
else PolarsHeaderlessStorageStrategy()
148+
)
149+
elif backend == _PANDAS:
150+
self.storage_strategy = (
151+
HeaderStorageStrategy()
152+
if include_column_headers
153+
else HeaderlessStorageStrategy()
154+
)
131155
else:
132-
self.storage_strategy = HeaderlessStorageStrategy()
133-
156+
raise ValueError(
157+
f"Unrecognized backend '{backend}', use one of 'polars' or 'pandas'."
158+
)
134159
if df is not None:
135160
self.update(df=df)
136161
if display:
@@ -183,19 +208,35 @@ def as_df(self, range_name=None):
183208
a pandas or polars DataFrame (matching `backend`) with the latest data from the current worksheet
184209
"""
185210
self._ensure_gspread_client()
186-
data = self.storage_strategy.read(self.worksheet, range_name)
187-
return pd.DataFrame(data)
211+
return self.storage_strategy.read(self.worksheet, range_name)
188212

189213
def update(self, df, **kwargs):
  """Update clears the sheet and replaces it with the provided dataframe.

  The backend/dataframe compatibility check runs before the worksheet is
  cleared, so a rejected `df` leaves the sheet contents untouched.

  Args:
    df: the source data. With `backend='polars'` it is handed to the storage
      strategy unchanged; with `backend='pandas'` it is first passed through
      `_to_frame`.
    **kwargs: additional arguments to pass to the gspread update method

  Raises:
    ValueError: When a pandas dataframe is passed to an instance with
      `backend='polars'` or vice versa.
  """
  if self.backend == _POLARS and isinstance(df, pd.DataFrame):
    raise ValueError(
        'Unexpected DataFrame. Got: pandas, want: polars. To use a pandas'
        " dataframe with InteractiveSheet you must set backend='pandas' when"
        ' creating the sheet'
    )
  # Identify polars objects by the package that defines their type, so polars
  # never has to be imported (or installed) for pandas-backed sheets. Only
  # genuine polars inputs are rejected here: any other non-DataFrame input
  # keeps flowing into `_to_frame`, as it did before the backend option
  # existed, instead of being reported as "polars".
  is_polars_object = type(df).__module__.split('.')[0] == 'polars'
  if self.backend == _PANDAS and is_polars_object:
    raise ValueError(
        'Unexpected DataFrame. Got: polars, want: pandas. To use a polars'
        " dataframe with InteractiveSheet you must set backend='polars' when"
        ' creating the sheet'
    )
  self._ensure_gspread_client()
  self.worksheet.clear()
  frame = df if (self.backend == _POLARS) else _to_frame(df)
  self.storage_strategy.write(self.worksheet, frame, **kwargs)
199240

200241
def display(self, height=600):
201242
"""Display the embedded sheet in Colab.
@@ -239,12 +280,63 @@ def read(self, worksheet, range_name=None):
239280
data = worksheet.get_values(range_name)
240281
if not data:
241282
return pd.DataFrame()
242-
# Data is a list of lists, i.e. [[col1, col2], [row1, row2], ...], where
243-
# the first element is the column names, the rest are the rows.
283+
# Data is a list of lists, i.e.
284+
# [[header1, header2], [row1col1, row1col2], ...], where the first element
285+
# is the column names, the rest are the rows.
244286
columns = data[0]
245287
rows = data[1:]
246288
return pd.DataFrame(rows, columns=columns)
247289

248290
def write(self, worksheet, df, **kwargs):
  """Send the frame to the worksheet with the column names as the first row.

  Args:
    worksheet: object exposing an `update(range, values, **kwargs)` method
      (presumably a gspread worksheet; confirm against the caller).
    df: a pandas dataframe; its columns become the first row written.
    **kwargs: forwarded unchanged to `worksheet.update`.
  """
  header_row = list(df.columns)
  body_rows = [list(record) for _index, record in df.iterrows()]
  worksheet.update('', [header_row] + body_rows, **kwargs)
293+
294+
295+
class PolarsHeaderlessStorageStrategy(InteractiveSheetStorageStrategy):
  """Polars read and write operations for sheets without a header row."""

  def __init__(self):
    """Loads polars on demand so it is only required for this backend.

    Raises:
      ModuleNotFoundError: When polars is not installed.
    """
    try:
      self._pl = importlib.import_module('polars')
    except ModuleNotFoundError as e:
      raise ModuleNotFoundError(
          'Polars is not installed. Please install it with `pip install polars`'
      ) from e

  def read(self, worksheet, range_name=None):
    """Returns the worksheet values as a polars dataframe, one row per list.

    Args:
      worksheet: object whose `get_values(range_name)` returns a list of rows.
      range_name: optional range forwarded to `get_values`.

    Returns:
      A polars dataframe built row-wise from the values; an empty dataframe
      when the worksheet returns no data.
    """
    data = worksheet.get_values(range_name)
    if not data:
      # Mirror the header strategy: an empty sheet yields an empty frame.
      return self._pl.DataFrame()
    return self._pl.DataFrame(data, orient='row')

  def write(self, worksheet, df, **kwargs):
    """Writes every row of the polars dataframe, without a header row.

    Args:
      worksheet: object exposing an `update(range, values, **kwargs)` method.
      df: the polars dataframe to write.
      **kwargs: forwarded unchanged to `worksheet.update`.
    """
    # gspread json.dumps every cell and doesn't support polars' dates, etc.
    # As a result we cast everything that is not a number to a string first,
    # exactly as the header-aware polars strategy does.
    formatted = df.cast({operator.invert(self._pl.selectors.numeric()): str})
    data = [list(r) for r in formatted.iter_rows()]
    worksheet.update('', data, **kwargs)
313+
314+
315+
class PolarsHeaderStorageStrategy(InteractiveSheetStorageStrategy):
  """Polars read and write operations for sheets with a header row."""

  def __init__(self):
    """Loads polars on demand so it is only required for this backend.

    Raises:
      ModuleNotFoundError: When polars is not installed.
    """
    try:
      self._pl = importlib.import_module('polars')
    except ModuleNotFoundError as e:
      raise ModuleNotFoundError(
          'Polars is not installed. Please install it with `pip install polars`'
      ) from e

  def read(self, worksheet, range_name=None):
    """Returns the worksheet as a polars dataframe named by its first row.

    Args:
      worksheet: object whose `get_values(range_name)` returns a list of rows.
      range_name: optional range forwarded to `get_values`.

    Returns:
      A polars dataframe whose column names come from the first row and whose
      rows are the remaining rows; an empty dataframe when there is no data.
    """
    data = worksheet.get_values(range_name)
    if not data:
      return self._pl.DataFrame()
    # Data is a list of lists, i.e.
    # [[header1, header2], [row1col1, row1col2], ...], where the first element
    # is the column names, the rest are the rows.
    columns = data[0]
    rows = data[1:]
    return self._pl.DataFrame(rows, schema=columns, orient='row')

  def write(self, worksheet, df, **kwargs):
    """Writes the column names followed by every row of the dataframe.

    Args:
      worksheet: object exposing an `update(range, values, **kwargs)` method.
      df: the polars dataframe to write.
      **kwargs: forwarded unchanged to `worksheet.update`.
    """
    # gspread json.dumps every cell and doesn't support polars' dates, etc.
    # As a result we cast everything that is not a number to a string first.
    formatted = df.cast({operator.invert(self._pl.selectors.numeric()): str})
    data = [df.columns] + [list(r) for r in formatted.iter_rows()]
    worksheet.update('', data, **kwargs)

0 commit comments

Comments
 (0)