2
2
3
3
import abc
4
4
import datetime
5
+ import importlib
6
+ import operator
5
7
import google .auth
6
8
from google .colab import auth
7
9
import gspread
12
14
_gspread_client = None
13
15
14
16
17
+ _PANDAS = 'pandas'
18
+ _POLARS = 'polars'
19
+
20
+
15
21
def _clean_val (val ):
16
22
if isinstance (val , pd .Timestamp ):
17
23
return val .isoformat ()
@@ -56,6 +62,7 @@ class InteractiveSheet:
56
62
url: a string with the url to the sheet
57
63
embedded_url: a string with the url to the embedded sheet
58
64
storage_strategy: an instance of InteractiveSheetStorageStrategy
65
+ backend: A string indicating the backend the interactive sheet uses
59
66
"""
60
67
61
68
def __init__ (
@@ -70,6 +77,7 @@ def __init__(
70
77
credentials = None ,
71
78
include_column_headers = True ,
72
79
display = True ,
80
+ backend = _PANDAS ,
73
81
):
74
82
"""Initialize a new InteractiveSheet.
75
83
@@ -94,6 +102,12 @@ def __init__(
94
102
include_column_headers: If True, assume the first row of the sheet is a
95
103
header column for both reads and writes.
96
104
display: If True, displays the embedded sheet in the cell output.
105
+ backend: The dataframe library to use, must be one of `'pandas'` or
106
+ `'polars'`. To use polars it must actually be installed.
107
+
108
+ Raises:
109
+ ValueError: When an incompatible `backend` is supplied.
110
+ ModuleNotFoundError: When `backend='polars'` but polars is not installed.
97
111
"""
98
112
if sum ([bool (url ), bool (sheet_id ), bool (title )]) > 1 :
99
113
raise ValueError (
@@ -125,12 +139,23 @@ def __init__(
125
139
self .embedded_url = (
126
140
f'{ self .sheet .url } /edit?rm=embedded#gid={ self .worksheet .id } '
127
141
)
128
-
129
- if include_column_headers :
130
- self .storage_strategy = HeaderStorageStrategy ()
142
+ self .backend = backend
143
+ if backend == _POLARS :
144
+ self .storage_strategy = (
145
+ PolarsHeaderStorageStrategy ()
146
+ if include_column_headers
147
+ else PolarsHeaderlessStorageStrategy ()
148
+ )
149
+ elif backend == _PANDAS :
150
+ self .storage_strategy = (
151
+ HeaderStorageStrategy ()
152
+ if include_column_headers
153
+ else HeaderlessStorageStrategy ()
154
+ )
131
155
else :
132
- self .storage_strategy = HeaderlessStorageStrategy ()
133
-
156
+ raise ValueError (
157
+ f"Unrecognized backend '{ backend } ', use one of 'polars' or 'pandas'."
158
+ )
134
159
if df is not None :
135
160
self .update (df = df )
136
161
if display :
@@ -183,19 +208,35 @@ def as_df(self, range_name=None):
183
208
a DataFrame (pandas or polars, per the backend) with the latest data from the current worksheet
184
209
"""
185
210
self ._ensure_gspread_client ()
186
- data = self .storage_strategy .read (self .worksheet , range_name )
187
- return pd .DataFrame (data )
211
+ return self .storage_strategy .read (self .worksheet , range_name )
188
212
189
213
def update(self, df, **kwargs):
  """Clear the worksheet and write the provided dataframe in its place.

  Args:
    df: the source data
    **kwargs: additional arguments to pass to the gspread update method

  Raises:
    ValueError: When a pandas dataframe is passed to an instance with
      `backend='polars'` or vice versa.
  """
  uses_polars = self.backend == _POLARS
  got_pandas_frame = isinstance(df, pd.DataFrame)

  # Reject a frame that does not belong to the configured backend before any
  # remote call is made, so a bad argument never clears the sheet.
  if uses_polars and got_pandas_frame:
    raise ValueError(
        'Unexpected DataFrame. Got: pandas, want: polars. To use a pandas'
        " dataframe with InteractiveSheet you must set backend='pandas' when"
        ' creating the sheet'
    )
  if self.backend == _PANDAS and not got_pandas_frame:
    raise ValueError(
        'Unexpected DataFrame. Got: polars, want: pandas. To use a polars'
        " dataframe with InteractiveSheet you must set backend='polars' when"
        ' creating the sheet'
    )

  self._ensure_gspread_client()
  self.worksheet.clear()

  # Polars frames are handed to the strategy untouched; everything else goes
  # through the pandas normalisation helper first.
  if uses_polars:
    frame = df
  else:
    frame = _to_frame(df)
  self.storage_strategy.write(self.worksheet, frame, **kwargs)
199
240
200
241
def display (self , height = 600 ):
201
242
"""Display the embedded sheet in Colab.
@@ -239,12 +280,63 @@ def read(self, worksheet, range_name=None):
239
280
data = worksheet .get_values (range_name )
240
281
if not data :
241
282
return pd .DataFrame ()
242
- # Data is a list of lists, i.e. [[col1, col2], [row1, row2], ...], where
243
- # the first element is the column names, the rest are the rows.
283
+ # Data is a list of lists, i.e.
284
+ # [[header1, header2], [row1col1, row1col2], ...], where the first element
285
+ # is the column names, the rest are the rows.
244
286
columns = data [0 ]
245
287
rows = data [1 :]
246
288
return pd .DataFrame (rows , columns = columns )
247
289
248
290
def write(self, worksheet, df, **kwargs):
  """Write `df` to the worksheet with its column names as the first row.

  Args:
    worksheet: the target worksheet; its `update` method is called once.
    df: a pandas DataFrame whose columns become the header row.
    **kwargs: additional arguments forwarded to `worksheet.update`.
  """
  header_row = list(df.columns)
  body_rows = [list(series) for _index, series in df.iterrows()]
  worksheet.update('', [header_row, *body_rows], **kwargs)
293
+
294
+
295
class PolarsHeaderlessStorageStrategy(InteractiveSheetStorageStrategy):
  """Polars read and write operations for sheets without a header row."""

  def __init__(self):
    """Import polars lazily so it is only required when this backend is used.

    Raises:
      ModuleNotFoundError: When polars is not installed.
    """
    try:
      self._pl = importlib.import_module('polars')
    except ModuleNotFoundError as e:
      raise ModuleNotFoundError(
          'Polars is not installed. Please install it with `pip install polars`'
      ) from e

  def read(self, worksheet, range_name=None):
    """Return the worksheet values as a polars DataFrame.

    Every sheet row, including the first, is treated as data.

    Args:
      worksheet: the worksheet to read; its `get_values` method is called.
      range_name: optional range forwarded to `get_values`.

    Returns:
      A polars DataFrame, empty when the worksheet returns no values.
    """
    data = worksheet.get_values(range_name)
    if not data:
      return self._pl.DataFrame()
    return self._pl.DataFrame(data, orient='row')

  def write(self, worksheet, df, **kwargs):
    """Write the rows of `df` to the worksheet without a header row.

    Args:
      worksheet: the target worksheet; its `update` method is called once.
      df: a polars DataFrame.
      **kwargs: additional arguments forwarded to `worksheet.update`.
    """
    # Mirror PolarsHeaderStorageStrategy.write: gspread json.dumps every cell
    # and doesn't support polars' dates, etc., so everything that is not a
    # number is cast to a string first.
    formatted = df.cast({operator.invert(self._pl.selectors.numeric()): str})
    data = [list(r) for r in formatted.iter_rows()]
    worksheet.update('', data, **kwargs)
313
+
314
+
315
class PolarsHeaderStorageStrategy(InteractiveSheetStorageStrategy):
  """Polars read and write operations for sheets with a header row."""

  def __init__(self):
    """Import polars lazily so it is only required when this backend is used.

    Raises:
      ModuleNotFoundError: When polars is not installed.
    """
    try:
      self._pl = importlib.import_module('polars')
    except ModuleNotFoundError as e:
      raise ModuleNotFoundError(
          'Polars is not installed. Please install it with `pip install polars`'
      ) from e

  def read(self, worksheet, range_name=None):
    """Return the worksheet values as a polars DataFrame.

    The first sheet row supplies the column names.

    Args:
      worksheet: the worksheet to read; its `get_values` method is called.
      range_name: optional range forwarded to `get_values`.

    Returns:
      A polars DataFrame, empty when the worksheet returns no values.
    """
    data = worksheet.get_values(range_name)
    if not data:
      return self._pl.DataFrame()
    # Data is a list of lists, i.e.
    # [[header1, header2], [row1col1, row1col2], ...], where the first element
    # is the column names, the rest are the rows.
    columns = data[0]
    rows = data[1:]
    return self._pl.DataFrame(rows, schema=columns, orient='row')

  def write(self, worksheet, df, **kwargs):
    """Write `df` to the worksheet with its column names as the first row.

    Args:
      worksheet: the target worksheet; its `update` method is called once.
      df: a polars DataFrame whose columns become the header row.
      **kwargs: additional arguments forwarded to `worksheet.update`.
    """
    # gspread json.dumps every cell and doesn't support polars' dates, etc.
    # As a result we cast everything that is not a number to a string first.
    formatted = df.cast({operator.invert(self._pl.selectors.numeric()): str})
    data = [list(df.columns)] + [list(r) for r in formatted.iter_rows()]
    worksheet.update('', data, **kwargs)
0 commit comments