23
23
class Cursor ():
24
24
"""
25
25
Cursor for retrieving rows from a table
26
+ buffer_rows can be used to control how many rows
27
+ will be fetched from the server
26
28
"""
27
- def __init__ (self , table , query = None , start = None , stop = None ):
29
+ def __init__ (self , table , query = None , start = None , stop = None , buffer_rows = None ):
28
30
self ._table = table
29
31
self ._query = query
32
+ DEFAULT_BUFFER_BYTES = 1000000
33
+ if buffer_rows is None :
34
+ buffer_rows = DEFAULT_BUFFER_BYTES // table .dtype .itemsize
35
+ if buffer_rows < 1 :
36
+ buffer_rows = 1
37
+ self ._buffer_rows = buffer_rows
38
+
30
39
if start is None :
31
40
self ._start = 0
32
41
else :
@@ -41,33 +50,30 @@ def __iter__(self):
41
50
42
51
BEWARE: Modifications to the yielded data are *NOT* written to file.
43
52
"""
44
- nrows = self ._table .nrows
45
- # to reduce round trips, grab BUFFER_SIZE items at a time
46
- # TBD: set buffersize based on size of each row
47
- BUFFER_SIZE = 10000
53
+ nrows = self ._stop - self ._start
48
54
49
55
arr = None
50
56
query_complete = False
51
57
52
- for indx in range (self ._start , self ._stop ):
53
- if indx % BUFFER_SIZE == 0 :
58
+ for indx in range (self ._stop - self ._start ):
59
+ if indx % self . _buffer_rows == 0 :
54
60
# grab another buffer
55
- read_count = BUFFER_SIZE
61
+ read_count = self . _buffer_rows
56
62
if nrows - indx < read_count :
57
63
read_count = nrows - indx
58
64
if self ._query is None :
59
-
60
- arr = self ._table [indx :read_count + indx ]
65
+ print ( "read row count:" , ( read_count + indx + self . _start ) - ( indx + self . _start ))
66
+ arr = self ._table [indx + self . _start :read_count + indx + self . _start ]
61
67
else :
62
68
# call table to return query result
63
69
if query_complete :
64
70
arr = None # nothing more to fetch
65
71
else :
66
- arr = self ._table .read_where (self ._query , start = indx , limit = read_count )
72
+ arr = self ._table .read_where (self ._query , start = indx + self . _start , limit = read_count )
67
73
if arr is not None and arr .shape [0 ] < read_count :
68
74
query_complete = True # we've gotten all the rows
69
- if arr is not None and indx % BUFFER_SIZE < arr .shape [0 ]:
70
- yield arr [indx % BUFFER_SIZE ]
75
+ if arr is not None and indx % self . _buffer_rows < arr .shape [0 ]:
76
+ yield arr [indx % self . _buffer_rows ]
71
77
72
78
class Table (Dataset ):
73
79
0 commit comments