@@ -50,7 +50,8 @@ def resample_skeleton(x: 'core.NeuronObject',
       within a neuron, but you may encounter duplicates across neurons.
     - Any non-standard node table columns (e.g. "labels") will be lost.
     - Soma(s) will be pinned to the closest node in the resampled neuron.
-
+    - We may end up upcasting the data type for node and parent IDs to
+      accommodate the new node IDs.
 
    Also: be aware that high-resolution neurons will use A LOT of memory.
 
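Why the new docstring note matters: resampling regenerates every node ID, and the regenerated IDs can exceed the range of the dtype the original node table used. A minimal, self-contained sketch of the failure mode this guards against (the values are made up; `pd.to_numeric` and `astype` are the real pandas calls used below):

```python
import numpy as np
import pandas as pd

# Freshly generated node IDs come back as 64-bit integers.
new_ids = pd.Series([1, 2, 2_500_000_000], dtype="int64")

# Casting back to the original int32 dtype silently wraps out-of-range
# values with numpy's C-style casting (2_500_000_000 -> -1_794_967_296).
print(new_ids.astype("int32").tolist())

# Letting pandas pick the smallest integer dtype that fits avoids this:
# the values exceed int32, so the series simply stays int64.
print(pd.to_numeric(new_ids, downcast="integer").dtype)
```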
@@ -253,13 +254,26 @@ def resample_skeleton(x: 'core.NeuronObject',
         data=new_nodes, columns=["node_id", "parent_id"] + num_cols + non_num_cols
     )
 
-    # Convert columns to appropriate dtypes
+    # At this point node and parent IDs will be 64-bit integers and x/y/z columns
+    # will be float64. We will convert them back to the original dtypes, but we
+    # have to be careful with node & parent IDs to avoid overflows if the original
+    # dtype can't accommodate the new IDs.
+
+    # Gather the original dtypes
     dtypes = {
         k: x.nodes[k].dtype for k in ["node_id", "parent_id"] + num_cols + non_num_cols
     }
 
-    for cols in new_nodes.columns:
-        new_nodes = new_nodes.astype(dtypes, errors="ignore")
+    # Check for overflow
+    for col in ("node_id", "parent_id"):
+        # If the new IDs overflow 32-bit integers, downcast to the smallest
+        # dtype that fits (we could also check for underflow, but that's less likely)
+        if new_nodes[col].max() >= np.iinfo(np.int32).max:
+            new_nodes[col] = pd.to_numeric(new_nodes[col], downcast="integer")
+            dtypes[col] = new_nodes[col].dtype  # update the target dtype
+
+    # Now cast the rest
+    new_nodes = new_nodes.astype(dtypes, errors="ignore")
 
     # Remove duplicate nodes (branch points)
     new_nodes = new_nodes[~new_nodes.node_id.duplicated()]
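For clarity, the new cast-back logic as a standalone, runnable sketch; `cast_back` and the toy table are hypothetical illustrations, not part of the module, but the function body mirrors the lines added above:

```python
import numpy as np
import pandas as pd

def cast_back(new_nodes: pd.DataFrame, dtypes: dict) -> pd.DataFrame:
    """Restore original dtypes, upcasting IDs that no longer fit."""
    for col in ("node_id", "parent_id"):
        # If the regenerated IDs exceed the 32-bit range, let pandas pick
        # the smallest integer dtype that can hold them instead of the
        # original dtype (which would overflow).
        if new_nodes[col].max() >= np.iinfo(np.int32).max:
            new_nodes[col] = pd.to_numeric(new_nodes[col], downcast="integer")
            dtypes[col] = new_nodes[col].dtype
    # errors="ignore" mirrors the original call: columns that can't be
    # cast are left as-is rather than raising.
    return new_nodes.astype(dtypes, errors="ignore")

# Toy node table whose IDs exceed the int32 range, with int32 originals:
nodes = pd.DataFrame({
    "node_id": np.array([2**31, 2**31 + 1], dtype="int64"),
    "parent_id": np.array([-1, 2**31], dtype="int64"),
    "x": np.array([0.0, 1.0], dtype="float64"),
})
out = cast_back(nodes, {"node_id": np.dtype("int32"),
                        "parent_id": np.dtype("int32"),
                        "x": np.dtype("float32")})
print(out.dtypes)  # node_id/parent_id stay int64, x becomes float32
```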