saulpw · maxfl · Mar 18, 2025 · Mar 18, 2025 · Mar 18, 2025 · Mar 18, 2025
diff --git a/sample_data/arrays.hdf5 b/sample_data/arrays.hdf5
diff --git a/sample_data/arrays.npz b/sample_data/arrays.npz
diff --git a/visidata/loaders/hdf5.py b/visidata/loaders/hdf5.py
@@ -1,13 +1,17 @@
 from visidata import VisiData, vd, Sheet, Path, Column, ItemColumn, BaseSheet, anytype
+from itertools import chain
 
 @VisiData.api
 def open_h5(vd, p):
     return Hdf5ObjSheet(p.base_stem, source=p)
 
 VisiData.open_hdf5 = VisiData.open_h5
 
+vd.option('hdf5_matrix_enumerate', False, 'enumerate matrix rows and columns')
+
 class Hdf5ObjSheet(Sheet):
     'Support sheets in HDF5 format.'
+
     def iterload(self):
         h5py = vd.importExternal('h5py')
         source = self.source
@@ -26,25 +30,39 @@ def iterload(self):
             for k, v in source.items():
                 yield Hdf5ObjSheet(self.name, k, source=v)
         elif isinstance(source, h5py.Dataset):
-            if len(source.shape) == 1:
+            if len(source.shape)==1:
                 if source.dtype.names:
                     for i, (colname, fmt, *_) in enumerate(source.dtype.descr):
-                        self.addColumn(ItemColumn(colname, i, type=_guess_type(fmt)))
+                        if not colname:
+                            colname = f"col{i}"
+                        ctype = _guess_type(fmt)
+                        self.addColumn(ItemColumn(colname, i, type=ctype))
                     yield from source  # copy
                 else:
                     self.addColumn(ItemColumn(source.name, 0))
                     for v in source:
                         yield [v]
-            elif len(source.shape) == 2:  # matrix
+            elif len(source.shape)==2:
+                matrix_enumerate = bool(self.options.get('hdf5_matrix_enumerate'))
+
                 ncols = source.shape[1]
-                for i in range(ncols):
-                    self.addColumn(ItemColumn('', i, width=8), index=i)
-                self.recalc()
-                yield from source  # copy
+                ctype = _guess_type(source.dtype.descr[0][1])
+
+                if matrix_enumerate:
+                    self.addColumn(ItemColumn("row", 0, width=8, keycol=1, type=int), index=0)
+                    for i in range(ncols):
+                        self.addColumn(ItemColumn(f'col{i}', i+1, width=8, type=ctype), index=i+1)
+                    self.recalc()
+                    yield from ([i, *row] for i, row in enumerate(source))  # stream rows lazily, each prefixed with its row index
+                else:
+                    for i in range(ncols):
+                        self.addColumn(ItemColumn('', i, width=8, type=ctype), index=i)
+                    self.recalc()
+                    yield from source  # copy
             else:
                 vd.fail('too many dimensions in shape %s' % str(source.shape))
         else:
-            vd.fail('unknown h5 object type %s' % type(source))
+            vd.fail(f"unknown h5 object type {type(source)}")  # source is not an h5py.Dataset here, so it may have no .shape
 
 
     def openRow(self, row):

diff --git a/visidata/loaders/npy.py b/visidata/loaders/npy.py
@@ -1,4 +1,5 @@
-from visidata import VisiData, vd, Sheet, date, anytype, options, Column, Progress, ColumnItem, vlen, PyobjSheet, TypedWrapper
+from visidata import VisiData, vd, Sheet, date, anytype, options, Column, ItemColumn, Progress, vlen, PyobjSheet, TypedWrapper
+from itertools import chain
 
 'Loaders for .npy and .npz.  Save to .npy.  Depends on the zip loader.'
 
@@ -11,44 +12,79 @@ def open_npz(vd, p):
     return NpzSheet(p.base_stem, source=p)
 
 vd.option('npy_allow_pickle', False, 'numpy allow unpickling objects (unsafe)')
+vd.option('npy_matrix_enumerate', False, 'enumerate matrix rows and columns')
 
 class NpySheet(Sheet):
+    _transpose: bool = False
+    _matrix_enumerate: bool = False
+
     def iterload(self):
         numpy = vd.importExternal('numpy')
         if not hasattr(self, 'npy'):
-            self.npy = numpy.load(str(self.source), encoding='bytes', **self.options.getall('npy_'))
+            self.npy = numpy.load(str(self.source), encoding='bytes', allow_pickle=bool(self.options.get('npy_allow_pickle')))
         self.reloadCols()
-        yield from Progress(self.npy, total=len(self.npy))
+        if self._transpose:
+            source = self.npy[:,None]
+        else:
+            source = self.npy
+
+        nrows = len(self.npy)
+
+        if self._matrix_enumerate:
+            source = list(list((chain((i,), row))) for i, row in enumerate(source))
+
+        yield from Progress(source, total=nrows)  # total must be passed by keyword; the 2nd positional arg is the gerund label
+
 
     def reloadCols(self):
+        self._matrix_enumerate = bool(self.options.get('npy_matrix_enumerate'))
+
         self.columns = []
-        for i, (name, fmt, *shape) in enumerate(self.npy.dtype.descr):
-            if not name:
-                continue
-            if shape:
-                t = anytype
-            elif 'M' in fmt:
-                self.addColumn(Column(name, type=date, getter=lambda c,r,i=i: str(r[i])))
-                continue
-            elif 'i' in fmt or 'u' in fmt:
-                t = int
-            elif 'f' in fmt:
-                t = float
+        if len(self.npy.shape)==1:
+            self._transpose = not bool(self.npy.dtype.names)
+            for i, (colname, fmt, *shape) in enumerate(self.npy.dtype.descr):
+                if not colname:
+                    colname = f"col{i}"
+                ctype = _guess_type(shape, fmt)
+                if ctype=="time":
+                    self.addColumn(Column(colname, type=date, getter=lambda c,r,i=i: str(r[i])))
+                    continue
+                self.addColumn(ItemColumn(colname, i, type=ctype))
+        elif len(self.npy.shape)==2:
+            ncols = self.npy.shape[1]
+            ctype = _guess_type(None, self.npy.dtype.descr[0][1])
+
+            if self._matrix_enumerate:
+                self.addColumn(ItemColumn("row", 0, width=8, keycol=1, type=int), index=0)
+                for i in range(ncols):
+                    self.addColumn(ItemColumn(f'col{i}', i+1, width=8, type=ctype), index=i+1)
             else:
-                t = anytype
-            self.addColumn(ColumnItem(name, i, type=t))
-
+                for i in range(ncols):
+                    self.addColumn(ItemColumn('', i, width=8, type=ctype), index=i)
+        else:
+            vd.fail(f"too many dimensions in shape {self.npy.shape}")
+
+def _guess_type(shape, fmt):  # pick a column type from one dtype.descr entry: fmt is its format string, shape any trailing items (or None)
+    if shape:  # entry carries extra descr items (presumably a sub-array shape): leave untyped
+        return anytype
+    elif 'M' in fmt:
+        return "time"  # sentinel string, not a type: reloadCols compares against it and builds a date Column instead
+    elif 'i' in fmt or 'u' in fmt:
+        return int
+    elif 'f' in fmt:
+        return float
+    return anytype  # fallback for any other format code
 
 class NpzSheet(vd.ZipSheet):
     # rowdef: tuple(tablename, table)
     columns = [
-        ColumnItem('name', 0),
-        ColumnItem('length', 1, type=vlen),
+        ItemColumn('name', 0),
+        ItemColumn('length', 1, type=vlen),
     ]
 
     def iterload(self):
         numpy = vd.importExternal('numpy')
-        self.npz = numpy.load(str(self.source), encoding='bytes', **self.options.getall('npy_'))
+        self.npz = numpy.load(str(self.source), encoding='bytes', allow_pickle=bool(self.options.get('npy_allow_pickle')))
         yield from Progress(self.npz.items())
 
     def openRow(self, row):
@@ -94,4 +130,4 @@ def save_npy(vd, p, sheet):
 
     arr = np.array(data, dtype=dtype)
     with p.open_bytes(mode='w') as outf:
-        np.save(outf, arr, **sheet.options.getall('npy_'))
+        np.save(outf, arr, allow_pickle=bool(sheet.options.get('npy_allow_pickle')))