Improve multi_index query/coords behavior

- Don't return coords for dense multi_index by default (#347)
- Fix and test coords exclusion for sparse array queries
TileDB-Inc · Jul 6, 2020 · 2017ad9 · 2017ad9
1 parent 7550796
commit 2017ad9
Show file tree

Hide file tree

Showing 5 changed files with 21 additions and 15 deletions.
diff --git a/tiledb/core.cc b/tiledb/core.cc
@@ -214,21 +214,22 @@ class PyQuery {
     array_ = std::shared_ptr<tiledb::Array>(new Array(ctx_, c_array_, false),
                                             [](Array *p) {} /* no deleter*/);
 
+    bool issparse = array_->schema().array_type() == TILEDB_SPARSE;
+
     query_ = std::shared_ptr<tiledb::Query>(
         new Query(ctx_, *array_, TILEDB_READ));
         //        [](Query* p){} /* note: no deleter*/);
 
     tiledb_layout_t layout = (tiledb_layout_t)py_layout.cast<int32_t>();
-    if (array_->schema().array_type() == TILEDB_DENSE &&
-        layout == TILEDB_UNORDERED) {
+    if (issparse && layout == TILEDB_UNORDERED) {
           TPY_ERROR_LOC("TILEDB_UNORDERED read is not supported for dense arrays")
     }
     query_->set_layout(layout);
 
-    if (coords.is(py::none())) {
-      include_coords_ = true;
-    } else {
+    if (!coords.is(py::none())) {
       include_coords_ = coords.cast<bool>();
+    } else {
+      include_coords_ = issparse;
     }
 
     for (auto a : attrs) {
@@ -553,10 +554,8 @@ class PyQuery {
 
   void submit_read() {
     auto schema = array_->schema();
-    auto issparse = schema.array_type() == TILEDB_SPARSE;
-    auto need_dim_buffers = include_coords_ || issparse;
 
-    if (need_dim_buffers) {
+    if (include_coords_) {
       auto domain = schema.domain();
       for (auto dim : domain.dimensions()) {
         alloc_buffer(dim.name());

diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py
@@ -51,8 +51,7 @@ def sel_to_subranges(dim_sel):
 
 class MultiRangeIndexer(object):
     """
-    Implements multi-range / outer / orthogonal indexing.
-
+    Implements multi-range indexing.
     """
 
     def __init__(self, array, query = None):
@@ -111,7 +110,6 @@ def __getitem__(self, idx):
         schema = self.schema
         dom = self.schema.domain
         attr_names = tuple(self.schema.attr(i)._internal_name for i in range(self.schema.nattr))
-
         coords = None
         order = 'C' # TILEDB_ROW_MAJOR
         if self.query is not None:

diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py
@@ -2853,7 +2853,9 @@ def test_tiledb_py_0_6_anon_attr(self):
             self.assertEqual(A[0], 1)
             mres = A.multi_index[0]
             self.assertEqual(mres[''], 1)
-            self.assertEqual(mres['d'], 0)
+
+            qres = A.query(coords=True).multi_index[0]
+            self.assertEqual(qres['d'], 0)
 
 class MemoryTest(DiskTestCase):
     # sanity check that memory usage doesn't increase more than 2x when reading 40MB 100x

diff --git a/tiledb/tests/test_multi_index.py b/tiledb/tests/test_multi_index.py
@@ -299,7 +299,7 @@ def test_multirange_1d_dense_int64(self):
         with tiledb.open(path) as A:
             # stepped ranges are not supported
             with self.assertRaises(ValueError):
-                A.multi_index[ 1::2 ]
+                A.query(coords=True).multi_index[ 1::2 ]
 
             assert_array_equal(
                 orig_array[ [0,-1] ],
@@ -311,7 +311,7 @@ def test_multirange_1d_dense_int64(self):
             )
             self.assertEqual(
                 -10,
-                A.multi_index[-10]['coords'].view('i8')
+                A.query(coords=True).multi_index[-10]['coords'].view('i8')
             )
             assert_array_equal(
                 orig_array[0:],
@@ -576,6 +576,14 @@ def test_multirange_1d_sparse_query(self):
                     res[k]
                 )
 
+        with tiledb.open(path) as A:
+            Q = A.query(coords=False, attrs=["U"])
+            res = Q.multi_index[:]
+            self.assertTrue("U" in res)
+            self.assertTrue("V" not in res)
+            self.assertTrue("coords" not in res)
+            assert_array_equal(res["U"], data["U"])
+
     def test_multirange_1d_dense_vectorized(self):
         ctx = tiledb.Ctx()
         path = self.path('mr_1d_dense_vectorized')

diff --git a/tiledb/tests/test_pandas_dataframe.py b/tiledb/tests/test_pandas_dataframe.py
@@ -467,7 +467,6 @@ def test_csv_chunked(self):
             ned = A.nonempty_domain()[0]
             # TODO should support numpy scalar here
             res = A.multi_index[int(ned[0]):int(ned[1])]
-            res.pop('rows')
             df_bk = pd.DataFrame(res)
 
             tm.assert_frame_equal(df_bk, df)