From d8b3c7037a194aa179954bcfdbc407f756e829f9 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 6 Aug 2023 16:36:26 +0000
Subject: [PATCH 001/105] fix: cleanup ingest code

---
 .../graph/connectivity/cross_edges.py         | 24 ++++---------------
 pychunkedgraph/graph/misc.py                  |  1 -
 .../ingest/create/abstract_layers.py          | 21 ++--------------
 pychunkedgraph/ingest/create/atomic_layer.py  |  4 ++--
 4 files changed, 9 insertions(+), 41 deletions(-)

diff --git a/pychunkedgraph/graph/connectivity/cross_edges.py b/pychunkedgraph/graph/connectivity/cross_edges.py
index 8aa52a9f1..d69759bbf 100644
--- a/pychunkedgraph/graph/connectivity/cross_edges.py
+++ b/pychunkedgraph/graph/connectivity/cross_edges.py
@@ -1,10 +1,9 @@
-import time
+# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel
+
 import math
 import multiprocessing as mp
 from collections import defaultdict
-from typing import Optional
 from typing import Sequence
-from typing import List
 from typing import Dict
 
 import numpy as np
@@ -13,9 +12,7 @@
 from .. import attributes
 from ..types import empty_2d
 from ..utils import basetypes
-from ..utils import serializers
 from ..chunkedgraph import ChunkedGraph
-from ..utils.generic import get_valid_timestamp
 from ..utils.generic import filter_failed_node_ids
 from ..chunks.atomic import get_touching_atomic_chunks
 from ..chunks.atomic import get_bounding_atomic_chunks
@@ -30,14 +27,12 @@ def get_children_chunk_cross_edges(
     The edges are between node IDs in the given layer (not atomic).
     """
     atomic_chunks = get_touching_atomic_chunks(cg.meta, layer, chunk_coord)
-    if not len(atomic_chunks):
+    if len(atomic_chunks) == 0:
         return []
 
-    print(f"touching atomic chunk count {len(atomic_chunks)}")
     if not use_threads:
         return _get_children_chunk_cross_edges(cg, atomic_chunks, layer - 1)
 
-    print("get_children_chunk_cross_edges, atomic chunks", len(atomic_chunks))
     with mp.Manager() as manager:
         edge_ids_shared = manager.list()
         edge_ids_shared.append(empty_2d)
@@ -69,9 +64,6 @@ def _get_children_chunk_cross_edges_helper(args) -> None:
 
 
 def _get_children_chunk_cross_edges(cg, atomic_chunks, layer) -> None:
-    print(
-        f"_get_children_chunk_cross_edges {layer} atomic_chunks count {len(atomic_chunks)}"
-    )
     cross_edges = [empty_2d]
     for layer2_chunk in atomic_chunks:
         edges = _read_atomic_chunk_cross_edges(cg, layer2_chunk, layer)
@@ -80,11 +72,10 @@ def _get_children_chunk_cross_edges(cg, atomic_chunks, layer) -> None:
     cross_edges = np.concatenate(cross_edges)
     if not cross_edges.size:
         return empty_2d
-    print(f"getting roots at stop_layer {layer} {cross_edges.shape}")
+
     cross_edges[:, 0] = cg.get_roots(cross_edges[:, 0], stop_layer=layer, ceil=False)
     cross_edges[:, 1] = cg.get_roots(cross_edges[:, 1], stop_layer=layer, ceil=False)
     result = np.unique(cross_edges, axis=0) if cross_edges.size else empty_2d
-    print(f"_get_children_chunk_cross_edges done {result.shape}")
     return result
 
 
@@ -118,16 +109,13 @@ def get_chunk_nodes_cross_edge_layer(
     return_type dict {node_id: layer}
     the lowest layer (>= current layer) at which a node_id is part of a cross edge
     """
-    print("get_bounding_atomic_chunks")
     atomic_chunks = get_bounding_atomic_chunks(cg.meta, layer, chunk_coord)
-    print("get_bounding_atomic_chunks complete")
-    if not len(atomic_chunks):
+    if len(atomic_chunks) == 0:
         return {}
 
     if not use_threads:
         return _get_chunk_nodes_cross_edge_layer(cg, atomic_chunks, layer)
 
-    print("divide tasks")
     cg_info = cg.get_serialized_info()
     manager = mp.Manager()
     ids_l_shared = manager.list()
@@ -139,7 +127,6 @@ def get_chunk_nodes_cross_edge_layer(
         multi_args.append(
             (ids_l_shared, layers_l_shared, cg_info, atomic_chunks, layer)
         )
-    print("divide tasks complete")
 
     multiprocess_func(
         _get_chunk_nodes_cross_edge_layer_helper,
@@ -149,7 +136,6 @@ def get_chunk_nodes_cross_edge_layer(
 
     node_layer_d_shared = manager.dict()
     _find_min_layer(node_layer_d_shared, ids_l_shared, layers_l_shared)
-    print("_find_min_layer complete")
     return node_layer_d_shared
 
 
diff --git a/pychunkedgraph/graph/misc.py b/pychunkedgraph/graph/misc.py
index b33e8a6fd..873422db1 100644
--- a/pychunkedgraph/graph/misc.py
+++ b/pychunkedgraph/graph/misc.py
@@ -202,7 +202,6 @@ def get_contact_sites(
     # Load edges of these cs_svs
     edges_cs_svs_rows = cg.client.read_nodes(
         node_ids=u_cs_svs,
-        # columns=[attributes.Connectivity.Partner, attributes.Connectivity.Connected],
     )
     pre_cs_edges = []
     for ri in edges_cs_svs_rows.items():
diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 529a6846f..1973daacc 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -1,15 +1,14 @@
+# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel
+
 """
 Functions for creating parents in level 3 and above
 """
 
-import time
 import math
 import datetime
 import multiprocessing as mp
-from collections import defaultdict
 from typing import Optional
 from typing import Sequence
-from typing import List
 
 import numpy as np
 from multiwrapper import multiprocessing_utils as mu
@@ -44,11 +43,6 @@ def add_layer(
         cg, layer_id, parent_coords, use_threads=n_threads > 1
     )
 
-    print("children_coords", children_coords.size, layer_id, parent_coords)
-    print(
-        "n e", len(children_ids), len(edge_ids), layer_id, parent_coords,
-    )
-
     node_layers = cg.get_chunk_layers(children_ids)
     edge_layers = cg.get_chunk_layers(np.unique(edge_ids))
     assert np.all(node_layers < layer_id), "invalid node layers"
@@ -62,7 +56,6 @@ def add_layer(
     edge_ids.extend(add_edge_ids)
     graph, _, _, graph_ids = flatgraph.build_gt_graph(edge_ids, make_directed=True)
     ccs = flatgraph.connected_components(graph)
-    print("ccs", len(ccs))
     _write_connected_components(
         cg,
         layer_id,
@@ -84,7 +77,6 @@ def _read_children_chunks(
             children_ids.append(_read_chunk([], cg, layer_id - 1, child_coord))
         return np.concatenate(children_ids)
 
-    print("_read_children_chunks")
     with mp.Manager() as manager:
         children_ids_shared = manager.list()
         multi_args = []
@@ -102,7 +94,6 @@ def _read_children_chunks(
             multi_args,
             n_threads=min(len(multi_args), mp.cpu_count()),
         )
-        print("_read_children_chunks done")
         return np.concatenate(children_ids_shared)
 
 
@@ -113,7 +104,6 @@ def _read_chunk_helper(args):
 
 
 def _read_chunk(children_ids_shared, cg: ChunkedGraph, layer_id: int, chunk_coord):
-    print(f"_read_chunk {layer_id}, {chunk_coord}")
     x, y, z = chunk_coord
     range_read = cg.range_read_chunk(
         cg.get_chunk_id(layer=layer_id, x=x, y=y, z=z),
@@ -129,7 +119,6 @@ def _read_chunk(children_ids_shared, cg: ChunkedGraph, layer_id: int, chunk_coor
 
     row_ids = filter_failed_node_ids(row_ids, segment_ids, max_children_ids)
     children_ids_shared.append(row_ids)
-    print(f"_read_chunk {layer_id}, {chunk_coord} done {len(row_ids)}")
     return row_ids
 
 
@@ -147,13 +136,10 @@ def _write_connected_components(
 
     node_layer_d_shared = {}
     if layer_id < cg.meta.layer_count:
-        print("getting node_layer_d_shared")
         node_layer_d_shared = get_chunk_nodes_cross_edge_layer(
             cg, layer_id, parent_coords, use_threads=use_threads
         )
 
-    print("node_layer_d_shared", len(node_layer_d_shared))
-
     ccs_with_node_ids = []
     for cc in ccs:
         ccs_with_node_ids.append(graph_ids[cc])
@@ -186,7 +172,6 @@ def _write_connected_components(
 
 
 def _write_components_helper(args):
-    print("running _write_components_helper")
     cg_info, layer_id, parent_coords, ccs, node_layer_d_shared, time_stamp = args
     cg = ChunkedGraph(**cg_info)
     _write(cg, layer_id, parent_coords, ccs, node_layer_d_shared, time_stamp)
@@ -241,7 +226,5 @@ def _write(
 
             if len(rows) > 100000:
                 cg.client.write(rows)
-                print("wrote rows", len(rows), layer_id, parent_coords)
                 rows = []
     cg.client.write(rows)
-    print("wrote rows", len(rows), layer_id, parent_coords)
diff --git a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py
index 4fa1f1688..d87638b26 100644
--- a/pychunkedgraph/ingest/create/atomic_layer.py
+++ b/pychunkedgraph/ingest/create/atomic_layer.py
@@ -1,14 +1,14 @@
+# pylint: disable=invalid-name, missing-function-docstring, import-outside-toplevel
+
 """
 Functions for creating atomic nodes and their level 2 abstract parents
 """
 
 import datetime
 from typing import Dict
-from typing import List
 from typing import Optional
 from typing import Sequence
 
-import pytz
 import numpy as np
 
 from ...graph import attributes

From 76667a3998991b31072beacbbea7c2d61fad6633 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 6 Aug 2023 16:38:22 +0000
Subject: [PATCH 002/105] add ttl column family

---
 pychunkedgraph/graph/client/bigtable/client.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/client/bigtable/client.py b/pychunkedgraph/graph/client/bigtable/client.py
index 5b86826bd..486cbdd73 100644
--- a/pychunkedgraph/graph/client/bigtable/client.py
+++ b/pychunkedgraph/graph/client/bigtable/client.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel, line-too-long, protected-access, arguments-differ, arguments-renamed, logging-fstring-interpolation
+# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel, line-too-long, protected-access, arguments-differ, arguments-renamed, logging-fstring-interpolation, too-many-arguments
 
 import sys
 import time
@@ -15,11 +15,12 @@
 from google.api_core.exceptions import Aborted
 from google.api_core.exceptions import DeadlineExceeded
 from google.api_core.exceptions import ServiceUnavailable
+from google.cloud.bigtable.column_family import MaxAgeGCRule
+from google.cloud.bigtable.column_family import MaxVersionsGCRule
 from google.cloud.bigtable.table import Table
 from google.cloud.bigtable.row_set import RowSet
 from google.cloud.bigtable.row_data import PartialRowData
 from google.cloud.bigtable.row_filters import RowFilter
-from google.cloud.bigtable.column_family import MaxVersionsGCRule
 
 from . import utils
 from . import BigTableConfig
@@ -637,6 +638,8 @@ def _create_column_families(self):
         f.create()
         f = self._table.column_family("3")
         f.create()
+        f = self._table.column_family("4", gc_rule=MaxAgeGCRule(datetime.timedelta(days=1)))
+        f.create()
 
     def _get_ids_range(self, key: bytes, size: int) -> typing.Tuple:
         """Returns a range (min, max) of IDs for a given `key`."""

From 084d642f20287fcc9ea8bc107f9ca8da0e0b0af1 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 6 Aug 2023 16:40:55 +0000
Subject: [PATCH 003/105] fix: new l2 cx edge attribute

---
 pychunkedgraph/graph/attributes.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/pychunkedgraph/graph/attributes.py b/pychunkedgraph/graph/attributes.py
index 3e48d204a..ea03d2216 100644
--- a/pychunkedgraph/graph/attributes.py
+++ b/pychunkedgraph/graph/attributes.py
@@ -1,6 +1,9 @@
+# pylint: disable=invalid-name, missing-docstring, protected-access, raise-missing-from
+
 # TODO design to use these attributes across different clients
 # `family_id` is specific to bigtable
 
+from enum import Enum
 from typing import NamedTuple
 
 from .utils import serializers
@@ -101,8 +104,8 @@ class Connectivity:
         serializer=serializers.NumPyArray(dtype=basetypes.EDGE_AREA),
     )
 
-    CrossChunkEdge = _AttributeArray(
-        pattern=b"atomic_cross_edges_%d",
+    L2CrossChunkEdge = _AttributeArray(
+        pattern=b"l2_cross_edge_%d",
         family_id="3",
         serializer=serializers.NumPyArray(
             dtype=basetypes.NODE_ID, shape=(-1, 2), compression_level=22
@@ -115,6 +118,14 @@ class Connectivity:
         serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
     )
 
+    CrossChunkEdge = _AttributeArray(
+        pattern=b"atomic_cross_edges_%d",
+        family_id="4",
+        serializer=serializers.NumPyArray(
+            dtype=basetypes.NODE_ID, shape=(-1, 2), compression_level=22
+        ),
+    )
+
 
 class Hierarchy:
     Child = _Attribute(
@@ -157,8 +168,6 @@ class GraphVersion:
 class OperationLogs:
     key = b"ioperations"
 
-    from enum import Enum
-
     class StatusCodes(Enum):
         SUCCESS = 0  # all is well, new changes persisted
         CREATED = 1  # log record created in storage

From 428365ee9d8dac97408b7ef75a6c78e3239b0bbd Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 6 Aug 2023 20:02:44 +0000
Subject: [PATCH 004/105] feat: post process sv cross edges

---
 pychunkedgraph/graph/attributes.py            |  6 +--
 .../graph/client/bigtable/client.py           |  4 +-
 pychunkedgraph/ingest/create/atomic_layer.py  | 54 ++++++++++++++++++-
 3 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/pychunkedgraph/graph/attributes.py b/pychunkedgraph/graph/attributes.py
index ea03d2216..b58a6f0f8 100644
--- a/pychunkedgraph/graph/attributes.py
+++ b/pychunkedgraph/graph/attributes.py
@@ -106,7 +106,7 @@ class Connectivity:
 
     L2CrossChunkEdge = _AttributeArray(
         pattern=b"l2_cross_edge_%d",
-        family_id="3",
+        family_id="4",
         serializer=serializers.NumPyArray(
             dtype=basetypes.NODE_ID, shape=(-1, 2), compression_level=22
         ),
@@ -114,13 +114,13 @@ class Connectivity:
 
     FakeEdges = _Attribute(
         key=b"fake_edges",
-        family_id="3",
+        family_id="4",
         serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
     )
 
     CrossChunkEdge = _AttributeArray(
         pattern=b"atomic_cross_edges_%d",
-        family_id="4",
+        family_id="3",
         serializer=serializers.NumPyArray(
             dtype=basetypes.NODE_ID, shape=(-1, 2), compression_level=22
         ),
diff --git a/pychunkedgraph/graph/client/bigtable/client.py b/pychunkedgraph/graph/client/bigtable/client.py
index 486cbdd73..19a08b9a8 100644
--- a/pychunkedgraph/graph/client/bigtable/client.py
+++ b/pychunkedgraph/graph/client/bigtable/client.py
@@ -636,9 +636,9 @@ def _create_column_families(self):
         f.create()
         f = self._table.column_family("2")
         f.create()
-        f = self._table.column_family("3")
+        f = self._table.column_family("3", gc_rule=MaxAgeGCRule(datetime.timedelta(days=1)))
         f.create()
-        f = self._table.column_family("4", gc_rule=MaxAgeGCRule(datetime.timedelta(days=1)))
+        f = self._table.column_family("4")
         f.create()
 
     def _get_ids_range(self, key: bytes, size: int) -> typing.Tuple:
diff --git a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py
index d87638b26..a59bc9f20 100644
--- a/pychunkedgraph/ingest/create/atomic_layer.py
+++ b/pychunkedgraph/ingest/create/atomic_layer.py
@@ -101,7 +101,13 @@ def _get_remapping(chunk_edges_d: dict):
 
 
 def _process_component(
-    cg, chunk_edges_d, parent_id, node_ids, sparse_indices, remapping, time_stamp,
+    cg,
+    chunk_edges_d,
+    parent_id,
+    node_ids,
+    sparse_indices,
+    remapping,
+    time_stamp,
 ):
     nodes = []
     chunk_out_edges = []  # out = between + cross
@@ -145,3 +151,49 @@ def _get_outgoing_edges(node_id, chunk_edges_d, sparse_indices, remapping):
             # edges that this node is part of
             chunk_out_edges = np.concatenate([chunk_out_edges, edges[row_ids]])
     return chunk_out_edges
+
+
+def postprocess_atomic_chunk(
+    cg: ChunkedGraph,
+    chunk_coord: np.ndarray,
+    time_stamp: Optional[datetime.datetime] = None,
+):
+    time_stamp = get_valid_timestamp(time_stamp)
+
+    chunk_id = cg.get_chunk_id(
+        layer=2, x=chunk_coord[0], y=chunk_coord[1], z=chunk_coord[2]
+    )
+
+    properties = [
+        attributes.Connectivity.CrossChunkEdge[l] for l in range(2, cg.meta.layer_count)
+    ]
+
+    chunk_rr = cg.range_read_chunk(
+        chunk_id, properties=properties, time_stamp=time_stamp
+    )
+
+    result = {}
+    for l2id, raw_cx_edges in chunk_rr.items():
+        try:
+            cx_edges = {
+                prop.index: val[0].value.copy() for prop, val in raw_cx_edges.items()
+            }
+            result[l2id] = cx_edges
+        except KeyError:
+            continue
+
+    nodes = []
+    val_dicts = []
+    for l2id, cx_edges in result.items():
+        val_dict = {}
+        for layer, edges in cx_edges.items():
+            l2_edges = np.zeros_like(edges)
+            l2_edges[:, 0] = l2id
+            l2_edges[:, 1] = cg.get_parents(edges[:, 1])
+            col = attributes.Connectivity.L2CrossChunkEdge[layer]
+            val_dict[col] = np.unique(l2_edges, axis=0)
+            val_dicts.append(val_dict)
+
+        r_key = serializers.serialize_uint64(l2id)
+        nodes.append(cg.client.mutate_row(r_key, val_dict, time_stamp=time_stamp))
+    cg.client.write(nodes)

From a439b29301db0a127114d7136fb729f31ae37d06 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Fri, 11 Aug 2023 15:11:02 +0000
Subject: [PATCH 005/105] fix: use longer expiry for debugging

---
 pychunkedgraph/graph/attributes.py             | 12 ++++++------
 pychunkedgraph/graph/client/bigtable/client.py |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pychunkedgraph/graph/attributes.py b/pychunkedgraph/graph/attributes.py
index b58a6f0f8..a3cf4a99c 100644
--- a/pychunkedgraph/graph/attributes.py
+++ b/pychunkedgraph/graph/attributes.py
@@ -104,6 +104,12 @@ class Connectivity:
         serializer=serializers.NumPyArray(dtype=basetypes.EDGE_AREA),
     )
 
+    FakeEdges = _Attribute(
+        key=b"fake_edges",
+        family_id="4",
+        serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
+    )
+
     L2CrossChunkEdge = _AttributeArray(
         pattern=b"l2_cross_edge_%d",
         family_id="4",
@@ -112,12 +118,6 @@ class Connectivity:
         ),
     )
 
-    FakeEdges = _Attribute(
-        key=b"fake_edges",
-        family_id="4",
-        serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
-    )
-
     CrossChunkEdge = _AttributeArray(
         pattern=b"atomic_cross_edges_%d",
         family_id="3",
diff --git a/pychunkedgraph/graph/client/bigtable/client.py b/pychunkedgraph/graph/client/bigtable/client.py
index 19a08b9a8..135ad9d07 100644
--- a/pychunkedgraph/graph/client/bigtable/client.py
+++ b/pychunkedgraph/graph/client/bigtable/client.py
@@ -636,7 +636,7 @@ def _create_column_families(self):
         f.create()
         f = self._table.column_family("2")
         f.create()
-        f = self._table.column_family("3", gc_rule=MaxAgeGCRule(datetime.timedelta(days=1)))
+        f = self._table.column_family("3", gc_rule=MaxAgeGCRule(datetime.timedelta(days=365)))
         f.create()
         f = self._table.column_family("4")
         f.create()

From 87edec237a0ca598c8a22556bd54eb6b7cb37fac Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Fri, 11 Aug 2023 15:55:37 +0000
Subject: [PATCH 006/105] feat(ingest): read l2 cross edges

---
 pychunkedgraph/graph/connectivity/cross_edges.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/graph/connectivity/cross_edges.py b/pychunkedgraph/graph/connectivity/cross_edges.py
index d69759bbf..99dc8df7f 100644
--- a/pychunkedgraph/graph/connectivity/cross_edges.py
+++ b/pychunkedgraph/graph/connectivity/cross_edges.py
@@ -82,7 +82,7 @@ def _get_children_chunk_cross_edges(cg, atomic_chunks, layer) -> None:
 def _read_atomic_chunk_cross_edges(
     cg, chunk_coord: Sequence[int], cross_edge_layer: int
 ) -> np.ndarray:
-    cross_edge_col = attributes.Connectivity.CrossChunkEdge[cross_edge_layer]
+    cross_edge_col = attributes.Connectivity.L2CrossChunkEdge[cross_edge_layer]
     range_read, l2ids = _read_atomic_chunk(cg, chunk_coord, [cross_edge_layer])
 
     parent_neighboring_chunk_supervoxels_d = defaultdict(list)
@@ -170,7 +170,7 @@ def _read_atomic_chunk_cross_edge_nodes(cg, chunk_coord, cross_edge_layers):
     range_read, l2ids = _read_atomic_chunk(cg, chunk_coord, cross_edge_layers)
     for l2id in l2ids:
         for layer in cross_edge_layers:
-            if attributes.Connectivity.CrossChunkEdge[layer] in range_read[l2id]:
+            if attributes.Connectivity.L2CrossChunkEdge[layer] in range_read[l2id]:
                 node_layer_d[l2id] = layer
                 break
     return node_layer_d
@@ -190,7 +190,7 @@ def _read_atomic_chunk(cg, chunk_coord, layers):
     range_read = cg.range_read_chunk(
         cg.get_chunk_id(layer=2, x=x, y=y, z=z),
         properties=[child_col]
-        + [attributes.Connectivity.CrossChunkEdge[l] for l in layers],
+        + [attributes.Connectivity.L2CrossChunkEdge[l] for l in layers],
     )
 
     row_ids = []

From 5402c35d2b737f1159fb2d228a781c89103f85f0 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 12 Aug 2023 16:41:25 +0000
Subject: [PATCH 007/105] feat(ingest): postprocess job handling

---
 pychunkedgraph/ingest/cli.py     | 25 ++++++++--
 pychunkedgraph/ingest/cluster.py | 79 +++++++++++++-------------------
 2 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 7668e8f24..ed0c3a3d6 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -1,3 +1,5 @@
+# pylint: disable=invalid-name, missing-function-docstring, import-outside-toplevel
+
 """
 cli for running ingest
 """
@@ -10,6 +12,7 @@
 from flask.cli import AppGroup
 from rq import Queue
 
+from .cluster import enqueue_atomic_tasks
 from .manager import IngestionManager
 from .utils import bootstrap
 from .cluster import randomize_grid_points
@@ -45,8 +48,6 @@ def ingest_graph(
     Main ingest command.
     Takes ingest config from a yaml file and queues atomic tasks.
     """
-    from .cluster import enqueue_atomic_tasks
-
     with open(dataset, "r") as stream:
         config = yaml.safe_load(stream)
 
@@ -62,6 +63,16 @@ def ingest_graph(
     enqueue_atomic_tasks(IngestionManager(ingest_config, meta))
 
 
+@ingest_cli.command("postprocess")
+def postprocess():
+    """
+    Run postprocessing step on level 2 chunks.
+    """
+    redis = get_redis_connection()
+    imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
+    enqueue_atomic_tasks(imanager, postprocess=True)
+
+
 @ingest_cli.command("imanager")
 @click.argument("graph_id", type=str)
 @click.argument("dataset", type=click.Path(exists=True))
@@ -143,7 +154,15 @@ def ingest_status():
     """Print ingest status to console by layer."""
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-    layers = range(2, imanager.cg_meta.layer_count + 1)
+
+    layer = 2
+    completed = redis.scard(f"{layer}c")
+    print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_count}")
+
+    completed = redis.scard(f"pp{layer}c")
+    print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_count} [postprocess]")
+
+    layers = range(3, imanager.cg_meta.layer_count + 1)
     for layer, layer_count in zip(layers, imanager.cg_meta.layer_chunk_counts):
         completed = redis.scard(f"{layer}c")
         print(f"{layer}\t: {completed} / {layer_count}")
diff --git a/pychunkedgraph/ingest/cluster.py b/pychunkedgraph/ingest/cluster.py
index cf9417024..768c474ce 100644
--- a/pychunkedgraph/ingest/cluster.py
+++ b/pychunkedgraph/ingest/cluster.py
@@ -1,3 +1,5 @@
+# pylint: disable=invalid-name, missing-function-docstring, import-outside-toplevel
+
 """
 Ingest / create chunkedgraph with workers.
 """
@@ -11,6 +13,7 @@
 from .common import get_atomic_chunk_data
 from .ran_agglomeration import get_active_edges
 from .create.atomic_layer import add_atomic_edges
+from .create.atomic_layer import postprocess_atomic_chunk
 from .create.abstract_layers import add_layer
 from ..graph.meta import ChunkedGraphMeta
 from ..graph.chunks.hierarchy import get_children_chunk_coords
@@ -18,44 +21,16 @@
 from ..utils.redis import get_redis_connection
 
 
-def _post_task_completion(imanager: IngestionManager, layer: int, coords: np.ndarray):
-    from os import environ
-
+def _post_task_completion(
+    imanager: IngestionManager,
+    layer: int,
+    coords: np.ndarray,
+    postprocess: bool = False,
+):
     chunk_str = "_".join(map(str, coords))
     # mark chunk as completed - "c"
-    imanager.redis.sadd(f"{layer}c", chunk_str)
-
-    if environ.get("DO_NOT_AUTOQUEUE_PARENT_CHUNKS", None) is not None:
-        return
-
-    parent_layer = layer + 1
-    if parent_layer > imanager.cg_meta.layer_count:
-        return
-
-    parent_coords = np.array(coords, int) // imanager.cg_meta.graph_config.FANOUT
-    parent_id_str = chunk_id_str(parent_layer, parent_coords)
-    imanager.redis.sadd(parent_id_str, chunk_str)
-
-    parent_chunk_str = "_".join(map(str, parent_coords))
-    if not imanager.redis.hget(parent_layer, parent_chunk_str):
-        # cache children chunk count
-        # checked by tracker worker to enqueue parent chunk
-        children_count = len(
-            get_children_chunk_coords(imanager.cg_meta, parent_layer, parent_coords)
-        )
-        imanager.redis.hset(parent_layer, parent_chunk_str, children_count)
-
-    tracker_queue = imanager.get_task_queue(f"t{layer}")
-    tracker_queue.enqueue(
-        enqueue_parent_task,
-        job_id=f"t{layer}_{chunk_str}",
-        job_timeout=f"30s",
-        result_ttl=0,
-        args=(
-            parent_layer,
-            parent_coords,
-        ),
-    )
+    pprocess = "_pprocess" if postprocess else ""
+    imanager.redis.sadd(f"{layer}c{pprocess}", chunk_str)
 
 
 def enqueue_parent_task(
@@ -127,7 +102,7 @@ def randomize_grid_points(X: int, Y: int, Z: int) -> Tuple[int, int, int]:
         yield np.unravel_index(index, (X, Y, Z))
 
 
-def enqueue_atomic_tasks(imanager: IngestionManager):
+def enqueue_atomic_tasks(imanager: IngestionManager, postprocess: bool = False):
     from os import environ
     from time import sleep
     from rq import Queue as RQueue
@@ -138,13 +113,18 @@ def enqueue_atomic_tasks(imanager: IngestionManager):
         atomic_chunk_bounds = imanager.cg_meta.layer_chunk_bounds[2]
         chunk_coords = randomize_grid_points(*atomic_chunk_bounds)
         chunk_count = imanager.cg_meta.layer_chunk_counts[0]
-
     print(f"total chunk count: {chunk_count}, queuing...")
-    batch_size = int(environ.get("L2JOB_BATCH_SIZE", 1000))
 
+    pprocess = ""
+    if postprocess:
+        pprocess = "_pprocess"
+        print("postprocessing l2 chunks")
+
+    queue_name = f"{imanager.config.CLUSTER.ATOMIC_Q_NAME}{pprocess}"
+    q = imanager.get_task_queue(queue_name)
     job_datas = []
+    batch_size = int(environ.get("L2JOB_BATCH_SIZE", 1000))
     for chunk_coord in chunk_coords:
-        q = imanager.get_task_queue(imanager.config.CLUSTER.ATOMIC_Q_NAME)
         # buffer for optimal use of redis memory
         if len(q) > imanager.config.CLUSTER.ATOMIC_Q_LIMIT:
             print(f"Sleeping {imanager.config.CLUSTER.ATOMIC_Q_INTERVAL}s...")
@@ -152,13 +132,13 @@ def enqueue_atomic_tasks(imanager: IngestionManager):
 
         x, y, z = chunk_coord
         chunk_str = f"{x}_{y}_{z}"
-        if imanager.redis.sismember("2c", chunk_str):
+        if imanager.redis.sismember(f"2c{pprocess}", chunk_str):
             # already done, skip
             continue
         job_datas.append(
             RQueue.prepare_data(
                 _create_atomic_chunk,
-                args=(chunk_coord,),
+                args=(chunk_coord, postprocess),
                 timeout=environ.get("L2JOB_TIMEOUT", "3m"),
                 result_ttl=0,
                 job_id=chunk_id_str(2, chunk_coord),
@@ -170,21 +150,26 @@ def enqueue_atomic_tasks(imanager: IngestionManager):
     q.enqueue_many(job_datas)
 
 
-def _create_atomic_chunk(coords: Sequence[int]):
+def _create_atomic_chunk(coords: Sequence[int], postprocess: bool = False):
     """Creates single atomic chunk"""
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
     coords = np.array(list(coords), dtype=int)
-    chunk_edges_all, mapping = get_atomic_chunk_data(imanager, coords)
-    chunk_edges_active, isolated_ids = get_active_edges(chunk_edges_all, mapping)
-    add_atomic_edges(imanager.cg, coords, chunk_edges_active, isolated=isolated_ids)
+
+    if postprocess:
+        postprocess_atomic_chunk(imanager.cg, coords)
+    else:
+        chunk_edges_all, mapping = get_atomic_chunk_data(imanager, coords)
+        chunk_edges_active, isolated_ids = get_active_edges(chunk_edges_all, mapping)
+        add_atomic_edges(imanager.cg, coords, chunk_edges_active, isolated=isolated_ids)
+
     if imanager.config.TEST_RUN:
         # print for debugging
         for k, v in chunk_edges_all.items():
             print(k, len(v))
         for k, v in chunk_edges_active.items():
             print(f"active_{k}", len(v))
-    _post_task_completion(imanager, 2, coords)
+    _post_task_completion(imanager, 2, coords, postprocess=postprocess)
 
 
 def _get_test_chunks(meta: ChunkedGraphMeta):

From 42e2b58d891bda1f9bb085dff914c7a16278c431 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 12 Aug 2023 17:14:37 +0000
Subject: [PATCH 008/105] fix(ingest): status

---
 pychunkedgraph/ingest/cli.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index ed0c3a3d6..8cf081952 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -157,13 +157,13 @@ def ingest_status():
 
     layer = 2
     completed = redis.scard(f"{layer}c")
-    print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_count}")
+    print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_chunk_counts[0]}")
 
     completed = redis.scard(f"pp{layer}c")
-    print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_count} [postprocess]")
+    print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_chunk_counts[0]} [postprocess]")
 
-    layers = range(3, imanager.cg_meta.layer_count + 1)
-    for layer, layer_count in zip(layers, imanager.cg_meta.layer_chunk_counts):
+    layers = range(3, imanager.cg_meta.layer_count)
+    for layer, layer_count in zip(layers, imanager.cg_meta.layer_chunk_counts[1:]):
         completed = redis.scard(f"{layer}c")
         print(f"{layer}\t: {completed} / {layer_count}")
 

From 49286cd6120dd61c4d372874b9249774147c19d1 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 12 Aug 2023 17:20:15 +0000
Subject: [PATCH 009/105] fix: timedelta import

---
 pychunkedgraph/graph/client/bigtable/client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/client/bigtable/client.py b/pychunkedgraph/graph/client/bigtable/client.py
index 135ad9d07..788c76a8e 100644
--- a/pychunkedgraph/graph/client/bigtable/client.py
+++ b/pychunkedgraph/graph/client/bigtable/client.py
@@ -4,8 +4,8 @@
 import time
 import typing
 import logging
-import datetime
 from datetime import datetime
+from datetime import timedelta
 
 import numpy as np
 from multiwrapper import multiprocessing_utils as mu
@@ -636,7 +636,7 @@ def _create_column_families(self):
         f.create()
         f = self._table.column_family("2")
         f.create()
-        f = self._table.column_family("3", gc_rule=MaxAgeGCRule(datetime.timedelta(days=365)))
+        f = self._table.column_family("3", gc_rule=MaxAgeGCRule(timedelta(days=365)))
         f.create()
         f = self._table.column_family("4")
         f.create()

From 2f060cda5f649ac75986aca2456ce20d95c77a46 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 12 Aug 2023 19:31:05 +0000
Subject: [PATCH 010/105] fix(ingest): status

---
 pychunkedgraph/ingest/cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 8cf081952..aedcb6d97 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -159,10 +159,10 @@ def ingest_status():
     completed = redis.scard(f"{layer}c")
     print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_chunk_counts[0]}")
 
-    completed = redis.scard(f"pp{layer}c")
+    completed = redis.scard(f"{layer}c_pprocess")
     print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_chunk_counts[0]} [postprocess]")
 
-    layers = range(3, imanager.cg_meta.layer_count)
+    layers = range(3, imanager.cg_meta.layer_count + 1)
     for layer, layer_count in zip(layers, imanager.cg_meta.layer_chunk_counts[1:]):
         completed = redis.scard(f"{layer}c")
         print(f"{layer}\t: {completed} / {layer_count}")

From c234e7977abb6b7fd3e231fe318585980b95c96c Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 12 Aug 2023 19:35:53 +0000
Subject: [PATCH 011/105] fix(ingest): use hyphenated names for valid dns

---
 pychunkedgraph/ingest/cli.py     | 2 +-
 pychunkedgraph/ingest/cluster.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index aedcb6d97..145c9bea6 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -159,7 +159,7 @@ def ingest_status():
     completed = redis.scard(f"{layer}c")
     print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_chunk_counts[0]}")
 
-    completed = redis.scard(f"{layer}c_pprocess")
+    completed = redis.scard(f"{layer}c-postprocess")
     print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_chunk_counts[0]} [postprocess]")
 
     layers = range(3, imanager.cg_meta.layer_count + 1)
diff --git a/pychunkedgraph/ingest/cluster.py b/pychunkedgraph/ingest/cluster.py
index 768c474ce..2b7927869 100644
--- a/pychunkedgraph/ingest/cluster.py
+++ b/pychunkedgraph/ingest/cluster.py
@@ -29,7 +29,7 @@ def _post_task_completion(
 ):
     chunk_str = "_".join(map(str, coords))
     # mark chunk as completed - "c"
-    pprocess = "_pprocess" if postprocess else ""
+    pprocess = "-postprocess" if postprocess else ""
     imanager.redis.sadd(f"{layer}c{pprocess}", chunk_str)
 
 
@@ -117,7 +117,7 @@ def enqueue_atomic_tasks(imanager: IngestionManager, postprocess: bool = False):
 
     pprocess = ""
     if postprocess:
-        pprocess = "_pprocess"
+        pprocess = "-postprocess"
         print("postprocessing l2 chunks")
 
     queue_name = f"{imanager.config.CLUSTER.ATOMIC_Q_NAME}{pprocess}"

From 1920dfd972b00cdeca431d2d28f1d0e47ab2bf7a Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 19:39:35 +0000
Subject: [PATCH 012/105] fix: rename attr; better var names

---
 pychunkedgraph/graph/attributes.py            |  6 ++--
 .../ingest/create/abstract_layers.py          | 32 +++++++++++--------
 pychunkedgraph/ingest/create/atomic_layer.py  |  6 ++--
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/pychunkedgraph/graph/attributes.py b/pychunkedgraph/graph/attributes.py
index a3cf4a99c..b0f18c2ec 100644
--- a/pychunkedgraph/graph/attributes.py
+++ b/pychunkedgraph/graph/attributes.py
@@ -110,15 +110,15 @@ class Connectivity:
         serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
     )
 
-    L2CrossChunkEdge = _AttributeArray(
-        pattern=b"l2_cross_edge_%d",
+    CrossChunkEdge = _AttributeArray(
+        pattern=b"cross_edge_%d",
         family_id="4",
         serializer=serializers.NumPyArray(
             dtype=basetypes.NODE_ID, shape=(-1, 2), compression_level=22
         ),
     )
 
-    CrossChunkEdge = _AttributeArray(
+    AtomicCrossChunkEdge = _AttributeArray(
         pattern=b"atomic_cross_edges_%d",
         family_id="3",
         serializer=serializers.NumPyArray(
diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 1973daacc..215929c41 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -56,12 +56,15 @@ def add_layer(
     edge_ids.extend(add_edge_ids)
     graph, _, _, graph_ids = flatgraph.build_gt_graph(edge_ids, make_directed=True)
     ccs = flatgraph.connected_components(graph)
+    connected_components = []
+    for cc in ccs:
+        connected_components.append(graph_ids[cc])
+
     _write_connected_components(
         cg,
         layer_id,
         parent_coords,
-        ccs,
-        graph_ids,
+        connected_components,
         get_valid_timestamp(time_stamp),
         n_threads > 1,
     )
@@ -126,12 +129,11 @@ def _write_connected_components(
     cg: ChunkedGraph,
     layer_id: int,
     parent_coords,
-    ccs,
-    graph_ids,
+    connected_components: list,
     time_stamp,
     use_threads=True,
 ) -> None:
-    if not ccs:
+    if len(connected_components) == 0:
         return
 
     node_layer_d_shared = {}
@@ -140,24 +142,20 @@ def _write_connected_components(
             cg, layer_id, parent_coords, use_threads=use_threads
         )
 
-    ccs_with_node_ids = []
-    for cc in ccs:
-        ccs_with_node_ids.append(graph_ids[cc])
-
     if not use_threads:
         _write(
             cg,
             layer_id,
             parent_coords,
-            ccs_with_node_ids,
+            connected_components,
             node_layer_d_shared,
             time_stamp,
             use_threads=use_threads,
         )
         return
 
-    task_size = int(math.ceil(len(ccs_with_node_ids) / mp.cpu_count() / 10))
-    chunked_ccs = chunked(ccs_with_node_ids, task_size)
+    task_size = int(math.ceil(len(connected_components) / mp.cpu_count() / 10))
+    chunked_ccs = chunked(connected_components, task_size)
     cg_info = cg.get_serialized_info()
     multi_args = []
     for ccs in chunked_ccs:
@@ -178,11 +176,17 @@ def _write_components_helper(args):
 
 
 def _write(
-    cg, layer_id, parent_coords, ccs, node_layer_d_shared, time_stamp, use_threads=True
+    cg,
+    layer_id,
+    parent_coords,
+    connected_components,
+    node_layer_d_shared,
+    time_stamp,
+    use_threads=True,
 ):
     parent_layer_ids = range(layer_id, cg.meta.layer_count + 1)
     cc_connections = {l: [] for l in parent_layer_ids}
-    for node_ids in ccs:
+    for node_ids in connected_components:
         layer = layer_id
         if len(node_ids) == 1:
             layer = node_layer_d_shared.get(node_ids[0], cg.meta.layer_count)
diff --git a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py
index a59bc9f20..42b6a01b5 100644
--- a/pychunkedgraph/ingest/create/atomic_layer.py
+++ b/pychunkedgraph/ingest/create/atomic_layer.py
@@ -126,7 +126,7 @@ def _process_component(
     for cc_layer in u_cce_layers:
         layer_out_edges = chunk_out_edges[cce_layers == cc_layer]
         if layer_out_edges.size:
-            col = attributes.Connectivity.CrossChunkEdge[cc_layer]
+            col = attributes.Connectivity.AtomicCrossChunkEdge[cc_layer]
             val_dict[col] = layer_out_edges
 
     r_key = serializers.serialize_uint64(parent_id)
@@ -165,7 +165,7 @@ def postprocess_atomic_chunk(
     )
 
     properties = [
-        attributes.Connectivity.CrossChunkEdge[l] for l in range(2, cg.meta.layer_count)
+        attributes.Connectivity.AtomicCrossChunkEdge[l] for l in range(2, cg.meta.layer_count)
     ]
 
     chunk_rr = cg.range_read_chunk(
@@ -190,7 +190,7 @@ def postprocess_atomic_chunk(
             l2_edges = np.zeros_like(edges)
             l2_edges[:, 0] = l2id
             l2_edges[:, 1] = cg.get_parents(edges[:, 1])
-            col = attributes.Connectivity.L2CrossChunkEdge[layer]
+            col = attributes.Connectivity.CrossChunkEdge[layer]
             val_dict[col] = np.unique(l2_edges, axis=0)
             val_dicts.append(val_dict)
 

From 515147c587a189fc463213cb95dbed218dd45858 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 19:40:48 +0000
Subject: [PATCH 013/105] fix: rename attr; better var names

---
 pychunkedgraph/graph/edits.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index be2eee1c6..4cb536ea7 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -565,7 +565,7 @@ def _get_atomic_cross_edges_val_dict(self):
         for id_ in new_ids:
             val_dict = {}
             for layer, edges in atomic_cross_edges_d[id_].items():
-                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+                val_dict[attributes.Connectivity.AtomicCrossChunkEdge[layer]] = edges
             val_dicts[id_] = val_dict
         return val_dicts
 

From 43e1e066745f8344779ba1ec0563850940f794b0 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 20:15:31 +0000
Subject: [PATCH 014/105] fix: add more docs; better var names

---
 .../graph/connectivity/cross_edges.py         | 79 ++++++++++++++-----
 1 file changed, 58 insertions(+), 21 deletions(-)

diff --git a/pychunkedgraph/graph/connectivity/cross_edges.py b/pychunkedgraph/graph/connectivity/cross_edges.py
index 99dc8df7f..d2dbcbb8c 100644
--- a/pychunkedgraph/graph/connectivity/cross_edges.py
+++ b/pychunkedgraph/graph/connectivity/cross_edges.py
@@ -64,6 +64,11 @@ def _get_children_chunk_cross_edges_helper(args) -> None:
 
 
 def _get_children_chunk_cross_edges(cg, atomic_chunks, layer) -> None:
+    """
+    Non parallelized version
+    Cross edges that connect children chunks.
+    The edges are between node IDs in the given layer (not atomic).
+    """
     cross_edges = [empty_2d]
     for layer2_chunk in atomic_chunks:
         edges = _read_atomic_chunk_cross_edges(cg, layer2_chunk, layer)
@@ -82,7 +87,11 @@ def _get_children_chunk_cross_edges(cg, atomic_chunks, layer) -> None:
 def _read_atomic_chunk_cross_edges(
     cg, chunk_coord: Sequence[int], cross_edge_layer: int
 ) -> np.ndarray:
-    cross_edge_col = attributes.Connectivity.L2CrossChunkEdge[cross_edge_layer]
+    """
+    Returns cross edges between l2 nodes in current chunk and
+    l1 supervoxels from neighbor chunks.
+    """
+    cross_edge_col = attributes.Connectivity.CrossChunkEdge[cross_edge_layer]
     range_read, l2ids = _read_atomic_chunk(cg, chunk_coord, [cross_edge_layer])
 
     parent_neighboring_chunk_supervoxels_d = defaultdict(list)
@@ -93,8 +102,7 @@ def _read_atomic_chunk_cross_edges(
         parent_neighboring_chunk_supervoxels_d[l2id] = edges[:, 1]
 
     cross_edges = [empty_2d]
-    for l2id in parent_neighboring_chunk_supervoxels_d:
-        nebor_svs = parent_neighboring_chunk_supervoxels_d[l2id]
+    for l2id, nebor_svs in parent_neighboring_chunk_supervoxels_d.items():
         chunk_parent_ids = np.array([l2id] * len(nebor_svs), dtype=basetypes.NODE_ID)
         cross_edges.append(np.vstack([chunk_parent_ids, nebor_svs]).T)
     cross_edges = np.concatenate(cross_edges)
@@ -118,14 +126,14 @@ def get_chunk_nodes_cross_edge_layer(
 
     cg_info = cg.get_serialized_info()
     manager = mp.Manager()
-    ids_l_shared = manager.list()
-    layers_l_shared = manager.list()
+    node_ids_shared = manager.list()
+    node_layers_shared = manager.list()
     task_size = int(math.ceil(len(atomic_chunks) / mp.cpu_count() / 10))
     chunked_l2chunk_list = chunked(atomic_chunks, task_size)
     multi_args = []
     for atomic_chunks in chunked_l2chunk_list:
         multi_args.append(
-            (ids_l_shared, layers_l_shared, cg_info, atomic_chunks, layer)
+            (node_ids_shared, node_layers_shared, cg_info, atomic_chunks, layer)
         )
 
     multiprocess_func(
@@ -135,24 +143,28 @@ def get_chunk_nodes_cross_edge_layer(
     )
 
     node_layer_d_shared = manager.dict()
-    _find_min_layer(node_layer_d_shared, ids_l_shared, layers_l_shared)
+    _find_min_layer(node_layer_d_shared, node_ids_shared, node_layers_shared)
     return node_layer_d_shared
 
 
 def _get_chunk_nodes_cross_edge_layer_helper(args):
-    ids_l_shared, layers_l_shared, cg_info, atomic_chunks, layer = args
+    node_ids_shared, node_layers_shared, cg_info, atomic_chunks, layer = args
     cg = ChunkedGraph(**cg_info)
     node_layer_d = _get_chunk_nodes_cross_edge_layer(cg, atomic_chunks, layer)
-    ids_l_shared.append(np.fromiter(node_layer_d.keys(), dtype=basetypes.NODE_ID))
-    layers_l_shared.append(np.fromiter(node_layer_d.values(), dtype=np.uint8))
+    node_ids_shared.append(np.fromiter(node_layer_d.keys(), dtype=basetypes.NODE_ID))
+    node_layers_shared.append(np.fromiter(node_layer_d.values(), dtype=np.uint8))
 
 
 def _get_chunk_nodes_cross_edge_layer(cg, atomic_chunks, layer):
+    """
+    Non parallelized version
+    gets nodes in a chunk that are part of cross chunk edges
+    return_type dict {node_id: layer}
+    the lowest layer (>= current layer) at which a node_id is part of a cross edge
+    """
     atomic_node_layer_d = {}
     for atomic_chunk in atomic_chunks:
-        chunk_node_layer_d = _read_atomic_chunk_cross_edge_nodes(
-            cg, atomic_chunk, range(layer, cg.meta.layer_count + 1)
-        )
+        chunk_node_layer_d = _read_atomic_chunk_cross_edge_nodes(cg, atomic_chunk, layer)
         atomic_node_layer_d.update(chunk_node_layer_d)
 
     l2ids = np.fromiter(atomic_node_layer_d.keys(), dtype=basetypes.NODE_ID)
@@ -165,32 +177,57 @@ def _get_chunk_nodes_cross_edge_layer(cg, atomic_chunks, layer):
     return node_layer_d
 
 
-def _read_atomic_chunk_cross_edge_nodes(cg, chunk_coord, cross_edge_layers):
+def _read_atomic_chunk_cross_edge_nodes(cg, chunk_coord, layer):
+    """
+    the lowest layer at which an l2 node is part of a cross edge
+    """
     node_layer_d = {}
-    range_read, l2ids = _read_atomic_chunk(cg, chunk_coord, cross_edge_layers)
+    relevant_layers = range(layer, cg.meta.layer_count + 1)
+    range_read, l2ids = _read_atomic_chunk(cg, chunk_coord, relevant_layers)
     for l2id in l2ids:
-        for layer in cross_edge_layers:
-            if attributes.Connectivity.L2CrossChunkEdge[layer] in range_read[l2id]:
+        for layer in relevant_layers:
+            if attributes.Connectivity.CrossChunkEdge[layer] in range_read[l2id]:
                 node_layer_d[l2id] = layer
                 break
     return node_layer_d
 
 
-def _find_min_layer(node_layer_d_shared, ids_l_shared, layers_l_shared):
-    node_ids = np.concatenate(ids_l_shared)
-    layers = np.concatenate(layers_l_shared)
+def _find_min_layer(node_layer_d_shared, node_ids_shared, node_layers_shared):
+    """
+    `node_layer_d_shared`: DictProxy
+
+    `node_ids_shared`: ListProxy
+
+    `node_layers_shared`: ListProxy
+
+    Due to parallelization, there will be multiple values for min_layer of a node.
+    We need to find the global min_layer after all multiprocesses return.
+    For eg:
+        At some indices p and q, there will be a node_id x
+          i.e. `node_ids_shared[p] == node_ids_shared[q]`
+
+        and node_layers_shared[p] != node_layers_shared[q]
+        so we need:
+          `node_layer_d_shared[x] =  min(node_layers_shared[p], node_layers_shared[q])`
+    """
+    node_ids = np.concatenate(node_ids_shared)
+    layers = np.concatenate(node_layers_shared)
     for i, node_id in enumerate(node_ids):
         layer = node_layer_d_shared.get(node_id, layers[i])
         node_layer_d_shared[node_id] = min(layer, layers[i])
 
 
 def _read_atomic_chunk(cg, chunk_coord, layers):
+    """
+    read entire atomic chunk; all nodes and their relevant cross edges
+    filter out invalid nodes generated by failed tasks
+    """
     x, y, z = chunk_coord
     child_col = attributes.Hierarchy.Child
     range_read = cg.range_read_chunk(
         cg.get_chunk_id(layer=2, x=x, y=y, z=z),
         properties=[child_col]
-        + [attributes.Connectivity.L2CrossChunkEdge[l] for l in layers],
+        + [attributes.Connectivity.CrossChunkEdge[l] for l in layers],
     )
 
     row_ids = []

From decb4a95a54b2b61f522e51501a50ac09181069e Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 20:22:40 +0000
Subject: [PATCH 015/105] fix: move cross_edges module to ingest module; only
 used in ingest

---
 pychunkedgraph/graph/chunks/atomic.py            |  6 +++---
 pychunkedgraph/ingest/create/abstract_layers.py  |  4 ++--
 .../create}/cross_edges.py                       | 16 ++++++++--------
 3 files changed, 13 insertions(+), 13 deletions(-)
 rename pychunkedgraph/{graph/connectivity => ingest/create}/cross_edges.py (95%)

diff --git a/pychunkedgraph/graph/chunks/atomic.py b/pychunkedgraph/graph/chunks/atomic.py
index e3de065ff..b609f4cfb 100644
--- a/pychunkedgraph/graph/chunks/atomic.py
+++ b/pychunkedgraph/graph/chunks/atomic.py
@@ -1,3 +1,5 @@
+# pylint: disable=invalid-name, missing-docstring
+
 from typing import List
 from typing import Sequence
 from itertools import product
@@ -6,8 +8,6 @@
 
 from .utils import get_bounding_children_chunks
 from ..meta import ChunkedGraphMeta
-from ..utils.generic import get_valid_timestamp
-from ..utils import basetypes
 
 
 def get_touching_atomic_chunks(
@@ -27,7 +27,7 @@ def get_touching_atomic_chunks(
     chunk_offset = chunk_coords * atomic_chunk_count
     mid = (atomic_chunk_count // 2) - 1
 
-    # TODO (akhileshh) convert this for loop to numpy
+    # TODO (akhileshh) convert this for loop to numpy;
     # relevant chunks along touching planes at center
     for axis_1, axis_2 in product(*[range(atomic_chunk_count)] * 2):
         # x-y plane
diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 215929c41..c5a78d2ca 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -23,8 +23,8 @@
 from ...graph.utils.generic import get_valid_timestamp
 from ...graph.utils.generic import filter_failed_node_ids
 from ...graph.chunks.hierarchy import get_children_chunk_coords
-from ...graph.connectivity.cross_edges import get_children_chunk_cross_edges
-from ...graph.connectivity.cross_edges import get_chunk_nodes_cross_edge_layer
+from .cross_edges import get_children_chunk_cross_edges
+from .cross_edges import get_chunk_nodes_cross_edge_layer
 
 
 def add_layer(
diff --git a/pychunkedgraph/graph/connectivity/cross_edges.py b/pychunkedgraph/ingest/create/cross_edges.py
similarity index 95%
rename from pychunkedgraph/graph/connectivity/cross_edges.py
rename to pychunkedgraph/ingest/create/cross_edges.py
index d2dbcbb8c..481a5b6e5 100644
--- a/pychunkedgraph/graph/connectivity/cross_edges.py
+++ b/pychunkedgraph/ingest/create/cross_edges.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel
+# pylint: disable=invalid-name, missing-docstring
 
 import math
 import multiprocessing as mp
@@ -9,13 +9,13 @@
 import numpy as np
 from multiwrapper.multiprocessing_utils import multiprocess_func
 
-from .. import attributes
-from ..types import empty_2d
-from ..utils import basetypes
-from ..chunkedgraph import ChunkedGraph
-from ..utils.generic import filter_failed_node_ids
-from ..chunks.atomic import get_touching_atomic_chunks
-from ..chunks.atomic import get_bounding_atomic_chunks
+from ...graph import attributes
+from ...graph.types import empty_2d
+from ...graph.utils import basetypes
+from ...graph.chunkedgraph import ChunkedGraph
+from ...graph.utils.generic import filter_failed_node_ids
+from ...graph.chunks.atomic import get_touching_atomic_chunks
+from ...graph.chunks.atomic import get_bounding_atomic_chunks
 from ...utils.general import chunked
 
 

From 268445382d10938af42f1d8f4b31cbc0debc3f0a Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 20:49:13 +0000
Subject: [PATCH 016/105] fix: reduce mem use; var names; remove unused code

---
 pychunkedgraph/ingest/cli.py                  | 74 ++++++-------------
 pychunkedgraph/ingest/cluster.py              | 47 +-----------
 .../ingest/create/abstract_layers.py          | 23 ++----
 pychunkedgraph/ingest/manager.py              |  6 +-
 tracker.py                                    | 22 ------
 5 files changed, 38 insertions(+), 134 deletions(-)
 delete mode 100644 tracker.py

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 145c9bea6..486224cec 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, missing-function-docstring, import-outside-toplevel
+# pylint: disable=invalid-name, missing-function-docstring, unspecified-encoding
 
 """
 cli for running ingest
@@ -12,10 +12,14 @@
 from flask.cli import AppGroup
 from rq import Queue
 
+from .cluster import create_atomic_chunk
+from .cluster import create_parent_chunk
 from .cluster import enqueue_atomic_tasks
+from .cluster import randomize_grid_points
 from .manager import IngestionManager
 from .utils import bootstrap
-from .cluster import randomize_grid_points
+from .utils import chunk_id_str
+from .create.abstract_layers import add_layer
 from ..graph.chunkedgraph import ChunkedGraph
 from ..utils.redis import get_redis_connection
 from ..utils.redis import keys as r_keys
@@ -90,7 +94,7 @@ def pickle_imanager(graph_id: str, dataset: click.Path, raw: bool):
 
     meta, ingest_config, _ = bootstrap(graph_id, config=config, raw=raw)
     imanager = IngestionManager(ingest_config, meta)
-    imanager.redis
+    imanager.redis  # pylint: disable=pointless-statement
 
 
 @ingest_cli.command("layer")
@@ -100,11 +104,6 @@ def queue_layer(parent_layer):
     Queue all chunk tasks at a given layer.
     Must be used when all the chunks at `parent_layer - 1` have completed.
     """
-    from itertools import product
-    import numpy as np
-    from .cluster import create_parent_chunk
-    from .utils import chunk_id_str
-
     assert parent_layer > 2, "This command is for layers 3 and above."
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
@@ -115,38 +114,15 @@ def queue_layer(parent_layer):
         bounds = imanager.cg_meta.layer_chunk_bounds[parent_layer]
         chunk_coords = randomize_grid_points(*bounds)
 
-    def get_chunks_not_done(coords: list) -> list:
-        """check for set membership in redis in batches"""
-        coords_strs = ["_".join(map(str, coord)) for coord in coords]
-        try:
-            completed = imanager.redis.smismember(f"{parent_layer}c", coords_strs)
-        except Exception:
-            return coords
-        return [coord for coord, c in zip(coords, completed) if not c]
-
-    batch_size = int(environ.get("JOB_BATCH_SIZE", 10000))
-    batches = chunked(chunk_coords, batch_size)
-    q = imanager.get_task_queue(f"l{parent_layer}")
-
-    for batch in batches:
-        _coords = get_chunks_not_done(batch)
-        # buffer for optimal use of redis memory
-        if len(q) > int(environ.get("QUEUE_SIZE", 100000)):
-            interval = int(environ.get("QUEUE_INTERVAL", 300))
-            sleep(interval)
-
-        job_datas = []
-        for chunk_coord in _coords:
-            job_datas.append(
-                Queue.prepare_data(
-                    create_parent_chunk,
-                    args=(parent_layer, chunk_coord),
-                    result_ttl=0,
-                    job_id=chunk_id_str(parent_layer, chunk_coord),
-                    timeout=f"{int(parent_layer * parent_layer)}m",
-                )
-            )
-        q.enqueue_many(job_datas)
+    for coords in chunk_coords:
+        task_q = imanager.get_task_queue(f"l{parent_layer}")
+        task_q.enqueue(
+            create_parent_chunk,
+            job_id=chunk_id_str(parent_layer, coords),
+            job_timeout=f"{int(parent_layer * parent_layer)}m",
+            result_ttl=0,
+            args=(parent_layer, coords),
+        )
 
 
 @ingest_cli.command("status")
@@ -156,16 +132,16 @@ def ingest_status():
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
 
     layer = 2
-    completed = redis.scard(f"{layer}c")
-    print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_chunk_counts[0]}")
+    done = redis.scard(f"{layer}c")
+    print(f"{layer}\t: {done} / {imanager.cg_meta.layer_chunk_counts[0]}")
 
-    completed = redis.scard(f"{layer}c-postprocess")
-    print(f"{layer}\t: {completed} / {imanager.cg_meta.layer_chunk_counts[0]} [postprocess]")
+    done = redis.scard(f"{layer}c-postprocess")
+    print(f"{layer}\t: {done} / {imanager.cg_meta.layer_chunk_counts[0]} [postprocess]")
 
     layers = range(3, imanager.cg_meta.layer_count + 1)
     for layer, layer_count in zip(layers, imanager.cg_meta.layer_chunk_counts[1:]):
-        completed = redis.scard(f"{layer}c")
-        print(f"{layer}\t: {completed} / {layer_count}")
+        done = redis.scard(f"{layer}c")
+        print(f"{layer}\t: {done} / {layer_count}")
 
 
 @ingest_cli.command("chunk")
@@ -173,17 +149,13 @@ def ingest_status():
 @click.argument("chunk_info", nargs=4, type=int)
 def ingest_chunk(queue: str, chunk_info):
     """Manually queue chunk when a job is stuck for whatever reason."""
-    from .cluster import _create_atomic_chunk
-    from .cluster import create_parent_chunk
-    from .utils import chunk_id_str
-
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
     layer = chunk_info[0]
     coords = chunk_info[1:]
     queue = imanager.get_task_queue(queue)
     if layer == 2:
-        func = _create_atomic_chunk
+        func = create_atomic_chunk
         args = (coords,)
     else:
         func = create_parent_chunk
diff --git a/pychunkedgraph/ingest/cluster.py b/pychunkedgraph/ingest/cluster.py
index 2b7927869..9394c4e26 100644
--- a/pychunkedgraph/ingest/cluster.py
+++ b/pychunkedgraph/ingest/cluster.py
@@ -33,49 +33,6 @@ def _post_task_completion(
     imanager.redis.sadd(f"{layer}c{pprocess}", chunk_str)
 
 
-def enqueue_parent_task(
-    parent_layer: int,
-    parent_coords: Sequence[int],
-):
-    redis = get_redis_connection()
-    imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-    parent_id_str = chunk_id_str(parent_layer, parent_coords)
-    parent_chunk_str = "_".join(map(str, parent_coords))
-
-    children_done = redis.scard(parent_id_str)
-    # if zero then this key was deleted and parent already queued.
-    if children_done == 0:
-        print("parent already queued.")
-        return
-
-    # if the previous layer is complete
-    # no need to check children progress for each parent chunk
-    child_layer = parent_layer - 1
-    child_layer_done = redis.scard(f"{child_layer}c")
-    child_layer_count = imanager.cg_meta.layer_chunk_counts[child_layer - 2]
-    child_layer_finished = child_layer_done == child_layer_count
-
-    if not child_layer_finished:
-        children_count = int(redis.hget(parent_layer, parent_chunk_str).decode("utf-8"))
-        if children_done != children_count:
-            print("children not done.")
-            return
-
-    queue = imanager.get_task_queue(f"l{parent_layer}")
-    queue.enqueue(
-        create_parent_chunk,
-        job_id=parent_id_str,
-        job_timeout=f"{int(parent_layer * parent_layer)}m",
-        result_ttl=0,
-        args=(
-            parent_layer,
-            parent_coords,
-        ),
-    )
-    redis.hdel(parent_layer, parent_chunk_str)
-    redis.delete(parent_id_str)
-
-
 def create_parent_chunk(
     parent_layer: int,
     parent_coords: Sequence[int],
@@ -137,7 +94,7 @@ def enqueue_atomic_tasks(imanager: IngestionManager, postprocess: bool = False):
             continue
         job_datas.append(
             RQueue.prepare_data(
-                _create_atomic_chunk,
+                create_atomic_chunk,
                 args=(chunk_coord, postprocess),
                 timeout=environ.get("L2JOB_TIMEOUT", "3m"),
                 result_ttl=0,
@@ -150,7 +107,7 @@ def enqueue_atomic_tasks(imanager: IngestionManager, postprocess: bool = False):
     q.enqueue_many(job_datas)
 
 
-def _create_atomic_chunk(coords: Sequence[int], postprocess: bool = False):
+def create_atomic_chunk(coords: Sequence[int], postprocess: bool = False):
     """Creates single atomic chunk"""
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index c5a78d2ca..8912a2d53 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -39,26 +39,20 @@ def add_layer(
     if not children_coords.size:
         children_coords = get_children_chunk_coords(cg.meta, layer_id, parent_coords)
     children_ids = _read_children_chunks(cg, layer_id, children_coords, n_threads > 1)
-    edge_ids = get_children_chunk_cross_edges(
+    cross_edges = get_children_chunk_cross_edges(
         cg, layer_id, parent_coords, use_threads=n_threads > 1
     )
 
     node_layers = cg.get_chunk_layers(children_ids)
-    edge_layers = cg.get_chunk_layers(np.unique(edge_ids))
+    edge_layers = cg.get_chunk_layers(np.unique(cross_edges))
     assert np.all(node_layers < layer_id), "invalid node layers"
     assert np.all(edge_layers < layer_id), "invalid edge layers"
-    # Extract connected components
-    # isolated_node_mask = ~np.in1d(children_ids, np.unique(edge_ids))
-    # add_node_ids = children_ids[isolated_node_mask].squeeze()
-    add_edge_ids = np.vstack([children_ids, children_ids]).T
-
-    edge_ids = list(edge_ids)
-    edge_ids.extend(add_edge_ids)
-    graph, _, _, graph_ids = flatgraph.build_gt_graph(edge_ids, make_directed=True)
-    ccs = flatgraph.connected_components(graph)
-    connected_components = []
-    for cc in ccs:
-        connected_components.append(graph_ids[cc])
+
+    cross_edges = list(cross_edges)
+    cross_edges.extend(np.vstack([children_ids, children_ids]).T) # add self-edges
+    graph, _, _, graph_ids = flatgraph.build_gt_graph(cross_edges, make_directed=True)
+    raw_ccs = flatgraph.connected_components(graph) # connected components with indices
+    connected_components = [graph_ids[cc] for cc in raw_ccs]
 
     _write_connected_components(
         cg,
@@ -68,7 +62,6 @@ def add_layer(
         get_valid_timestamp(time_stamp),
         n_threads > 1,
     )
-    return f"{layer_id}_{'_'.join(map(str, parent_coords))}"
 
 
 def _read_children_chunks(
diff --git a/pychunkedgraph/ingest/manager.py b/pychunkedgraph/ingest/manager.py
index f5f870810..55e7d253f 100644
--- a/pychunkedgraph/ingest/manager.py
+++ b/pychunkedgraph/ingest/manager.py
@@ -1,3 +1,5 @@
+# pylint: disable=invalid-name, missing-docstring
+
 import pickle
 
 from . import IngestConfig
@@ -15,7 +17,9 @@ def __init__(self, config: IngestConfig, chunkedgraph_meta: ChunkedGraphMeta):
         self._cg = None
         self._redis = None
         self._task_queues = {}
-        self.redis  # initiate and cache info
+
+        # initiate redis and cache info
+        self.redis  # pylint: disable=pointless-statement
 
     @property
     def config(self):
diff --git a/tracker.py b/tracker.py
deleted file mode 100644
index d2ae63cb3..000000000
--- a/tracker.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import sys
-from rq import Connection, Worker
-
-# Preload libraries from pychunkedgraph.ingest.cluster
-from typing import Sequence, Tuple
-
-import numpy as np
-
-from pychunkedgraph.ingest.utils import chunk_id_str
-from pychunkedgraph.ingest.manager import IngestionManager
-from pychunkedgraph.ingest.common import get_atomic_chunk_data
-from pychunkedgraph.ingest.ran_agglomeration import get_active_edges
-from pychunkedgraph.ingest.create.atomic_layer import add_atomic_edges
-from pychunkedgraph.ingest.create.abstract_layers import add_layer
-from pychunkedgraph.graph.meta import ChunkedGraphMeta
-from pychunkedgraph.graph.chunks.hierarchy import get_children_chunk_coords
-from pychunkedgraph.utils.redis import keys as r_keys
-from pychunkedgraph.utils.redis import get_redis_connection
-
-qs = sys.argv[1:]
-w = Worker(qs, connection=get_redis_connection())
-w.work()
\ No newline at end of file

From cf75901194de892fdca2d10084b7f5a6b6097ffe Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 21:06:45 +0000
Subject: [PATCH 017/105] fix: adds cg typehint

---
 pychunkedgraph/ingest/create/cross_edges.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/pychunkedgraph/ingest/create/cross_edges.py b/pychunkedgraph/ingest/create/cross_edges.py
index 481a5b6e5..78b7309fe 100644
--- a/pychunkedgraph/ingest/create/cross_edges.py
+++ b/pychunkedgraph/ingest/create/cross_edges.py
@@ -20,7 +20,7 @@
 
 
 def get_children_chunk_cross_edges(
-    cg, layer, chunk_coord, *, use_threads=True
+    cg: ChunkedGraph, layer, chunk_coord, *, use_threads=True
 ) -> np.ndarray:
     """
     Cross edges that connect children chunks.
@@ -63,7 +63,7 @@ def _get_children_chunk_cross_edges_helper(args) -> None:
     edge_ids_shared.append(_get_children_chunk_cross_edges(cg, atomic_chunks, layer))
 
 
-def _get_children_chunk_cross_edges(cg, atomic_chunks, layer) -> None:
+def _get_children_chunk_cross_edges(cg: ChunkedGraph, atomic_chunks, layer) -> None:
     """
     Non parallelized version
     Cross edges that connect children chunks.
@@ -85,7 +85,7 @@ def _get_children_chunk_cross_edges(cg, atomic_chunks, layer) -> None:
 
 
 def _read_atomic_chunk_cross_edges(
-    cg, chunk_coord: Sequence[int], cross_edge_layer: int
+    cg: ChunkedGraph, chunk_coord: Sequence[int], cross_edge_layer: int
 ) -> np.ndarray:
     """
     Returns cross edges between l2 nodes in current chunk and
@@ -110,7 +110,7 @@ def _read_atomic_chunk_cross_edges(
 
 
 def get_chunk_nodes_cross_edge_layer(
-    cg, layer: int, chunk_coord: Sequence[int], use_threads=True
+    cg: ChunkedGraph, layer: int, chunk_coord: Sequence[int], use_threads=True
 ) -> Dict:
     """
     gets nodes in a chunk that are part of cross chunk edges
@@ -155,7 +155,7 @@ def _get_chunk_nodes_cross_edge_layer_helper(args):
     node_layers_shared.append(np.fromiter(node_layer_d.values(), dtype=np.uint8))
 
 
-def _get_chunk_nodes_cross_edge_layer(cg, atomic_chunks, layer):
+def _get_chunk_nodes_cross_edge_layer(cg: ChunkedGraph, atomic_chunks, layer):
     """
     Non parallelized version
     gets nodes in a chunk that are part of cross chunk edges
@@ -164,7 +164,9 @@ def _get_chunk_nodes_cross_edge_layer(cg, atomic_chunks, layer):
     """
     atomic_node_layer_d = {}
     for atomic_chunk in atomic_chunks:
-        chunk_node_layer_d = _read_atomic_chunk_cross_edge_nodes(cg, atomic_chunk, layer)
+        chunk_node_layer_d = _read_atomic_chunk_cross_edge_nodes(
+            cg, atomic_chunk, layer
+        )
         atomic_node_layer_d.update(chunk_node_layer_d)
 
     l2ids = np.fromiter(atomic_node_layer_d.keys(), dtype=basetypes.NODE_ID)
@@ -177,7 +179,7 @@ def _get_chunk_nodes_cross_edge_layer(cg, atomic_chunks, layer):
     return node_layer_d
 
 
-def _read_atomic_chunk_cross_edge_nodes(cg, chunk_coord, layer):
+def _read_atomic_chunk_cross_edge_nodes(cg: ChunkedGraph, chunk_coord, layer):
     """
     the lowest layer at which an l2 node is part of a cross edge
     """
@@ -217,7 +219,7 @@ def _find_min_layer(node_layer_d_shared, node_ids_shared, node_layers_shared):
         node_layer_d_shared[node_id] = min(layer, layers[i])
 
 
-def _read_atomic_chunk(cg, chunk_coord, layers):
+def _read_atomic_chunk(cg: ChunkedGraph, chunk_coord, layers):
     """
     read entire atomic chunk; all nodes and their relevant cross edges
     filter out invalid nodes generated by failed tasks

From 3b5252783d658b448c9e11133047ea57b0057271 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 21:32:58 +0000
Subject: [PATCH 018/105] fix: reduce loc

---
 .../ingest/create/abstract_layers.py          | 60 ++++++-------------
 pychunkedgraph/ingest/create/cross_edges.py   |  2 +-
 2 files changed, 20 insertions(+), 42 deletions(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 8912a2d53..31610aeab 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -49,9 +49,9 @@ def add_layer(
     assert np.all(edge_layers < layer_id), "invalid edge layers"
 
     cross_edges = list(cross_edges)
-    cross_edges.extend(np.vstack([children_ids, children_ids]).T) # add self-edges
+    cross_edges.extend(np.vstack([children_ids, children_ids]).T)  # add self-edges
     graph, _, _, graph_ids = flatgraph.build_gt_graph(cross_edges, make_directed=True)
-    raw_ccs = flatgraph.connected_components(graph) # connected components with indices
+    raw_ccs = flatgraph.connected_components(graph)  # connected components with indices
     connected_components = [graph_ids[cc] for cc in raw_ccs]
 
     _write_connected_components(
@@ -119,42 +119,26 @@ def _read_chunk(children_ids_shared, cg: ChunkedGraph, layer_id: int, chunk_coor
 
 
 def _write_connected_components(
-    cg: ChunkedGraph,
-    layer_id: int,
-    parent_coords,
-    connected_components: list,
-    time_stamp,
-    use_threads=True,
-) -> None:
-    if len(connected_components) == 0:
+    cg, layer, pcoords, components, cross_edges, time_stamp, use_threads=True
+):
+    if len(components) == 0:
         return
 
-    node_layer_d_shared = {}
-    if layer_id < cg.meta.layer_count:
-        node_layer_d_shared = get_chunk_nodes_cross_edge_layer(
-            cg, layer_id, parent_coords, use_threads=use_threads
-        )
+    node_layer_d = {}
+    if layer < cg.meta.layer_count:
+        node_layer_d = get_chunk_nodes_cross_edge_layer(cg, layer, pcoords, use_threads)
 
     if not use_threads:
-        _write(
-            cg,
-            layer_id,
-            parent_coords,
-            connected_components,
-            node_layer_d_shared,
-            time_stamp,
-            use_threads=use_threads,
-        )
+        _write(cg, layer, pcoords, components, cross_edges, node_layer_d, time_stamp)
         return
 
-    task_size = int(math.ceil(len(connected_components) / mp.cpu_count() / 10))
-    chunked_ccs = chunked(connected_components, task_size)
+    task_size = int(math.ceil(len(components) / mp.cpu_count() / 10))
+    chunked_ccs = chunked(components, task_size)
     cg_info = cg.get_serialized_info()
     multi_args = []
     for ccs in chunked_ccs:
-        multi_args.append(
-            (cg_info, layer_id, parent_coords, ccs, node_layer_d_shared, time_stamp)
-        )
+        args = (cg_info, layer, pcoords, ccs, cross_edges, node_layer_d, time_stamp)
+        multi_args.append(args)
     mu.multiprocess_func(
         _write_components_helper,
         multi_args,
@@ -163,26 +147,20 @@ def _write_connected_components(
 
 
 def _write_components_helper(args):
-    cg_info, layer_id, parent_coords, ccs, node_layer_d_shared, time_stamp = args
+    cg_info, layer, pcoords, ccs, cross_edges, node_layer_d, time_stamp = args
     cg = ChunkedGraph(**cg_info)
-    _write(cg, layer_id, parent_coords, ccs, node_layer_d_shared, time_stamp)
+    _write(cg, layer, pcoords, ccs, cross_edges, node_layer_d, time_stamp)
 
 
 def _write(
-    cg,
-    layer_id,
-    parent_coords,
-    connected_components,
-    node_layer_d_shared,
-    time_stamp,
-    use_threads=True,
+    cg, layer_id, parent_coords, components, cross_edges, node_layer_d, time_stamp
 ):
     parent_layer_ids = range(layer_id, cg.meta.layer_count + 1)
     cc_connections = {l: [] for l in parent_layer_ids}
-    for node_ids in connected_components:
+    for node_ids in components:
         layer = layer_id
         if len(node_ids) == 1:
-            layer = node_layer_d_shared.get(node_ids[0], cg.meta.layer_count)
+            layer = node_layer_d.get(node_ids[0], cg.meta.layer_count)
         cc_connections[layer].append(node_ids)
 
     rows = []
@@ -199,7 +177,7 @@ def _write(
         reserved_parent_ids = cg.id_client.create_node_ids(
             parent_chunk_id,
             size=len(cc_connections[parent_layer_id]),
-            root_chunk=parent_layer_id == cg.meta.layer_count and use_threads,
+            root_chunk=parent_layer_id == cg.meta.layer_count,
         )
 
         for i_cc, node_ids in enumerate(cc_connections[parent_layer_id]):
diff --git a/pychunkedgraph/ingest/create/cross_edges.py b/pychunkedgraph/ingest/create/cross_edges.py
index 78b7309fe..b7a888b27 100644
--- a/pychunkedgraph/ingest/create/cross_edges.py
+++ b/pychunkedgraph/ingest/create/cross_edges.py
@@ -24,7 +24,7 @@ def get_children_chunk_cross_edges(
 ) -> np.ndarray:
     """
     Cross edges that connect children chunks.
-    The edges are between node IDs in the given layer (not atomic).
+    The edges are between node IDs in the given layer.
     """
     atomic_chunks = get_touching_atomic_chunks(cg.meta, layer, chunk_coord)
     if len(atomic_chunks) == 0:

From 7a95a5b16989ee7b76a2f75373d57e1182a212a7 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 21:36:46 +0000
Subject: [PATCH 019/105] fix: use shorter name

---
 .../ingest/create/abstract_layers.py          | 25 +++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 31610aeab..107ac5714 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -39,18 +39,18 @@ def add_layer(
     if not children_coords.size:
         children_coords = get_children_chunk_coords(cg.meta, layer_id, parent_coords)
     children_ids = _read_children_chunks(cg, layer_id, children_coords, n_threads > 1)
-    cross_edges = get_children_chunk_cross_edges(
+    cx_edges = get_children_chunk_cross_edges(
         cg, layer_id, parent_coords, use_threads=n_threads > 1
     )
 
     node_layers = cg.get_chunk_layers(children_ids)
-    edge_layers = cg.get_chunk_layers(np.unique(cross_edges))
+    edge_layers = cg.get_chunk_layers(np.unique(cx_edges))
     assert np.all(node_layers < layer_id), "invalid node layers"
     assert np.all(edge_layers < layer_id), "invalid edge layers"
 
-    cross_edges = list(cross_edges)
-    cross_edges.extend(np.vstack([children_ids, children_ids]).T)  # add self-edges
-    graph, _, _, graph_ids = flatgraph.build_gt_graph(cross_edges, make_directed=True)
+    cx_edges = list(cx_edges)
+    cx_edges.extend(np.vstack([children_ids, children_ids]).T)  # add self-edges
+    graph, _, _, graph_ids = flatgraph.build_gt_graph(cx_edges, make_directed=True)
     raw_ccs = flatgraph.connected_components(graph)  # connected components with indices
     connected_components = [graph_ids[cc] for cc in raw_ccs]
 
@@ -59,6 +59,7 @@ def add_layer(
         layer_id,
         parent_coords,
         connected_components,
+        cx_edges,
         get_valid_timestamp(time_stamp),
         n_threads > 1,
     )
@@ -119,7 +120,7 @@ def _read_chunk(children_ids_shared, cg: ChunkedGraph, layer_id: int, chunk_coor
 
 
 def _write_connected_components(
-    cg, layer, pcoords, components, cross_edges, time_stamp, use_threads=True
+    cg, layer, pcoords, components, cx_edges, time_stamp, use_threads=True
 ):
     if len(components) == 0:
         return
@@ -129,7 +130,7 @@ def _write_connected_components(
         node_layer_d = get_chunk_nodes_cross_edge_layer(cg, layer, pcoords, use_threads)
 
     if not use_threads:
-        _write(cg, layer, pcoords, components, cross_edges, node_layer_d, time_stamp)
+        _write(cg, layer, pcoords, components, cx_edges, node_layer_d, time_stamp)
         return
 
     task_size = int(math.ceil(len(components) / mp.cpu_count() / 10))
@@ -137,7 +138,7 @@ def _write_connected_components(
     cg_info = cg.get_serialized_info()
     multi_args = []
     for ccs in chunked_ccs:
-        args = (cg_info, layer, pcoords, ccs, cross_edges, node_layer_d, time_stamp)
+        args = (cg_info, layer, pcoords, ccs, cx_edges, node_layer_d, time_stamp)
         multi_args.append(args)
     mu.multiprocess_func(
         _write_components_helper,
@@ -147,14 +148,12 @@ def _write_connected_components(
 
 
 def _write_components_helper(args):
-    cg_info, layer, pcoords, ccs, cross_edges, node_layer_d, time_stamp = args
+    cg_info, layer, pcoords, ccs, cx_edges, node_layer_d, time_stamp = args
     cg = ChunkedGraph(**cg_info)
-    _write(cg, layer, pcoords, ccs, cross_edges, node_layer_d, time_stamp)
+    _write(cg, layer, pcoords, ccs, cx_edges, node_layer_d, time_stamp)
 
 
-def _write(
-    cg, layer_id, parent_coords, components, cross_edges, node_layer_d, time_stamp
-):
+def _write(cg, layer_id, parent_coords, components, cx_edges, node_layer_d, time_stamp):
     parent_layer_ids = range(layer_id, cg.meta.layer_count + 1)
     cc_connections = {l: [] for l in parent_layer_ids}
     for node_ids in components:

From 586f4e00a4cdc1e2054d4353fff2495e73669b9e Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 22:13:44 +0000
Subject: [PATCH 020/105] feat: cache cx edges at each layer

---
 .../ingest/create/abstract_layers.py          | 49 ++++++++++++-------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 107ac5714..148a370ba 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -9,6 +9,7 @@
 import multiprocessing as mp
 from typing import Optional
 from typing import Sequence
+from collections import defaultdict
 
 import numpy as np
 from multiwrapper import multiprocessing_utils as mu
@@ -153,7 +154,15 @@ def _write_components_helper(args):
     _write(cg, layer, pcoords, ccs, cx_edges, node_layer_d, time_stamp)
 
 
-def _write(cg, layer_id, parent_coords, components, cx_edges, node_layer_d, time_stamp):
+def _write(
+    cg: ChunkedGraph,
+    layer_id,
+    parent_coords,
+    components,
+    cx_edges,
+    node_layer_d,
+    time_stamp,
+):
     parent_layer_ids = range(layer_id, cg.meta.layer_count + 1)
     cc_connections = {l: [] for l in parent_layer_ids}
     for node_ids in components:
@@ -180,24 +189,28 @@ def _write(cg, layer_id, parent_coords, components, cx_edges, node_layer_d, time
         )
 
         for i_cc, node_ids in enumerate(cc_connections[parent_layer_id]):
-            parent_id = reserved_parent_ids[i_cc]
-            for node_id in node_ids:
-                rows.append(
-                    cg.client.mutate_row(
-                        serializers.serialize_uint64(node_id),
-                        {attributes.Hierarchy.Parent: parent_id},
-                        time_stamp=time_stamp,
-                    )
-                )
-
-            rows.append(
-                cg.client.mutate_row(
-                    serializers.serialize_uint64(parent_id),
-                    {attributes.Hierarchy.Child: node_ids},
-                    time_stamp=time_stamp,
-                )
-            )
+            node_cx_edges_d = defaultdict(lambda: types.empty_2d)
+            for node in node_ids:
+                mask0 = cx_edges[:, 0] == node
+                mask1 = cx_edges[:, 1] == node
+                node_cx_edges_d[node] = cx_edges[mask0 | mask1]
 
+            parent_id = reserved_parent_ids[i_cc]
+            for node in node_ids:
+                row_id = serializers.serialize_uint64(node)
+                val_dict = {attributes.Hierarchy.Parent: parent_id}
+
+                node_cx_edges = node_cx_edges_d[node]
+                cx_layers = cg.get_cross_chunk_edges_layer(node_cx_edges)
+                for layer in set(cx_layers):
+                    layer_mask = cx_layers == layer
+                    col = attributes.Connectivity.CrossChunkEdge[layer]
+                    val_dict[col] = node_cx_edges[layer_mask]
+                rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
+
+            row_id = serializers.serialize_uint64(parent_id)
+            val_dict = {attributes.Hierarchy.Child: node_ids}
+            rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
             if len(rows) > 100000:
                 cg.client.write(rows)
                 rows = []

From 1486cac898991b7cdd2dc4478a45fc18aff84548 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 22:24:49 +0000
Subject: [PATCH 021/105] fix: convert array type

---
 pychunkedgraph/ingest/create/abstract_layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 148a370ba..f1341419d 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -176,7 +176,7 @@ def _write(
     parent_chunk_id = cg.get_chunk_id(layer=layer_id, x=x, y=y, z=z)
     parent_chunk_id_dict = cg.get_parent_chunk_id_dict(parent_chunk_id)
 
-    # Iterate through layers
+    cx_edges = np.array(cx_edges, dtype=basetypes.NODE_ID)
     for parent_layer_id in parent_layer_ids:
         if len(cc_connections[parent_layer_id]) == 0:
             continue

From 1695514d56797d37669ba9c5609cf3e0490537c2 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 22:50:51 +0000
Subject: [PATCH 022/105] fix: use atomic edges during ingest

---
 pychunkedgraph/graph/cache.py               | 1 +
 pychunkedgraph/ingest/create/cross_edges.py | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/graph/cache.py b/pychunkedgraph/graph/cache.py
index f60b6ca92..8c824c732 100644
--- a/pychunkedgraph/graph/cache.py
+++ b/pychunkedgraph/graph/cache.py
@@ -1,3 +1,4 @@
+# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel
 """
 Cache nodes, parents, children and cross edges.
 """
diff --git a/pychunkedgraph/ingest/create/cross_edges.py b/pychunkedgraph/ingest/create/cross_edges.py
index b7a888b27..c7f45e9eb 100644
--- a/pychunkedgraph/ingest/create/cross_edges.py
+++ b/pychunkedgraph/ingest/create/cross_edges.py
@@ -91,7 +91,7 @@ def _read_atomic_chunk_cross_edges(
     Returns cross edges between l2 nodes in current chunk and
     l1 supervoxels from neighbor chunks.
     """
-    cross_edge_col = attributes.Connectivity.CrossChunkEdge[cross_edge_layer]
+    cross_edge_col = attributes.Connectivity.AtomicCrossChunkEdge[cross_edge_layer]
     range_read, l2ids = _read_atomic_chunk(cg, chunk_coord, [cross_edge_layer])
 
     parent_neighboring_chunk_supervoxels_d = defaultdict(list)
@@ -188,7 +188,7 @@ def _read_atomic_chunk_cross_edge_nodes(cg: ChunkedGraph, chunk_coord, layer):
     range_read, l2ids = _read_atomic_chunk(cg, chunk_coord, relevant_layers)
     for l2id in l2ids:
         for layer in relevant_layers:
-            if attributes.Connectivity.CrossChunkEdge[layer] in range_read[l2id]:
+            if attributes.Connectivity.AtomicCrossChunkEdge[layer] in range_read[l2id]:
                 node_layer_d[l2id] = layer
                 break
     return node_layer_d
@@ -229,7 +229,7 @@ def _read_atomic_chunk(cg: ChunkedGraph, chunk_coord, layers):
     range_read = cg.range_read_chunk(
         cg.get_chunk_id(layer=2, x=x, y=y, z=z),
         properties=[child_col]
-        + [attributes.Connectivity.CrossChunkEdge[l] for l in layers],
+        + [attributes.Connectivity.AtomicCrossChunkEdge[l] for l in layers],
     )
 
     row_ids = []

From 29283d1674a45fc057ce28daa19a3b60b36d3d8a Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 23:23:18 +0000
Subject: [PATCH 023/105] fix: tests; restore use_threads in _write, narrow cx layer range

---
 pychunkedgraph/ingest/create/abstract_layers.py | 5 +++--
 pychunkedgraph/ingest/create/cross_edges.py     | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index f1341419d..63b613ae6 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -131,7 +131,7 @@ def _write_connected_components(
         node_layer_d = get_chunk_nodes_cross_edge_layer(cg, layer, pcoords, use_threads)
 
     if not use_threads:
-        _write(cg, layer, pcoords, components, cx_edges, node_layer_d, time_stamp)
+        _write(cg, layer, pcoords, components, cx_edges, node_layer_d, time_stamp, use_threads)
         return
 
     task_size = int(math.ceil(len(components) / mp.cpu_count() / 10))
@@ -162,6 +162,7 @@ def _write(
     cx_edges,
     node_layer_d,
     time_stamp,
+    use_threads=True,
 ):
     parent_layer_ids = range(layer_id, cg.meta.layer_count + 1)
     cc_connections = {l: [] for l in parent_layer_ids}
@@ -185,7 +186,7 @@ def _write(
         reserved_parent_ids = cg.id_client.create_node_ids(
             parent_chunk_id,
             size=len(cc_connections[parent_layer_id]),
-            root_chunk=parent_layer_id == cg.meta.layer_count,
+            root_chunk=parent_layer_id == cg.meta.layer_count and use_threads,
         )
 
         for i_cc, node_ids in enumerate(cc_connections[parent_layer_id]):
diff --git a/pychunkedgraph/ingest/create/cross_edges.py b/pychunkedgraph/ingest/create/cross_edges.py
index c7f45e9eb..5f0ebf8df 100644
--- a/pychunkedgraph/ingest/create/cross_edges.py
+++ b/pychunkedgraph/ingest/create/cross_edges.py
@@ -184,7 +184,7 @@ def _read_atomic_chunk_cross_edge_nodes(cg: ChunkedGraph, chunk_coord, layer):
     the lowest layer at which an l2 node is part of a cross edge
     """
     node_layer_d = {}
-    relevant_layers = range(layer, cg.meta.layer_count + 1)
+    relevant_layers = range(layer, cg.meta.layer_count)
     range_read, l2ids = _read_atomic_chunk(cg, chunk_coord, relevant_layers)
     for l2id in l2ids:
         for layer in relevant_layers:

From f120409917932e9e5274c20633bf386dee7c0254 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 23:55:02 +0000
Subject: [PATCH 024/105] fix: remove postprocess step

---
 pychunkedgraph/ingest/cli.py                 | 22 +---------
 pychunkedgraph/ingest/cluster.py             | 31 +++++--------
 pychunkedgraph/ingest/create/atomic_layer.py | 46 --------------------
 3 files changed, 12 insertions(+), 87 deletions(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 486224cec..2ad51ca18 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -67,16 +67,6 @@ def ingest_graph(
     enqueue_atomic_tasks(IngestionManager(ingest_config, meta))
 
 
-@ingest_cli.command("postprocess")
-def postprocess():
-    """
-    Run postprocessing step on level 2 chunks.
-    """
-    redis = get_redis_connection()
-    imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-    enqueue_atomic_tasks(imanager, postprocess=True)
-
-
 @ingest_cli.command("imanager")
 @click.argument("graph_id", type=str)
 @click.argument("dataset", type=click.Path(exists=True))
@@ -130,16 +120,8 @@ def ingest_status():
     """Print ingest status to console by layer."""
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-
-    layer = 2
-    done = redis.scard(f"{layer}c")
-    print(f"{layer}\t: {done} / {imanager.cg_meta.layer_chunk_counts[0]}")
-
-    done = redis.scard(f"{layer}c-postprocess")
-    print(f"{layer}\t: {done} / {imanager.cg_meta.layer_chunk_counts[0]} [postprocess]")
-
-    layers = range(3, imanager.cg_meta.layer_count + 1)
-    for layer, layer_count in zip(layers, imanager.cg_meta.layer_chunk_counts[1:]):
+    layers = range(2, imanager.cg_meta.layer_count + 1)
+    for layer, layer_count in zip(layers, imanager.cg_meta.layer_chunk_counts):
         done = redis.scard(f"{layer}c")
         print(f"{layer}\t: {done} / {layer_count}")
 
diff --git a/pychunkedgraph/ingest/cluster.py b/pychunkedgraph/ingest/cluster.py
index 9394c4e26..b952ae0ba 100644
--- a/pychunkedgraph/ingest/cluster.py
+++ b/pychunkedgraph/ingest/cluster.py
@@ -13,7 +13,6 @@
 from .common import get_atomic_chunk_data
 from .ran_agglomeration import get_active_edges
 from .create.atomic_layer import add_atomic_edges
-from .create.atomic_layer import postprocess_atomic_chunk
 from .create.abstract_layers import add_layer
 from ..graph.meta import ChunkedGraphMeta
 from ..graph.chunks.hierarchy import get_children_chunk_coords
@@ -25,12 +24,10 @@ def _post_task_completion(
     imanager: IngestionManager,
     layer: int,
     coords: np.ndarray,
-    postprocess: bool = False,
 ):
     chunk_str = "_".join(map(str, coords))
     # mark chunk as completed - "c"
-    pprocess = "-postprocess" if postprocess else ""
-    imanager.redis.sadd(f"{layer}c{pprocess}", chunk_str)
+    imanager.redis.sadd(f"{layer}c", chunk_str)
 
 
 def create_parent_chunk(
@@ -59,7 +56,7 @@ def randomize_grid_points(X: int, Y: int, Z: int) -> Tuple[int, int, int]:
         yield np.unravel_index(index, (X, Y, Z))
 
 
-def enqueue_atomic_tasks(imanager: IngestionManager, postprocess: bool = False):
+def enqueue_atomic_tasks(imanager: IngestionManager):
     from os import environ
     from time import sleep
     from rq import Queue as RQueue
@@ -72,12 +69,7 @@ def enqueue_atomic_tasks(imanager: IngestionManager, postprocess: bool = False):
         chunk_count = imanager.cg_meta.layer_chunk_counts[0]
     print(f"total chunk count: {chunk_count}, queuing...")
 
-    pprocess = ""
-    if postprocess:
-        pprocess = "-postprocess"
-        print("postprocessing l2 chunks")
-
-    queue_name = f"{imanager.config.CLUSTER.ATOMIC_Q_NAME}{pprocess}"
+    queue_name = f"{imanager.config.CLUSTER.ATOMIC_Q_NAME}"
     q = imanager.get_task_queue(queue_name)
     job_datas = []
     batch_size = int(environ.get("L2JOB_BATCH_SIZE", 1000))
@@ -89,13 +81,13 @@ def enqueue_atomic_tasks(imanager: IngestionManager, postprocess: bool = False):
 
         x, y, z = chunk_coord
         chunk_str = f"{x}_{y}_{z}"
-        if imanager.redis.sismember(f"2c{pprocess}", chunk_str):
+        if imanager.redis.sismember(f"2c", chunk_str):
             # already done, skip
             continue
         job_datas.append(
             RQueue.prepare_data(
                 create_atomic_chunk,
-                args=(chunk_coord, postprocess),
+                args=(chunk_coord,),
                 timeout=environ.get("L2JOB_TIMEOUT", "3m"),
                 result_ttl=0,
                 job_id=chunk_id_str(2, chunk_coord),
@@ -107,18 +99,15 @@ def enqueue_atomic_tasks(imanager: IngestionManager, postprocess: bool = False):
     q.enqueue_many(job_datas)
 
 
-def create_atomic_chunk(coords: Sequence[int], postprocess: bool = False):
+def create_atomic_chunk(coords: Sequence[int]):
     """Creates single atomic chunk"""
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
     coords = np.array(list(coords), dtype=int)
 
-    if postprocess:
-        postprocess_atomic_chunk(imanager.cg, coords)
-    else:
-        chunk_edges_all, mapping = get_atomic_chunk_data(imanager, coords)
-        chunk_edges_active, isolated_ids = get_active_edges(chunk_edges_all, mapping)
-        add_atomic_edges(imanager.cg, coords, chunk_edges_active, isolated=isolated_ids)
+    chunk_edges_all, mapping = get_atomic_chunk_data(imanager, coords)
+    chunk_edges_active, isolated_ids = get_active_edges(chunk_edges_all, mapping)
+    add_atomic_edges(imanager.cg, coords, chunk_edges_active, isolated=isolated_ids)
 
     if imanager.config.TEST_RUN:
         # print for debugging
@@ -126,7 +115,7 @@ def create_atomic_chunk(coords: Sequence[int], postprocess: bool = False):
             print(k, len(v))
         for k, v in chunk_edges_active.items():
             print(f"active_{k}", len(v))
-    _post_task_completion(imanager, 2, coords, postprocess=postprocess)
+    _post_task_completion(imanager, 2, coords)
 
 
 def _get_test_chunks(meta: ChunkedGraphMeta):
diff --git a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py
index 42b6a01b5..054a82840 100644
--- a/pychunkedgraph/ingest/create/atomic_layer.py
+++ b/pychunkedgraph/ingest/create/atomic_layer.py
@@ -151,49 +151,3 @@ def _get_outgoing_edges(node_id, chunk_edges_d, sparse_indices, remapping):
             # edges that this node is part of
             chunk_out_edges = np.concatenate([chunk_out_edges, edges[row_ids]])
     return chunk_out_edges
-
-
-def postprocess_atomic_chunk(
-    cg: ChunkedGraph,
-    chunk_coord: np.ndarray,
-    time_stamp: Optional[datetime.datetime] = None,
-):
-    time_stamp = get_valid_timestamp(time_stamp)
-
-    chunk_id = cg.get_chunk_id(
-        layer=2, x=chunk_coord[0], y=chunk_coord[1], z=chunk_coord[2]
-    )
-
-    properties = [
-        attributes.Connectivity.AtomicCrossChunkEdge[l] for l in range(2, cg.meta.layer_count)
-    ]
-
-    chunk_rr = cg.range_read_chunk(
-        chunk_id, properties=properties, time_stamp=time_stamp
-    )
-
-    result = {}
-    for l2id, raw_cx_edges in chunk_rr.items():
-        try:
-            cx_edges = {
-                prop.index: val[0].value.copy() for prop, val in raw_cx_edges.items()
-            }
-            result[l2id] = cx_edges
-        except KeyError:
-            continue
-
-    nodes = []
-    val_dicts = []
-    for l2id, cx_edges in result.items():
-        val_dict = {}
-        for layer, edges in cx_edges.items():
-            l2_edges = np.zeros_like(edges)
-            l2_edges[:, 0] = l2id
-            l2_edges[:, 1] = cg.get_parents(edges[:, 1])
-            col = attributes.Connectivity.CrossChunkEdge[layer]
-            val_dict[col] = np.unique(l2_edges, axis=0)
-            val_dicts.append(val_dict)
-
-        r_key = serializers.serialize_uint64(l2id)
-        nodes.append(cg.client.mutate_row(r_key, val_dict, time_stamp=time_stamp))
-    cg.client.write(nodes)

From 0898b32e88643e933a4d154235fd39a19d9ba6ed Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 20 Aug 2023 23:57:00 +0000
Subject: [PATCH 025/105] fix: raises specific error

---
 pychunkedgraph/ingest/cluster.py | 2 +-
 pychunkedgraph/ingest/utils.py   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/ingest/cluster.py b/pychunkedgraph/ingest/cluster.py
index b952ae0ba..a5c6a9861 100644
--- a/pychunkedgraph/ingest/cluster.py
+++ b/pychunkedgraph/ingest/cluster.py
@@ -81,7 +81,7 @@ def enqueue_atomic_tasks(imanager: IngestionManager):
 
         x, y, z = chunk_coord
         chunk_str = f"{x}_{y}_{z}"
-        if imanager.redis.sismember(f"2c", chunk_str):
+        if imanager.redis.sismember("2c", chunk_str):
             # already done, skip
             continue
         job_datas.append(
diff --git a/pychunkedgraph/ingest/utils.py b/pychunkedgraph/ingest/utils.py
index fa7ef7a3c..1c3236561 100644
--- a/pychunkedgraph/ingest/utils.py
+++ b/pychunkedgraph/ingest/utils.py
@@ -1,6 +1,6 @@
+# pylint: disable=invalid-name, missing-docstring
 from typing import Tuple
 
-
 from . import ClusterIngestConfig
 from . import IngestConfig
 from ..graph.meta import ChunkedGraphMeta
@@ -72,4 +72,4 @@ def postprocess_edge_data(im, edge_dict):
 
         return new_edge_dict
     else:
-        raise Exception(f"Unknown data_version: {data_version}")
+        raise ValueError(f"Unknown data_version: {data_version}")

From 262378f683013c0699574bb119b7f47868a99c03 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 21 Aug 2023 00:17:51 +0000
Subject: [PATCH 026/105] fix: removes dangerous default value

---
 pychunkedgraph/graph/chunkedgraph.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 210bff50b..2630d8250 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -551,7 +551,7 @@ def get_subgraph(
         node_id_or_ids: typing.Union[np.uint64, typing.Iterable],
         bbox: typing.Optional[typing.Sequence[typing.Sequence[int]]] = None,
         bbox_is_coordinate: bool = False,
-        return_layers: typing.List = [2],
+        return_layers: typing.List = None,
         nodes_only: bool = False,
         edges_only: bool = False,
         leaves_only: bool = False,
@@ -563,6 +563,9 @@ def get_subgraph(
         from .subgraph import get_subgraph_nodes
         from .subgraph import get_subgraph_edges_and_leaves
 
+        if return_layers is None:
+            return_layers = [2]
+
         if nodes_only:
             return get_subgraph_nodes(
                 self,
@@ -581,7 +584,7 @@ def get_subgraph_nodes(
         node_id_or_ids: typing.Union[np.uint64, typing.Iterable],
         bbox: typing.Optional[typing.Sequence[typing.Sequence[int]]] = None,
         bbox_is_coordinate: bool = False,
-        return_layers: typing.List = [2],
+        return_layers: typing.List = None,
         serializable: bool = False,
         return_flattened: bool = False,
     ) -> typing.Tuple[typing.Dict, typing.Dict, Edges]:
@@ -591,6 +594,9 @@ def get_subgraph_nodes(
         """
         from .subgraph import get_subgraph_nodes
 
+        if return_layers is None:
+            return_layers = [2]
+
         return get_subgraph_nodes(
             self,
             node_id_or_ids,

From aa82d224a04b98464c6f12d87ff6813fa550652b Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 21 Aug 2023 01:02:21 +0000
Subject: [PATCH 027/105] wip: read from cached edges

---
 pychunkedgraph/graph/attributes.py   |  2 +-
 pychunkedgraph/graph/chunkedgraph.py | 87 ++++++++--------------------
 pychunkedgraph/graph/edges/utils.py  | 39 +------------
 pychunkedgraph/graph/edits.py        | 11 +---
 pychunkedgraph/graph/operation.py    |  4 +-
 pychunkedgraph/graph/subgraph.py     | 36 +++++-------
 6 files changed, 49 insertions(+), 130 deletions(-)

diff --git a/pychunkedgraph/graph/attributes.py b/pychunkedgraph/graph/attributes.py
index b0f18c2ec..958913119 100644
--- a/pychunkedgraph/graph/attributes.py
+++ b/pychunkedgraph/graph/attributes.py
@@ -111,7 +111,7 @@ class Connectivity:
     )
 
     CrossChunkEdge = _AttributeArray(
-        pattern=b"cross_edge_%d",
+        pattern=b"cross_edges_%d",
         family_id="4",
         serializer=serializers.NumPyArray(
             dtype=basetypes.NODE_ID, shape=(-1, 2), compression_level=22
diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 2630d8250..83c543b6e 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, missing-docstring, too-many-lines, import-outside-toplevel
+# pylint: disable=invalid-name, missing-docstring, too-many-lines, import-outside-toplevel, unsupported-binary-operation
 
 import time
 import typing
@@ -112,13 +112,15 @@ def range_read_chunk(
         """Read all nodes in a chunk."""
         layer = self.get_chunk_layer(chunk_id)
         root_chunk = layer == self.meta.layer_count
-        max_node_id = self.id_client.get_max_node_id(chunk_id=chunk_id, root_chunk=root_chunk)
+        max_id = self.id_client.get_max_node_id(
+            chunk_id=chunk_id, root_chunk=root_chunk
+        )
         if layer == 1:
-            max_node_id = chunk_id | self.get_segment_id_limit(chunk_id) # pylint: disable=unsupported-binary-operation
+            max_id = chunk_id | self.get_segment_id_limit(chunk_id)
 
         return self.client.read_nodes(
             start_id=self.get_node_id(np.uint64(0), chunk_id=chunk_id),
-            end_id=max_node_id,
+            end_id=max_id,
             end_id_inclusive=True,
             properties=properties,
             end_time=time_stamp,
@@ -293,7 +295,7 @@ def _get_children_multiple(
     def get_atomic_cross_edges(
         self, l2_ids: typing.Iterable, *, raw_only=False
     ) -> typing.Dict[np.uint64, typing.Dict[int, typing.Iterable]]:
-        """Returns cross edges for level 2 IDs."""
+        """Returns atomic cross edges for level 2 IDs."""
         if raw_only or not self.cache:
             node_edges_d_d = self.client.read_nodes(
                 node_ids=l2_ids,
@@ -314,67 +316,30 @@ def get_atomic_cross_edges(
             return result
         return self.cache.atomic_cross_edges_multiple(l2_ids)
 
-    def get_cross_chunk_edges(
-        self, node_ids: typing.Iterable, uplift=True, all_layers=False
-    ) -> typing.Dict[np.uint64, typing.Dict[int, typing.Iterable]]:
+    def get_cross_chunk_edges(self, node_ids: typing.Iterable) -> typing.Dict:
         """
-        Cross chunk edges for `node_id` at `node_layer`.
-        The edges are between node IDs at the `node_layer`, not atomic cross edges.
-        Returns dict {layer_id: cross_edges}
-            The first layer (>= `node_layer`) with atleast one cross chunk edge.
-            For current use-cases, other layers are not relevant.
-
-        For performance, only children that lie along chunk boundary are considered.
-        Cross edges that belong to inner level 2 IDs are subsumed within the chunk.
-        This is because cross edges are stored only in level 2 IDs.
+        Returns cross edges for `node_ids`.
+        A dict of the form `{node_id: {layer: cross_edges}}`
         """
         result = {}
         node_ids = np.array(node_ids, dtype=basetypes.NODE_ID)
-        if not node_ids.size:
+        if node_ids.size == 0:
             return result
-
-        node_l2ids_d = {}
-        layers_ = self.get_chunk_layers(node_ids)
-        for l in set(layers_):
-            node_l2ids_d.update(self._get_bounding_l2_children(node_ids[layers_ == l]))
-        l2_edges_d_d = self.get_atomic_cross_edges(
-            np.concatenate(list(node_l2ids_d.values()))
-        )
-        for node_id in node_ids:
-            l2_edges_ds = [l2_edges_d_d[l2_id] for l2_id in node_l2ids_d[node_id]]
-            if all_layers:
-                result[node_id] = edge_utils.concatenate_cross_edge_dicts(l2_edges_ds)
-            else:
-                result[node_id] = self._get_min_layer_cross_edges(
-                    node_id, l2_edges_ds, uplift=uplift
-                )
+        attrs = [
+            attributes.Connectivity.CrossChunkEdge[l]
+            for l in range(2, self.meta.layer_count)
+        ]
+        node_edges_d_d = self.client.read_nodes(node_ids=node_ids, properties=attrs)
+        for id_ in node_ids:
+            try:
+                result[id_] = {
+                    prop.index: val[0].value.copy()
+                    for prop, val in node_edges_d_d[id_].items()
+                }
+            except KeyError:
+                result[id_] = {}
         return result
 
-    def _get_min_layer_cross_edges(
-        self,
-        node_id: basetypes.NODE_ID,
-        l2id_atomic_cross_edges_ds: typing.Iterable,
-        uplift=True,
-    ) -> typing.Dict[int, typing.Iterable]:
-        """
-        Find edges at relevant min_layer >= node_layer.
-        `l2id_atomic_cross_edges_ds` is a list of atomic cross edges of
-        level 2 IDs that are descendants of `node_id`.
-        """
-        min_layer, edges = edge_utils.filter_min_layer_cross_edges_multiple(
-            self.meta, l2id_atomic_cross_edges_ds, self.get_chunk_layer(node_id)
-        )
-        if self.get_chunk_layer(node_id) < min_layer:
-            # cross edges irrelevant
-            return {self.get_chunk_layer(node_id): types.empty_2d}
-        if not uplift:
-            return {min_layer: edges}
-        node_root_id = node_id
-        node_root_id = self.get_root(node_id, stop_layer=min_layer, ceil=False)
-        edges[:, 0] = node_root_id
-        edges[:, 1] = self.get_roots(edges[:, 1], stop_layer=min_layer, ceil=False)
-        return {min_layer: np.unique(edges, axis=0) if edges.size else types.empty_2d}
-
     def get_roots(
         self,
         node_ids: typing.Sequence[np.uint64],
@@ -698,9 +663,7 @@ def get_l2_agglomerations(
             sv_parent_d.update(dict(zip(svs.tolist(), [l2id] * len(svs))))
 
         in_edges, out_edges, cross_edges = edge_utils.categorize_edges_v2(
-            self.meta,
-            all_chunk_edges,
-            sv_parent_d
+            self.meta, all_chunk_edges, sv_parent_d
         )
 
         agglomeration_d = get_agglomerations(
diff --git a/pychunkedgraph/graph/edges/utils.py b/pychunkedgraph/graph/edges/utils.py
index 034ca6ebc..94641343a 100644
--- a/pychunkedgraph/graph/edges/utils.py
+++ b/pychunkedgraph/graph/edges/utils.py
@@ -8,16 +8,17 @@
 from typing import Tuple
 from typing import Iterable
 from typing import Optional
+from collections import defaultdict
 
 import fastremap
 import numpy as np
 
 from . import Edges
 from . import EDGE_TYPES
-from ..types import empty_2d
 from ..utils import basetypes
 from ..chunks import utils as chunk_utils
 from ..meta import ChunkedGraphMeta
+from ...utils.general import in2d
 
 
 def concatenate_chunk_edges(chunk_edge_dicts: Iterable) -> Dict:
@@ -47,10 +48,7 @@ def concatenate_chunk_edges(chunk_edge_dicts: Iterable) -> Dict:
 
 def concatenate_cross_edge_dicts(edges_ds: Iterable[Dict]) -> Dict:
     """Combines cross chunk edge dicts of form {layer id : edge list}."""
-    from collections import defaultdict
-
     result_d = defaultdict(list)
-
     for edges_d in edges_ds:
         for layer, edges in edges_d.items():
             result_d[layer].append(edges)
@@ -152,40 +150,7 @@ def get_cross_chunk_edges_layer(meta: ChunkedGraphMeta, cross_edges: Iterable):
     return cross_chunk_edge_layers
 
 
-def filter_min_layer_cross_edges(
-    meta: ChunkedGraphMeta, cross_edges_d: Dict, node_layer: int = 2
-) -> Tuple[int, Iterable]:
-    """
-    Given a dict of cross chunk edges {layer: edges}
-    Return the first layer with cross edges.
-    """
-    for layer in range(node_layer, meta.layer_count):
-        edges_ = cross_edges_d.get(layer, empty_2d)
-        if edges_.size:
-            return (layer, edges_)
-    return (meta.layer_count, edges_)
-
-
-def filter_min_layer_cross_edges_multiple(
-    meta: ChunkedGraphMeta, l2id_atomic_cross_edges_ds: Iterable, node_layer: int = 2
-) -> Tuple[int, Iterable]:
-    """
-    Given a list of dicts of cross chunk edges [{layer: edges}]
-    Return the first layer with cross edges.
-    """
-    min_layer = meta.layer_count
-    for edges_d in l2id_atomic_cross_edges_ds:
-        layer_, _ = filter_min_layer_cross_edges(meta, edges_d, node_layer=node_layer)
-        min_layer = min(min_layer, layer_)
-    edges = [empty_2d]
-    for edges_d in l2id_atomic_cross_edges_ds:
-        edges.append(edges_d.get(min_layer, empty_2d))
-    return min_layer, np.concatenate(edges)
-
-
 def get_edges_status(cg, edges: Iterable, time_stamp: Optional[float] = None):
-    from ...utils.general import in2d
-
     coords0 = chunk_utils.get_chunk_coordinates_multiple(cg.meta, edges[:, 0])
     coords1 = chunk_utils.get_chunk_coordinates_multiple(cg.meta, edges[:, 1])
 
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 4cb536ea7..6d823e720 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -142,7 +142,6 @@ def check_fake_edges(
             )
         )
         assert len(roots) == 2, "edges must be from 2 roots"
-        print("found inactive", len(inactive_edges))
         return inactive_edges, []
 
     rows = []
@@ -177,7 +176,6 @@ def check_fake_edges(
                 time_stamp=time_stamp,
             )
         )
-    print("no inactive", len(atomic_edges))
     return atomic_edges, rows
 
 
@@ -249,8 +247,7 @@ def _process_l2_agglomeration(
     atomic_cross_edges_d: Dict[int, np.ndarray],
 ):
     """
-    For a given L2 id, remove given edges
-    and calculate new connected components.
+    For a given L2 id, remove given edges; calculate new connected components.
     """
     chunk_edges = agg.in_edges.get_pairs()
     cross_edges = np.concatenate([types.empty_2d, *atomic_cross_edges_d.values()])
@@ -312,7 +309,7 @@ def remove_edges(
         ccs, graph_ids, cross_edges = _process_l2_agglomeration(
             l2_agg, removed_edges, atomic_cross_edges_d[id_]
         )
-        # calculated here to avoid repeat computation in loop
+        # done here to avoid repeat computation in loop
         cross_edge_layers = cg.get_cross_chunk_edges_layer(cross_edges)
         new_parent_ids = cg.id_client.create_node_ids(
             l2id_chunk_id_d[l2_agg.node_id], len(ccs)
@@ -413,9 +410,7 @@ def _get_connected_components(
             self.cg.graph_id,
             self._operation_id,
         ):
-            self._cross_edges_d.update(
-                self.cg.get_cross_chunk_edges(not_cached, all_layers=True)
-            )
+            self._cross_edges_d.update(self.cg.get_cross_chunk_edges(not_cached))
 
         sv_parent_d, sv_cross_edges = self._map_sv_to_parent(node_ids, layer)
         get_sv_parents = np.vectorize(sv_parent_d.get, otypes=[np.uint64])
diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py
index d0d0e172a..b864a2d0d 100644
--- a/pychunkedgraph/graph/operation.py
+++ b/pychunkedgraph/graph/operation.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, missing-docstring, too-many-lines, protected-access
+# pylint: disable=invalid-name, missing-docstring, too-many-lines, protected-access, broad_exception_raised
 
 from abc import ABC, abstractmethod
 from collections import namedtuple
@@ -469,7 +469,7 @@ def execute(
                     exception=repr(err),
                 )
                 self.cg.client.write([log_record_error])
-                raise Exception(err)
+                raise Exception(err) from err
 
             with TimeIt(f"{op_type}.write", self.cg.graph_id, lock.operation_id):
                 result = self._write(
diff --git a/pychunkedgraph/graph/subgraph.py b/pychunkedgraph/graph/subgraph.py
index ab2593175..5b50b7c43 100644
--- a/pychunkedgraph/graph/subgraph.py
+++ b/pychunkedgraph/graph/subgraph.py
@@ -1,3 +1,5 @@
+# pylint: disable=invalid-name, missing-docstring
+
 from typing import List
 from typing import Dict
 from typing import Tuple
@@ -30,9 +32,7 @@ def __init__(self, meta, node_ids, return_layers, serializable):
         # "Frontier" of nodes that cg.get_children will be called on
         self.cur_nodes = np.array(list(node_ids), dtype=np.uint64)
         # Mapping of current frontier to self.node_ids
-        self.cur_nodes_to_original_nodes = dict(
-            zip(self.cur_nodes, self.cur_nodes)
-        )
+        self.cur_nodes_to_original_nodes = dict(zip(self.cur_nodes, self.cur_nodes))
         self.stop_layer = max(1, min(return_layers))
         self.create_initial_node_to_subgraph()
 
@@ -107,13 +107,11 @@ def flatten_subgraph(self):
         for node_id in self.node_ids:
             for return_layer in self.return_layers:
                 node_key = self.get_dict_key(node_id)
-                children_at_layer = self.node_to_subgraph[node_key][
-                    return_layer
-                ]
+                children_at_layer = self.node_to_subgraph[node_key][return_layer]
                 if len(children_at_layer) > 0:
-                    self.node_to_subgraph[node_key][
-                        return_layer
-                    ] = np.concatenate(children_at_layer)
+                    self.node_to_subgraph[node_key][return_layer] = np.concatenate(
+                        children_at_layer
+                    )
                 else:
                     self.node_to_subgraph[node_key][return_layer] = empty_1d
 
@@ -123,10 +121,12 @@ def get_subgraph_nodes(
     node_id_or_ids: Union[np.uint64, Iterable],
     bbox: Optional[Sequence[Sequence[int]]] = None,
     bbox_is_coordinate: bool = False,
-    return_layers: List = [2],
+    return_layers: List = None,
     serializable: bool = False,
-    return_flattened: bool = False
+    return_flattened: bool = False,
 ) -> Tuple[Dict, Dict, Edges]:
+    if return_layers is None:
+        return_layers = [2]
     single = False
     node_ids = node_id_or_ids
     bbox = normalize_bounding_box(cg.meta, bbox, bbox_is_coordinate)
@@ -139,7 +139,7 @@ def get_subgraph_nodes(
         bounding_box=bbox,
         return_layers=return_layers,
         serializable=serializable,
-        return_flattened=return_flattened
+        return_flattened=return_flattened,
     )
     if single:
         if serializable:
@@ -183,7 +183,7 @@ def _get_subgraph_multiple_nodes(
     bounding_box: Optional[Sequence[Sequence[int]]],
     return_layers: Sequence[int],
     serializable: bool = False,
-    return_flattened: bool = False
+    return_flattened: bool = False,
 ):
     from collections import ChainMap
     from multiwrapper.multiprocessing_utils import n_cpus
@@ -223,9 +223,7 @@ def _get_subgraph_multiple_nodes_threaded(
 
     subgraph = SubgraphProgress(cg.meta, node_ids, return_layers, serializable)
     while not subgraph.done_processing():
-        this_n_threads = min(
-            [int(len(subgraph.cur_nodes) // 50000) + 1, n_cpus]
-        )
+        this_n_threads = min([int(len(subgraph.cur_nodes) // 50000) + 1, n_cpus])
         cur_nodes_child_maps = multithread_func(
             _get_subgraph_multiple_nodes_threaded,
             np.array_split(subgraph.cur_nodes, this_n_threads),
@@ -239,8 +237,6 @@ def _get_subgraph_multiple_nodes_threaded(
         for node_id in node_ids:
             subgraph.node_to_subgraph[
                 _get_dict_key(node_id)
-            ] = subgraph.node_to_subgraph[_get_dict_key(node_id)][
-                return_layers[0]
-            ]
+            ] = subgraph.node_to_subgraph[_get_dict_key(node_id)][return_layers[0]]
 
-    return subgraph.node_to_subgraph
\ No newline at end of file
+    return subgraph.node_to_subgraph

From 172f897a9edb52bd3cdd7c4a20889b7fc5d69211 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 21 Aug 2023 03:24:03 +0000
Subject: [PATCH 028/105] wip: edits refactor

---
 pychunkedgraph/graph/cache.py        |  30 ++++---
 pychunkedgraph/graph/chunkedgraph.py |  75 +++++++++--------
 pychunkedgraph/graph/edits.py        | 119 +++++++++------------------
 3 files changed, 92 insertions(+), 132 deletions(-)

diff --git a/pychunkedgraph/graph/cache.py b/pychunkedgraph/graph/cache.py
index 8c824c732..4e5ed17c1 100644
--- a/pychunkedgraph/graph/cache.py
+++ b/pychunkedgraph/graph/cache.py
@@ -31,26 +31,24 @@ def __init__(self, cg):
 
         self._parent_vec = np.vectorize(self.parent, otypes=[np.uint64])
         self._children_vec = np.vectorize(self.children, otypes=[np.ndarray])
-        self._atomic_cross_edges_vec = np.vectorize(
-            self.atomic_cross_edges, otypes=[dict]
-        )
+        self._cross_chunk_edges_vec = np.vectorize(self.cross_chunk_edges, otypes=[dict])
 
         # no limit because we don't want to lose new IDs
         self.parents_cache = LRUCache(maxsize=maxsize)
         self.children_cache = LRUCache(maxsize=maxsize)
-        self.atomic_cx_edges_cache = LRUCache(maxsize=maxsize)
+        self.cross_chunk_edges_cache = LRUCache(maxsize=maxsize)
 
     def __len__(self):
         return (
             len(self.parents_cache)
             + len(self.children_cache)
-            + len(self.atomic_cx_edges_cache)
+            + len(self.cross_chunk_edges_cache)
         )
 
     def clear(self):
         self.parents_cache.clear()
         self.children_cache.clear()
-        self.atomic_cx_edges_cache.clear()
+        self.cross_chunk_edges_cache.clear()
 
     def parent(self, node_id: np.uint64, *, time_stamp: datetime = None):
         @cached(cache=self.parents_cache, key=lambda node_id: node_id)
@@ -68,15 +66,15 @@ def children_decorated(node_id):
 
         return children_decorated(node_id)
 
-    def atomic_cross_edges(self, node_id):
-        @cached(cache=self.atomic_cx_edges_cache, key=lambda node_id: node_id)
-        def atomic_cross_edges_decorated(node_id):
-            edges = self._cg.get_atomic_cross_edges(
+    def cross_chunk_edges(self, node_id):
+        @cached(cache=self.cross_chunk_edges_cache, key=lambda node_id: node_id)
+        def cross_edges_decorated(node_id):
+            edges = self._cg.get_cross_chunk_edges(
                 np.array([node_id], dtype=NODE_ID), raw_only=True
             )
             return edges[node_id]
 
-        return atomic_cross_edges_decorated(node_id)
+        return cross_edges_decorated(node_id)
 
     def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None):
         if not node_ids.size:
@@ -105,20 +103,20 @@ def children_multiple(self, node_ids: np.ndarray, *, flatten=False):
             return np.concatenate([*result.values()])
         return result
 
-    def atomic_cross_edges_multiple(self, node_ids: np.ndarray):
+    def cross_chunk_edges_multiple(self, node_ids: np.ndarray):
         result = {}
         if not node_ids.size:
             return result
         mask = np.in1d(
-            node_ids, np.fromiter(self.atomic_cx_edges_cache.keys(), dtype=NODE_ID)
+            node_ids, np.fromiter(self.cross_chunk_edges_cache.keys(), dtype=NODE_ID)
         )
-        cached_edges_ = self._atomic_cross_edges_vec(node_ids[mask])
+        cached_edges_ = self._cross_chunk_edges_vec(node_ids[mask])
         result.update(
             {id_: edges_ for id_, edges_ in zip(node_ids[mask], cached_edges_)}
         )
-        result.update(self._cg.get_atomic_cross_edges(node_ids[~mask], raw_only=True))
+        result.update(self._cg.get_cross_chunk_edges(node_ids[~mask], raw_only=True))
         update(
-            self.atomic_cx_edges_cache,
+            self.cross_chunk_edges_cache,
             node_ids[~mask],
             [result[k] for k in node_ids[~mask]],
         )
diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 83c543b6e..1cdecd77a 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -292,45 +292,20 @@ def _get_children_multiple(
             }
         return self.cache.children_multiple(node_ids)
 
-    def get_atomic_cross_edges(
-        self, l2_ids: typing.Iterable, *, raw_only=False
-    ) -> typing.Dict[np.uint64, typing.Dict[int, typing.Iterable]]:
-        """Returns atomic cross edges for level 2 IDs."""
-        if raw_only or not self.cache:
-            node_edges_d_d = self.client.read_nodes(
-                node_ids=l2_ids,
-                properties=[
-                    attributes.Connectivity.CrossChunkEdge[l]
-                    for l in range(2, max(3, self.meta.layer_count))
-                ],
-            )
-            result = {}
-            for id_ in l2_ids:
-                try:
-                    result[id_] = {
-                        prop.index: val[0].value.copy()
-                        for prop, val in node_edges_d_d[id_].items()
-                    }
-                except KeyError:
-                    result[id_] = {}
-            return result
-        return self.cache.atomic_cross_edges_multiple(l2_ids)
-
-    def get_cross_chunk_edges(self, node_ids: typing.Iterable) -> typing.Dict:
+    def get_atomic_cross_edges(self, l2_ids: typing.Iterable) -> typing.Dict:
         """
-        Returns cross edges for `node_ids`.
-        A dict of the form `{node_id: {layer: cross_edges}}`
+        Returns atomic cross edges for level 2 IDs.
+        A dict of the form `{l2id: {layer: atomic_cross_edges}}`.
         """
+        node_edges_d_d = self.client.read_nodes(
+            node_ids=l2_ids,
+            properties=[
+                attributes.Connectivity.AtomicCrossChunkEdge[l]
+                for l in range(2, self.meta.layer_count)
+            ],
+        )
         result = {}
-        node_ids = np.array(node_ids, dtype=basetypes.NODE_ID)
-        if node_ids.size == 0:
-            return result
-        attrs = [
-            attributes.Connectivity.CrossChunkEdge[l]
-            for l in range(2, self.meta.layer_count)
-        ]
-        node_edges_d_d = self.client.read_nodes(node_ids=node_ids, properties=attrs)
-        for id_ in node_ids:
+        for id_ in l2_ids:
             try:
                 result[id_] = {
                     prop.index: val[0].value.copy()
@@ -340,6 +315,34 @@ def get_cross_chunk_edges(self, node_ids: typing.Iterable) -> typing.Dict:
                 result[id_] = {}
         return result
 
+    def get_cross_chunk_edges(
+        self, node_ids: typing.Iterable, *, raw_only=False
+    ) -> typing.Dict:
+        """
+        Returns cross edges for `node_ids`.
+        A dict of the form `{node_id: {layer: cross_edges}}`.
+        """
+        if raw_only or not self.cache:
+            result = {}
+            node_ids = np.array(node_ids, dtype=basetypes.NODE_ID)
+            if node_ids.size == 0:
+                return result
+            attrs = [
+                attributes.Connectivity.CrossChunkEdge[l]
+                for l in range(2, self.meta.layer_count)
+            ]
+            node_edges_d_d = self.client.read_nodes(node_ids=node_ids, properties=attrs)
+            for id_ in node_ids:
+                try:
+                    result[id_] = {
+                        prop.index: val[0].value.copy()
+                        for prop, val in node_edges_d_d[id_].items()
+                    }
+                except KeyError:
+                    result[id_] = {}
+            return result
+        return self.cache.cross_chunk_edges_multiple(node_ids)
+
     def get_roots(
         self,
         node_ids: typing.Sequence[np.uint64],
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 6d823e720..68a8c9b3b 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -38,9 +38,9 @@ def _analyze_affected_edges(
     cg, atomic_edges: Iterable[np.ndarray], parent_ts: datetime.datetime = None
 ) -> Tuple[Iterable, Dict]:
     """
-    Determine if atomic edges are within the chunk.
-    If not, they are cross edges between two L2 IDs in adjacent chunks.
-    Returns edges between L2 IDs and atomic cross edges.
+    Returns l2 edges within chunk and adds self edges for nodes in cross chunk edges.
+
+    Also returns new cross edges dicts for nodes crossing chunk boundary.
     """
     supervoxels = np.unique(atomic_edges)
     parents = cg.get_parents(supervoxels, time_stamp=parent_ts)
@@ -51,19 +51,18 @@ def _analyze_affected_edges(
         for edge_ in atomic_edges[edge_layers == 1]
     ]
 
-    # cross chunk edges
-    atomic_cross_edges_d = defaultdict(lambda: defaultdict(list))
+    cross_edges_d = defaultdict(lambda: defaultdict(list))
     for layer in range(2, cg.meta.layer_count):
         layer_edges = atomic_edges[edge_layers == layer]
         if not layer_edges.size:
             continue
         for edge in layer_edges:
-            parent_1 = sv_parent_d[edge[0]]
-            parent_2 = sv_parent_d[edge[1]]
-            atomic_cross_edges_d[parent_1][layer].append(edge)
-            atomic_cross_edges_d[parent_2][layer].append(edge[::-1])
-            parent_edges.extend([[parent_1, parent_1], [parent_2, parent_2]])
-    return (parent_edges, atomic_cross_edges_d)
+            parent0 = sv_parent_d[edge[0]]
+            parent1 = sv_parent_d[edge[1]]
+            cross_edges_d[parent0][layer].append([parent0, parent1])
+            cross_edges_d[parent1][layer].append([parent1, parent0])
+            parent_edges.extend([[parent0, parent0], [parent1, parent1]])
+    return parent_edges, cross_edges_d
 
 
 def _get_relevant_components(edges: np.ndarray, supervoxels: np.ndarray) -> Tuple:
@@ -89,9 +88,7 @@ def merge_preprocess(
     parent_ts: datetime.datetime = None,
 ) -> np.ndarray:
     """
-    Determine if a fake edge needs to be added.
-    Get subgraph within the bounding box
-    Add fake edge if there are no inactive edges between two components.
+    Check and return inactive edges in the subgraph.
     """
     edge_layers = cg.get_cross_chunk_edges_layer(subgraph_edges)
     active_edges = [types.empty_2d]
@@ -146,6 +143,7 @@ def check_fake_edges(
 
     rows = []
     supervoxels = atomic_edges.ravel()
+    # fake edges are stored with l2 chunks
     chunk_ids = cg.get_chunk_ids_from_node_ids(
         cg.get_parents(supervoxels, time_stamp=parent_ts)
     )
@@ -188,21 +186,19 @@ def add_edges(
     parent_ts: datetime.datetime = None,
     allow_same_segment_merge=False,
 ):
-    edges, l2_atomic_cross_edges_d = _analyze_affected_edges(
+    edges, l2_cross_edges_d = _analyze_affected_edges(
         cg, atomic_edges, parent_ts=parent_ts
     )
     l2ids = np.unique(edges)
     if not allow_same_segment_merge:
-        assert (
-            np.unique(cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts)).size
-            == 2
-        ), "L2 IDs must belong to different roots."
+        roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts)
+        assert np.unique(roots).size == 2, "L2 IDs must belong to different roots."
     new_old_id_d, old_new_id_d, old_hierarchy_d = _init_old_hierarchy(
         cg, l2ids, parent_ts=parent_ts
     )
     atomic_children_d = cg.get_children(l2ids)
-    atomic_cross_edges_d = merge_cross_edge_dicts(
-        cg.get_atomic_cross_edges(l2ids), l2_atomic_cross_edges_d
+    cross_edges_d = merge_cross_edge_dicts(
+        cg.get_cross_chunk_edges(l2ids), l2_cross_edges_d
     )
 
     graph, _, _, graph_ids = flatgraph.build_gt_graph(edges, make_directed=True)
@@ -214,8 +210,8 @@ def add_edges(
         cg.cache.children_cache[new_id] = np.concatenate(
             [atomic_children_d[l2id] for l2id in l2ids_]
         )
-        cg.cache.atomic_cx_edges_cache[new_id] = concatenate_cross_edge_dicts(
-            [atomic_cross_edges_d[l2id] for l2id in l2ids_]
+        cg.cache.cross_chunk_edges_cache[new_id] = concatenate_cross_edge_dicts(
+            [cross_edges_d[l2id] for l2id in l2ids_]
         )
         cache_utils.update(
             cg.cache.parents_cache, cg.cache.children_cache[new_id], new_id
@@ -300,14 +296,14 @@ def remove_edges(
         cg, l2ids, parent_ts=parent_ts
     )
     l2id_chunk_id_d = dict(zip(l2ids.tolist(), cg.get_chunk_ids_from_node_ids(l2ids)))
-    atomic_cross_edges_d = cg.get_atomic_cross_edges(l2ids)
+    cross_edges_d = cg.get_cross_chunk_edges(l2ids)
 
     removed_edges = np.concatenate([atomic_edges, atomic_edges[:, ::-1]], axis=0)
     new_l2_ids = []
     for id_ in l2ids:
         l2_agg = l2id_agglomeration_d[id_]
         ccs, graph_ids, cross_edges = _process_l2_agglomeration(
-            l2_agg, removed_edges, atomic_cross_edges_d[id_]
+            l2_agg, removed_edges, cross_edges_d[id_]
         )
         # done here to avoid repeat computation in loop
         cross_edge_layers = cg.get_cross_chunk_edges_layer(cross_edges)
@@ -386,60 +382,27 @@ def _get_old_ids(self, new_ids):
         ]
         return np.concatenate(old_ids)
 
-    def _map_sv_to_parent(self, node_ids, layer, node_map=None):
-        sv_parent_d = {}
-        sv_cross_edges = [types.empty_2d]
-        if node_map is None:
-            node_map = {}
-        for id_ in node_ids:
-            id_eff = node_map.get(id_, id_)
-            edges_ = self._cross_edges_d[id_].get(layer, types.empty_2d)
-            sv_parent_d.update(dict(zip(edges_[:, 0], [id_eff] * len(edges_))))
-            sv_cross_edges.append(edges_)
-        return sv_parent_d, np.concatenate(sv_cross_edges)
-
-    def _get_connected_components(
-        self, node_ids: np.ndarray, layer: int, lower_layer_ids: np.ndarray
-    ):
-        _node_ids = np.concatenate([node_ids, lower_layer_ids])
-        cached = np.fromiter(self._cross_edges_d.keys(), dtype=basetypes.NODE_ID)
-        not_cached = _node_ids[~np.in1d(_node_ids, cached)]
-
+    def _get_connected_components(self, node_ids: np.ndarray, layer: int):
         with TimeIt(
             f"get_cross_chunk_edges.{layer}",
             self.cg.graph_id,
             self._operation_id,
         ):
-            self._cross_edges_d.update(self.cg.get_cross_chunk_edges(not_cached))
-
-        sv_parent_d, sv_cross_edges = self._map_sv_to_parent(node_ids, layer)
-        get_sv_parents = np.vectorize(sv_parent_d.get, otypes=[np.uint64])
-        try:
-            cross_edges = get_sv_parents(sv_cross_edges)
-        except TypeError:  # NoneType error
-            # if there is a missing parent, try including lower layer ids
-            # this can happen due to skip connections
-
-            # we want to map all these lower IDs to the current layer
-            lower_layer_to_layer = self.cg.get_roots(
-                lower_layer_ids, stop_layer=layer, ceil=False
-            )
-            node_map = {k: v for k, v in zip(lower_layer_ids, lower_layer_to_layer)}
-            sv_parent_d, sv_cross_edges = self._map_sv_to_parent(
-                _node_ids, layer, node_map=node_map
-            )
-            get_sv_parents = np.vectorize(sv_parent_d.get, otypes=[np.uint64])
-            cross_edges = get_sv_parents(sv_cross_edges)
+            cross_edges_d = self.cg.get_cross_chunk_edges(node_ids)
+            self._cross_edges_d.update(cross_edges_d)
+
+        cross_edges = [types.empty_2d]
+        for id_ in node_ids:
+            edges_ = self._cross_edges_d[id_].get(layer, types.empty_2d)
+            cross_edges.append(edges_)
 
-        cross_edges = np.concatenate([cross_edges, np.vstack([node_ids, node_ids]).T])
+        cross_edges = np.concatenate([*cross_edges, np.vstack([node_ids, node_ids]).T])
         graph, _, _, graph_ids = flatgraph.build_gt_graph(
             cross_edges, make_directed=True
         )
         return flatgraph.connected_components(graph), graph_ids
 
-    def _get_layer_node_ids(
-        self, new_ids: np.ndarray, layer: int
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    def _get_layer_node_ids(self, new_ids: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
         # get old identities of new IDs
         old_ids = self._get_old_ids(new_ids)
         # get their parents, then children of those parents
@@ -458,9 +421,7 @@ def _get_layer_node_ids(
             ]
             + [node_ids[~mask], new_ids]
         )
-        node_ids = np.unique(node_ids)
-        layer_mask = self.cg.get_chunk_layers(node_ids) == layer
-        return node_ids[layer_mask], node_ids[~layer_mask]
+        return np.unique(node_ids)
 
     def _create_new_parents(self, layer: int):
         """
@@ -473,10 +434,8 @@ def _create_new_parents(self, layer: int):
         update parent old IDs
         """
         new_ids = self._new_ids_d[layer]
-        layer_node_ids, lower_layer_ids = self._get_layer_node_ids(new_ids, layer)
-        components, graph_ids = self._get_connected_components(
-            layer_node_ids, layer, lower_layer_ids
-        )
+        layer_node_ids = self._get_layer_node_ids(new_ids)
+        components, graph_ids = self._get_connected_components(layer_node_ids, layer)
         for cc_indices in components:
             parent_layer = layer + 1
             cc_ids = graph_ids[cc_indices]
@@ -553,20 +512,20 @@ def _update_root_id_lineage(self):
             )
         return rows
 
-    def _get_atomic_cross_edges_val_dict(self):
+    def _get_cross_edges_val_dict(self):
         new_ids = np.array(self._new_ids_d[2], dtype=basetypes.NODE_ID)
         val_dicts = {}
-        atomic_cross_edges_d = self.cg.get_atomic_cross_edges(new_ids)
+        cross_edges_d = self.cg.get_cross_chunk_edges(new_ids)
         for id_ in new_ids:
             val_dict = {}
-            for layer, edges in atomic_cross_edges_d[id_].items():
-                val_dict[attributes.Connectivity.AtomicCrossChunkEdge[layer]] = edges
+            for layer, edges in cross_edges_d[id_].items():
+                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
             val_dicts[id_] = val_dict
         return val_dicts
 
     def create_new_entries(self) -> List:
         rows = []
-        val_dicts = self._get_atomic_cross_edges_val_dict()
+        val_dicts = self._get_cross_edges_val_dict()
         for layer in range(2, self.cg.meta.layer_count + 1):
             new_ids = self._new_ids_d[layer]
             for id_ in new_ids:

From 156f2cd9c9ee7dca1efbe64cb0551605bc29e3e3 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 21 Aug 2023 17:31:47 +0000
Subject: [PATCH 029/105] wip: edits refactor

---
 pychunkedgraph/graph/edits.py | 44 ++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 68a8c9b3b..ae7c25b4c 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -7,6 +7,7 @@
 from typing import Iterable
 from collections import defaultdict
 
+import fastremap
 import numpy as np
 import fastremap
 
@@ -233,6 +234,8 @@ def add_edges(
     )
 
     new_roots = create_parents.run()
+    print("new_roots", new_roots, cg.meta.layer_count)
+    print(cg.get_children(np.array(new_roots, dtype=np.uint64)))
     new_entries = create_parents.create_new_entries()
     return new_roots, new_l2_ids, new_entries
 
@@ -397,21 +400,22 @@ def _get_connected_components(self, node_ids: np.ndarray, layer: int):
             cross_edges.append(edges_)
 
         cross_edges = np.concatenate([*cross_edges, np.vstack([node_ids, node_ids]).T])
+        temp_d = {k: next(iter(v)) for k, v in self._old_new_id_d.items()}
+        cross_edges = fastremap.remap(cross_edges, temp_d, preserve_missing_labels=True)
+
         graph, _, _, graph_ids = flatgraph.build_gt_graph(
             cross_edges, make_directed=True
         )
         return flatgraph.connected_components(graph), graph_ids
 
-    def _get_layer_node_ids(self, new_ids: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    def _get_layer_node_ids(
+        self, new_ids: np.ndarray, layer: int
+    ) -> Tuple[np.ndarray, np.ndarray]:
         # get old identities of new IDs
         old_ids = self._get_old_ids(new_ids)
         # get their parents, then children of those parents
-        node_ids = self.cg.get_children(
-            np.unique(
-                self.cg.get_parents(old_ids, time_stamp=self._last_successful_ts)
-            ),
-            flatten=True,
-        )
+        parents = self.cg.get_parents(old_ids, time_stamp=self._last_successful_ts)
+        node_ids = self.cg.get_children(np.unique(parents), flatten=True)
         # replace old identities with new IDs
         mask = np.in1d(node_ids, old_ids)
         node_ids = np.concatenate(
@@ -421,7 +425,9 @@ def _get_layer_node_ids(self, new_ids: np.ndarray) -> Tuple[np.ndarray, np.ndarr
             ]
             + [node_ids[~mask], new_ids]
         )
-        return np.unique(node_ids)
+        node_ids = np.unique(node_ids)
+        layer_mask = self.cg.get_chunk_layers(node_ids) == layer
+        return node_ids[layer_mask]
 
     def _create_new_parents(self, layer: int):
         """
@@ -434,7 +440,7 @@ def _create_new_parents(self, layer: int):
         update parent old IDs
         """
         new_ids = self._new_ids_d[layer]
-        layer_node_ids = self._get_layer_node_ids(new_ids)
+        layer_node_ids = self._get_layer_node_ids(new_ids, layer)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
         for cc_indices in components:
             parent_layer = layer + 1
@@ -458,6 +464,11 @@ def _create_new_parents(self, layer: int):
                 cc_ids,
                 parent_id,
             )
+
+            children_cx_edges = [self._cross_edges_d[child] for child in cc_ids]
+            cx_edges = concatenate_cross_edge_dicts(children_cx_edges)
+            self.cg.cache.cross_chunk_edges_cache[parent_id] = cx_edges
+
             self._update_id_lineage(parent_id, cc_ids, layer, parent_layer)
 
     def run(self) -> Iterable:
@@ -513,14 +524,15 @@ def _update_root_id_lineage(self):
         return rows
 
     def _get_cross_edges_val_dict(self):
-        new_ids = np.array(self._new_ids_d[2], dtype=basetypes.NODE_ID)
         val_dicts = {}
-        cross_edges_d = self.cg.get_cross_chunk_edges(new_ids)
-        for id_ in new_ids:
-            val_dict = {}
-            for layer, edges in cross_edges_d[id_].items():
-                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
-            val_dicts[id_] = val_dict
+        for layer in range(2, self.cg.meta.layer_count):
+            new_ids = np.array(self._new_ids_d[layer], dtype=basetypes.NODE_ID)
+            cross_edges_d = self.cg.get_cross_chunk_edges(new_ids)
+            for id_ in new_ids:
+                val_dict = {}
+                for layer, edges in cross_edges_d[id_].items():
+                    val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+                val_dicts[id_] = val_dict
         return val_dicts
 
     def create_new_entries(self) -> List:

From 88ffbf2184edfd69c59f38b2409f72f8e4efb0e0 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 22 Aug 2023 16:59:25 +0000
Subject: [PATCH 030/105] fix(ingest): cache cross chunk edges from children

---
 .../ingest/create/abstract_layers.py          | 84 ++++++++++++-------
 pychunkedgraph/ingest/create/cross_edges.py   |  2 +-
 2 files changed, 56 insertions(+), 30 deletions(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 63b613ae6..9a339443f 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel
+# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel, c-extension-no-member
 
 """
 Functions for creating parents in level 3 and above
@@ -9,8 +9,8 @@
 import multiprocessing as mp
 from typing import Optional
 from typing import Sequence
-from collections import defaultdict
 
+import fastremap
 import numpy as np
 from multiwrapper import multiprocessing_utils as mu
 
@@ -21,6 +21,7 @@
 from ...graph.utils import basetypes
 from ...graph.utils import serializers
 from ...graph.chunkedgraph import ChunkedGraph
+from ...graph.edges.utils import concatenate_cross_edge_dicts
 from ...graph.utils.generic import get_valid_timestamp
 from ...graph.utils.generic import filter_failed_node_ids
 from ...graph.chunks.hierarchy import get_children_chunk_coords
@@ -60,7 +61,6 @@ def add_layer(
         layer_id,
         parent_coords,
         connected_components,
-        cx_edges,
         get_valid_timestamp(time_stamp),
         n_threads > 1,
     )
@@ -121,7 +121,7 @@ def _read_chunk(children_ids_shared, cg: ChunkedGraph, layer_id: int, chunk_coor
 
 
 def _write_connected_components(
-    cg, layer, pcoords, components, cx_edges, time_stamp, use_threads=True
+    cg, layer, pcoords, components, time_stamp, use_threads=True
 ):
     if len(components) == 0:
         return
@@ -131,7 +131,7 @@ def _write_connected_components(
         node_layer_d = get_chunk_nodes_cross_edge_layer(cg, layer, pcoords, use_threads)
 
     if not use_threads:
-        _write(cg, layer, pcoords, components, cx_edges, node_layer_d, time_stamp, use_threads)
+        _write(cg, layer, pcoords, components, node_layer_d, time_stamp, use_threads)
         return
 
     task_size = int(math.ceil(len(components) / mp.cpu_count() / 10))
@@ -139,7 +139,7 @@ def _write_connected_components(
     cg_info = cg.get_serialized_info()
     multi_args = []
     for ccs in chunked_ccs:
-        args = (cg_info, layer, pcoords, ccs, cx_edges, node_layer_d, time_stamp)
+        args = (cg_info, layer, pcoords, ccs, node_layer_d, time_stamp)
         multi_args.append(args)
     mu.multiprocess_func(
         _write_components_helper,
@@ -149,9 +149,9 @@ def _write_connected_components(
 
 
 def _write_components_helper(args):
-    cg_info, layer, pcoords, ccs, cx_edges, node_layer_d, time_stamp = args
+    cg_info, layer, pcoords, ccs, node_layer_d, time_stamp = args
     cg = ChunkedGraph(**cg_info)
-    _write(cg, layer, pcoords, ccs, cx_edges, node_layer_d, time_stamp)
+    _write(cg, layer, pcoords, ccs, node_layer_d, time_stamp)
 
 
 def _write(
@@ -159,13 +159,12 @@ def _write(
     layer_id,
     parent_coords,
     components,
-    cx_edges,
     node_layer_d,
     time_stamp,
     use_threads=True,
 ):
-    parent_layer_ids = range(layer_id, cg.meta.layer_count + 1)
-    cc_connections = {l: [] for l in parent_layer_ids}
+    parent_layers = range(layer_id, cg.meta.layer_count + 1)
+    cc_connections = {l: [] for l in parent_layers}
     for node_ids in components:
         layer = layer_id
         if len(node_ids) == 1:
@@ -177,40 +176,67 @@ def _write(
     parent_chunk_id = cg.get_chunk_id(layer=layer_id, x=x, y=y, z=z)
     parent_chunk_id_dict = cg.get_parent_chunk_id_dict(parent_chunk_id)
 
-    cx_edges = np.array(cx_edges, dtype=basetypes.NODE_ID)
-    for parent_layer_id in parent_layer_ids:
-        if len(cc_connections[parent_layer_id]) == 0:
+    for parent_layer in parent_layers:
+        if len(cc_connections[parent_layer]) == 0:
             continue
 
-        parent_chunk_id = parent_chunk_id_dict[parent_layer_id]
+        parent_chunk_id = parent_chunk_id_dict[parent_layer]
         reserved_parent_ids = cg.id_client.create_node_ids(
             parent_chunk_id,
-            size=len(cc_connections[parent_layer_id]),
-            root_chunk=parent_layer_id == cg.meta.layer_count and use_threads,
+            size=len(cc_connections[parent_layer]),
+            root_chunk=parent_layer == cg.meta.layer_count and use_threads,
         )
 
-        for i_cc, node_ids in enumerate(cc_connections[parent_layer_id]):
-            node_cx_edges_d = defaultdict(lambda: types.empty_2d)
-            for node in node_ids:
-                mask0 = cx_edges[:, 0] == node
-                mask1 = cx_edges[:, 1] == node
-                node_cx_edges_d[node] = cx_edges[mask0 | mask1]
-
+        for i_cc, node_ids in enumerate(cc_connections[parent_layer]):
             parent_id = reserved_parent_ids[i_cc]
+
+            if parent_layer == 3:
+                # children are from atomic chunks
+                cx_edges_d = cg.get_atomic_cross_edges(node_ids)
+            else:
+                # children are from abstract chunks
+                cx_edges_d = cg.get_cross_chunk_edges(node_ids, raw_only=True)
+
+            children_cx_edges = []
             for node in node_ids:
+                node_layer = cg.get_chunk_layer(node)
                 row_id = serializers.serialize_uint64(node)
                 val_dict = {attributes.Hierarchy.Parent: parent_id}
 
-                node_cx_edges = node_cx_edges_d[node]
-                cx_layers = cg.get_cross_chunk_edges_layer(node_cx_edges)
-                for layer in set(cx_layers):
-                    layer_mask = cx_layers == layer
+                node_cx_edges_d = cx_edges_d.get(node, {})
+                if not node_cx_edges_d:
+                    rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
+                    continue
+
+                for layer in range(node_layer, cg.meta.layer_count):
+                    if not layer in node_cx_edges_d:
+                        continue
+
+                    layer_edges = node_cx_edges_d[layer]
+                    edges_nodes = np.unique(layer_edges)
+                    edges_nodes_parents = cg.get_parents(edges_nodes)
+                    temp_map = dict(zip(edges_nodes, edges_nodes_parents))
+
+                    layer_edges = fastremap.remap(
+                        layer_edges, temp_map, preserve_missing_labels=True
+                    )
+                    layer_edges = np.unique(layer_edges, axis=0)
+
                     col = attributes.Connectivity.CrossChunkEdge[layer]
-                    val_dict[col] = node_cx_edges[layer_mask]
+                    val_dict[col] = layer_edges
+                    node_cx_edges_d[layer] = layer_edges
+                children_cx_edges.append(node_cx_edges_d)
                 rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
 
             row_id = serializers.serialize_uint64(parent_id)
             val_dict = {attributes.Hierarchy.Child: node_ids}
+            parent_cx_edges_d = concatenate_cross_edge_dicts(children_cx_edges, unique=True)
+            for layer in range(parent_layer, cg.meta.layer_count):
+                if not layer in parent_cx_edges_d:
+                    continue
+                col = attributes.Connectivity.CrossChunkEdge[layer]
+                val_dict[col] = parent_cx_edges_d[layer]
+
             rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
             if len(rows) > 100000:
                 cg.client.write(rows)
diff --git a/pychunkedgraph/ingest/create/cross_edges.py b/pychunkedgraph/ingest/create/cross_edges.py
index 5f0ebf8df..9581838af 100644
--- a/pychunkedgraph/ingest/create/cross_edges.py
+++ b/pychunkedgraph/ingest/create/cross_edges.py
@@ -63,7 +63,7 @@ def _get_children_chunk_cross_edges_helper(args) -> None:
     edge_ids_shared.append(_get_children_chunk_cross_edges(cg, atomic_chunks, layer))
 
 
-def _get_children_chunk_cross_edges(cg: ChunkedGraph, atomic_chunks, layer) -> None:
+def _get_children_chunk_cross_edges(cg: ChunkedGraph, atomic_chunks, layer) -> np.ndarray:
     """
     Non parallelized version
     Cross edges that connect children chunks.

From c5ddd1bc1b7f298d84ff684d64c9c5c7975191ce Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 22 Aug 2023 17:21:13 +0000
Subject: [PATCH 031/105] feat: add unique flag

---
 pychunkedgraph/graph/edges/utils.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/edges/utils.py b/pychunkedgraph/graph/edges/utils.py
index 94641343a..cd0e85fe8 100644
--- a/pychunkedgraph/graph/edges/utils.py
+++ b/pychunkedgraph/graph/edges/utils.py
@@ -46,7 +46,7 @@ def concatenate_chunk_edges(chunk_edge_dicts: Iterable) -> Dict:
     return edges_dict
 
 
-def concatenate_cross_edge_dicts(edges_ds: Iterable[Dict]) -> Dict:
+def concatenate_cross_edge_dicts(edges_ds: Iterable[Dict], unique: bool = False) -> Dict:
     """Combines cross chunk edge dicts of form {layer id : edge list}."""
     result_d = defaultdict(list)
     for edges_d in edges_ds:
@@ -54,7 +54,10 @@ def concatenate_cross_edge_dicts(edges_ds: Iterable[Dict]) -> Dict:
             result_d[layer].append(edges)
 
     for layer, edge_lists in result_d.items():
-        result_d[layer] = np.concatenate(edge_lists)
+        edges = np.concatenate(edge_lists)
+        if unique:
+            edges = np.unique(edges, axis=0)
+        result_d[layer] = edges
     return result_d
 
 

From 397a4380ea65a02e4435314798ac4966c0388149 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 22 Aug 2023 17:21:53 +0000
Subject: [PATCH 032/105] feat: cross edges column family gcversionrule

---
 pychunkedgraph/graph/attributes.py            | 20 +++++++++----------
 .../graph/client/bigtable/client.py           |  4 +++-
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/pychunkedgraph/graph/attributes.py b/pychunkedgraph/graph/attributes.py
index 958913119..84283161d 100644
--- a/pychunkedgraph/graph/attributes.py
+++ b/pychunkedgraph/graph/attributes.py
@@ -104,10 +104,12 @@ class Connectivity:
         serializer=serializers.NumPyArray(dtype=basetypes.EDGE_AREA),
     )
 
-    FakeEdges = _Attribute(
-        key=b"fake_edges",
-        family_id="4",
-        serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
+    AtomicCrossChunkEdge = _AttributeArray(
+        pattern=b"atomic_cross_edges_%d",
+        family_id="3",
+        serializer=serializers.NumPyArray(
+            dtype=basetypes.NODE_ID, shape=(-1, 2), compression_level=22
+        ),
     )
 
     CrossChunkEdge = _AttributeArray(
@@ -118,12 +120,10 @@ class Connectivity:
         ),
     )
 
-    AtomicCrossChunkEdge = _AttributeArray(
-        pattern=b"atomic_cross_edges_%d",
-        family_id="3",
-        serializer=serializers.NumPyArray(
-            dtype=basetypes.NODE_ID, shape=(-1, 2), compression_level=22
-        ),
+    FakeEdges = _Attribute(
+        key=b"fake_edges",
+        family_id="5",
+        serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
     )
 
 
diff --git a/pychunkedgraph/graph/client/bigtable/client.py b/pychunkedgraph/graph/client/bigtable/client.py
index 788c76a8e..1bd027255 100644
--- a/pychunkedgraph/graph/client/bigtable/client.py
+++ b/pychunkedgraph/graph/client/bigtable/client.py
@@ -638,7 +638,9 @@ def _create_column_families(self):
         f.create()
         f = self._table.column_family("3", gc_rule=MaxAgeGCRule(timedelta(days=365)))
         f.create()
-        f = self._table.column_family("4")
+        f = self._table.column_family("4", gc_rule=MaxVersionsGCRule(1))
+        f.create()
+        f = self._table.column_family("5")
         f.create()
 
     def _get_ids_range(self, key: bytes, size: int) -> typing.Tuple:

From 0ab0759d5a7710e9654274a8849f005efde76faa Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 22 Aug 2023 17:22:10 +0000
Subject: [PATCH 033/105] fix: convert input to np arrays

---
 pychunkedgraph/graph/cache.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pychunkedgraph/graph/cache.py b/pychunkedgraph/graph/cache.py
index 4e5ed17c1..52fdfd022 100644
--- a/pychunkedgraph/graph/cache.py
+++ b/pychunkedgraph/graph/cache.py
@@ -31,7 +31,9 @@ def __init__(self, cg):
 
         self._parent_vec = np.vectorize(self.parent, otypes=[np.uint64])
         self._children_vec = np.vectorize(self.children, otypes=[np.ndarray])
-        self._cross_chunk_edges_vec = np.vectorize(self.cross_chunk_edges, otypes=[dict])
+        self._cross_chunk_edges_vec = np.vectorize(
+            self.cross_chunk_edges, otypes=[dict]
+        )
 
         # no limit because we don't want to lose new IDs
         self.parents_cache = LRUCache(maxsize=maxsize)
@@ -77,6 +79,7 @@ def cross_edges_decorated(node_id):
         return cross_edges_decorated(node_id)
 
     def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None):
+        node_ids = np.array(node_ids, dtype=NODE_ID)
         if not node_ids.size:
             return node_ids
         mask = np.in1d(node_ids, np.fromiter(self.parents_cache.keys(), dtype=NODE_ID))
@@ -90,6 +93,7 @@ def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None)
 
     def children_multiple(self, node_ids: np.ndarray, *, flatten=False):
         result = {}
+        node_ids = np.array(node_ids, dtype=NODE_ID)
         if not node_ids.size:
             return result
         mask = np.in1d(node_ids, np.fromiter(self.children_cache.keys(), dtype=NODE_ID))
@@ -105,6 +109,7 @@ def children_multiple(self, node_ids: np.ndarray, *, flatten=False):
 
     def cross_chunk_edges_multiple(self, node_ids: np.ndarray):
         result = {}
+        node_ids = np.array(node_ids, dtype=NODE_ID)
         if not node_ids.size:
             return result
         mask = np.in1d(

From 0fcf524107129ce16ad1c1e4be79279c2d742600 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 22 Aug 2023 17:23:47 +0000
Subject: [PATCH 034/105] fix: linting issues

---
 pychunkedgraph/graph/chunkedgraph.py | 14 ++++----------
 pychunkedgraph/graph/operation.py    |  6 +++---
 pychunkedgraph/graph/subgraph.py     |  4 ++--
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 1cdecd77a..f4e87290c 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -24,6 +24,8 @@
 from .edges import utils as edge_utils
 from .chunks import utils as chunk_utils
 from .chunks import hierarchy as chunk_hierarchy
+from .subgraph import get_subgraph_nodes
+from .subgraph import get_subgraph_edges_and_leaves
 
 
 class ChunkedGraph:
@@ -524,12 +526,10 @@ def get_subgraph(
         edges_only: bool = False,
         leaves_only: bool = False,
         return_flattened: bool = False,
-    ) -> typing.Tuple[typing.Dict, typing.Dict, Edges]:
+    ) -> typing.Tuple[typing.Dict, typing.Tuple[Edges]]:
         """
         Generic subgraph method.
         """
-        from .subgraph import get_subgraph_nodes
-        from .subgraph import get_subgraph_edges_and_leaves
 
         if return_layers is None:
             return_layers = [2]
@@ -560,8 +560,6 @@ def get_subgraph_nodes(
         Get the children of `node_ids` that are at each of
         return_layers within the specified bounding box.
         """
-        from .subgraph import get_subgraph_nodes
-
         if return_layers is None:
             return_layers = [2]
 
@@ -584,8 +582,6 @@ def get_subgraph_edges(
         """
         Get the atomic edges of the `node_ids` within the specified bounding box.
         """
-        from .subgraph import get_subgraph_edges_and_leaves
-
         return get_subgraph_edges_and_leaves(
             self, node_id_or_ids, bbox, bbox_is_coordinate, True, False
         )
@@ -599,8 +595,6 @@ def get_subgraph_leaves(
         """
         Get the supervoxels of the `node_ids` within the specified bounding box.
         """
-        from .subgraph import get_subgraph_edges_and_leaves
-
         return get_subgraph_edges_and_leaves(
             self, node_id_or_ids, bbox, bbox_is_coordinate, False, True
         )
@@ -625,7 +619,7 @@ def get_fake_edges(
 
     def get_l2_agglomerations(
         self, level2_ids: np.ndarray, edges_only: bool = False
-    ) -> typing.Tuple[typing.Dict[int, types.Agglomeration], np.ndarray]:
+    ) -> typing.Tuple[typing.Dict[int, types.Agglomeration], typing.Tuple[Edges]]:
         """
         Children of Level 2 Node IDs and edges.
         Edges are read from cloud storage.
diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py
index b864a2d0d..39668565f 100644
--- a/pychunkedgraph/graph/operation.py
+++ b/pychunkedgraph/graph/operation.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, missing-docstring, too-many-lines, protected-access, broad_exception_raised
+# pylint: disable=invalid-name, missing-docstring, too-many-lines, protected-access, broad-exception-raised
 
 from abc import ABC, abstractmethod
 from collections import namedtuple
@@ -892,11 +892,11 @@ def _apply(
             self.cg.meta.split_bounding_offset,
         )
         with TimeIt("get_subgraph", self.cg.graph_id, operation_id):
-            l2id_agglomeration_d, edges = self.cg.get_subgraph(
+            l2id_agglomeration_d, edges_tuple = self.cg.get_subgraph(
                 root_ids.pop(), bbox=bbox, bbox_is_coordinate=True
             )
 
-            edges = reduce(lambda x, y: x + y, edges, Edges([], []))
+            edges = reduce(lambda x, y: x + y, edges_tuple, Edges([], []))
             supervoxels = np.concatenate(
                 [agg.supervoxels for agg in l2id_agglomeration_d.values()]
             )
diff --git a/pychunkedgraph/graph/subgraph.py b/pychunkedgraph/graph/subgraph.py
index 5b50b7c43..1538b3cc2 100644
--- a/pychunkedgraph/graph/subgraph.py
+++ b/pychunkedgraph/graph/subgraph.py
@@ -1,4 +1,4 @@
-# pylint: disable=invalid-name, missing-docstring
+# pylint: disable=invalid-name, missing-docstring, import-outside-toplevel
 
 from typing import List
 from typing import Dict
@@ -155,7 +155,7 @@ def get_subgraph_edges_and_leaves(
     bbox_is_coordinate: bool = False,
     edges_only: bool = False,
     leaves_only: bool = False,
-) -> Tuple[Dict, Dict, Edges]:
+) -> Tuple[Dict, Tuple[Edges]]:
     """Get the edges and/or leaves of the specified node_ids within the specified bounding box."""
     from .types import empty_1d
 

From c5e18d0f86e804cd0eb2f65d53ce997c62147878 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 22 Aug 2023 17:24:05 +0000
Subject: [PATCH 035/105] wip: edits refactor

---
 pychunkedgraph/graph/edits.py | 169 ++++++++++++++++++----------------
 1 file changed, 92 insertions(+), 77 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index ae7c25b4c..0086f00cd 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -39,7 +39,7 @@ def _analyze_affected_edges(
     cg, atomic_edges: Iterable[np.ndarray], parent_ts: datetime.datetime = None
 ) -> Tuple[Iterable, Dict]:
     """
-    Returns l2 edges within chunk and adds self edges for nodes in cross chunk edges.
+    Returns l2 edges within chunk and self edges for nodes in cross chunk edges.
 
     Also returns new cross edges dicts for nodes crossing chunk boundary.
     """
@@ -208,20 +208,30 @@ def add_edges(
     for cc_indices in components:
         l2ids_ = graph_ids[cc_indices]
         new_id = cg.id_client.create_node_id(cg.get_chunk_id(l2ids_[0]))
-        cg.cache.children_cache[new_id] = np.concatenate(
-            [atomic_children_d[l2id] for l2id in l2ids_]
-        )
-        cg.cache.cross_chunk_edges_cache[new_id] = concatenate_cross_edge_dicts(
-            [cross_edges_d[l2id] for l2id in l2ids_]
-        )
-        cache_utils.update(
-            cg.cache.parents_cache, cg.cache.children_cache[new_id], new_id
-        )
         new_l2_ids.append(new_id)
         new_old_id_d[new_id].update(l2ids_)
         for id_ in l2ids_:
             old_new_id_d[id_].add(new_id)
 
+        # update cache
+        # map parent to new merged children and vice versa
+        merged_children = np.concatenate([atomic_children_d[l2id] for l2id in l2ids_])
+        cg.cache.children_cache[new_id] = merged_children
+        cache_utils.update(cg.cache.parents_cache, merged_children, new_id)
+
+    # update cross chunk edges by replacing old_ids with new
+    # this can be done only after all new IDs have been created
+    for new_id, cc_indices in zip(new_l2_ids, components):
+        l2ids_ = graph_ids[cc_indices]
+        new_cx_edges_d = {}
+        cx_edges = [cross_edges_d[l2id] for l2id in l2ids_]
+        cx_edges_d = concatenate_cross_edge_dicts(cx_edges, unique=True)
+        temp_map = {k: next(iter(v)) for k, v in old_new_id_d.items()}
+        for layer, edges in cx_edges_d.items():
+            edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
+            new_cx_edges_d[layer] = edges
+        cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
+
     create_parents = CreateParentNodes(
         cg,
         new_l2_ids=new_l2_ids,
@@ -234,50 +244,25 @@ def add_edges(
     )
 
     new_roots = create_parents.run()
-    print("new_roots", new_roots, cg.meta.layer_count)
-    print(cg.get_children(np.array(new_roots, dtype=np.uint64)))
+    print()
+    print("layers", cg.meta.layer_count, "new_roots", new_roots)
     new_entries = create_parents.create_new_entries()
     return new_roots, new_l2_ids, new_entries
 
 
-def _process_l2_agglomeration(
-    agg: types.Agglomeration,
-    removed_edges: np.ndarray,
-    atomic_cross_edges_d: Dict[int, np.ndarray],
-):
+def _process_l2_agglomeration(agg: types.Agglomeration, removed_edges: np.ndarray):
     """
     For a given L2 id, remove given edges; calculate new connected components.
     """
     chunk_edges = agg.in_edges.get_pairs()
-    cross_edges = np.concatenate([types.empty_2d, *atomic_cross_edges_d.values()])
     chunk_edges = chunk_edges[~in2d(chunk_edges, removed_edges)]
-    cross_edges = cross_edges[~in2d(cross_edges, removed_edges)]
 
     isolated_ids = agg.supervoxels[~np.in1d(agg.supervoxels, chunk_edges)]
     isolated_edges = np.column_stack((isolated_ids, isolated_ids))
     graph, _, _, graph_ids = flatgraph.build_gt_graph(
         np.concatenate([chunk_edges, isolated_edges]), make_directed=True
     )
-    return flatgraph.connected_components(graph), graph_ids, cross_edges
-
-
-def _filter_component_cross_edges(
-    cc_ids: np.ndarray, cross_edges: np.ndarray, cross_edge_layers: np.ndarray
-) -> Dict[int, np.ndarray]:
-    """
-    Filters cross edges for a connected component `cc_ids`
-    from `cross_edges` of the complete chunk.
-    """
-    mask = np.in1d(cross_edges[:, 0], cc_ids)
-    cross_edges_ = cross_edges[mask]
-    cross_edge_layers_ = cross_edge_layers[mask]
-    edges_d = {}
-    for layer in np.unique(cross_edge_layers_):
-        edge_m = cross_edge_layers_ == layer
-        _cross_edges = cross_edges_[edge_m]
-        if _cross_edges.size:
-            edges_d[layer] = _cross_edges
-    return edges_d
+    return flatgraph.connected_components(graph), graph_ids
 
 
 def remove_edges(
@@ -291,10 +276,9 @@ def remove_edges(
 ):
     edges, _ = _analyze_affected_edges(cg, atomic_edges, parent_ts=parent_ts)
     l2ids = np.unique(edges)
-    assert (
-        np.unique(cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts)).size
-        == 1
-    ), "L2 IDs must belong to same root."
+    roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts)
+    assert np.unique(roots).size == 1, "L2 IDs must belong to same root."
+
     new_old_id_d, old_new_id_d, old_hierarchy_d = _init_old_hierarchy(
         cg, l2ids, parent_ts=parent_ts
     )
@@ -305,20 +289,14 @@ def remove_edges(
     new_l2_ids = []
     for id_ in l2ids:
         l2_agg = l2id_agglomeration_d[id_]
-        ccs, graph_ids, cross_edges = _process_l2_agglomeration(
-            l2_agg, removed_edges, cross_edges_d[id_]
-        )
-        # done here to avoid repeat computation in loop
-        cross_edge_layers = cg.get_cross_chunk_edges_layer(cross_edges)
+        ccs, graph_ids = _process_l2_agglomeration(l2_agg, removed_edges)
         new_parent_ids = cg.id_client.create_node_ids(
             l2id_chunk_id_d[l2_agg.node_id], len(ccs)
         )
         for i_cc, cc in enumerate(ccs):
             new_id = new_parent_ids[i_cc]
             cg.cache.children_cache[new_id] = graph_ids[cc]
-            cg.cache.atomic_cx_edges_cache[new_id] = _filter_component_cross_edges(
-                graph_ids[cc], cross_edges, cross_edge_layers
-            )
+            cg.cache.atomic_cx_edges_cache[new_id] = None
             cache_utils.update(cg.cache.parents_cache, graph_ids[cc], new_id)
             new_l2_ids.append(new_id)
             new_old_id_d[new_id].add(id_)
@@ -358,7 +336,6 @@ def __init__(
         self._new_old_id_d = new_old_id_d
         self._old_new_id_d = old_new_id_d
         self._new_ids_d = defaultdict(list)  # new IDs in each layer
-        self._cross_edges_d = {}
         self._operation_id = operation_id
         self._time_stamp = time_stamp
         self._last_successful_ts = parent_ts
@@ -385,6 +362,13 @@ def _get_old_ids(self, new_ids):
         ]
         return np.concatenate(old_ids)
 
+    def _get_new_ids(self, old_ids):
+        old_ids = [
+            np.array(list(self._old_new_id_d[id_]), dtype=basetypes.NODE_ID)
+            for id_ in old_ids
+        ]
+        return np.concatenate(old_ids)
+
     def _get_connected_components(self, node_ids: np.ndarray, layer: int):
         with TimeIt(
             f"get_cross_chunk_edges.{layer}",
@@ -392,20 +376,16 @@ def _get_connected_components(self, node_ids: np.ndarray, layer: int):
             self._operation_id,
         ):
             cross_edges_d = self.cg.get_cross_chunk_edges(node_ids)
-            self._cross_edges_d.update(cross_edges_d)
 
-        cross_edges = [types.empty_2d]
+        cx_edges = [types.empty_2d]
         for id_ in node_ids:
-            edges_ = self._cross_edges_d[id_].get(layer, types.empty_2d)
-            cross_edges.append(edges_)
-
-        cross_edges = np.concatenate([*cross_edges, np.vstack([node_ids, node_ids]).T])
-        temp_d = {k: next(iter(v)) for k, v in self._old_new_id_d.items()}
-        cross_edges = fastremap.remap(cross_edges, temp_d, preserve_missing_labels=True)
+            edges_ = cross_edges_d[id_].get(layer, types.empty_2d)
+            cx_edges.append(edges_)
 
-        graph, _, _, graph_ids = flatgraph.build_gt_graph(
-            cross_edges, make_directed=True
-        )
+        cx_edges = np.concatenate([*cx_edges, np.vstack([node_ids, node_ids]).T])
+        temp_map = {k: next(iter(v)) for k, v in self._old_new_id_d.items()}
+        cx_edges = fastremap.remap(cx_edges, temp_map, preserve_missing_labels=True)
+        graph, _, _, graph_ids = flatgraph.build_gt_graph(cx_edges, make_directed=True)
         return flatgraph.connected_components(graph), graph_ids
 
     def _get_layer_node_ids(
@@ -419,15 +399,37 @@ def _get_layer_node_ids(
         # replace old identities with new IDs
         mask = np.in1d(node_ids, old_ids)
         node_ids = np.concatenate(
-            [
-                np.array(list(self._old_new_id_d[id_]), dtype=basetypes.NODE_ID)
-                for id_ in node_ids[mask]
-            ]
-            + [node_ids[~mask], new_ids]
+            [self._get_new_ids(node_ids[mask]), node_ids[~mask], new_ids]
         )
         node_ids = np.unique(node_ids)
         layer_mask = self.cg.get_chunk_layers(node_ids) == layer
         return node_ids[layer_mask]
+        # return node_ids
+
+    def _update_cross_edge_cache(self, parent, children):
+        """
+        updates cross chunk edges in cache;
+        this can only be done after all new components at a layer have IDs
+        """
+        cx_edges_d = self.cg.get_cross_chunk_edges(children)
+        cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values(), unique=True)
+
+        parent_layer = self.cg.get_chunk_layer(parent)
+        edge_nodes = np.unique(np.concatenate([*cx_edges_d.values(), types.empty_2d]))
+        edge_parents = self.cg.get_roots(
+            edge_nodes, stop_layer=parent_layer, ceil=False
+        )
+        edge_parents_d = dict(zip(edge_nodes, edge_parents))
+
+        new_cx_edges_d = {}
+        for layer in range(parent_layer, self.cg.meta.layer_count):
+            layer_edges = cx_edges_d.get(layer, types.empty_2d)
+            if len(layer_edges) == 0:
+                continue
+            new_cx_edges_d[layer] = fastremap.remap(
+                layer_edges, edge_parents_d, preserve_missing_labels=True
+            )
+        self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
 
     def _create_new_parents(self, layer: int):
         """
@@ -439,25 +441,30 @@ def _create_new_parents(self, layer: int):
         get cross edges of all, find connected components
         update parent old IDs
         """
+        parent_layer = layer + 1
         new_ids = self._new_ids_d[layer]
         layer_node_ids = self._get_layer_node_ids(new_ids, layer)
+        print(layer, layer_node_ids)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
+        new_parent_ids = []
         for cc_indices in components:
-            parent_layer = layer + 1
             cc_ids = graph_ids[cc_indices]
             if len(cc_ids) == 1:
                 # skip connection
                 parent_layer = self.cg.meta.layer_count
                 for l in range(layer + 1, self.cg.meta.layer_count):
-                    if len(self._cross_edges_d[cc_ids[0]].get(l, types.empty_2d)) > 0:
+                    cx_edges_d = self.cg.get_cross_chunk_edges([cc_ids[0]])
+                    if len(cx_edges_d[cc_ids[0]].get(l, types.empty_2d)) > 0:
                         parent_layer = l
                         break
-
             parent_id = self.cg.id_client.create_node_id(
                 self.cg.get_parent_chunk_id(cc_ids[0], parent_layer),
                 root_chunk=parent_layer == self.cg.meta.layer_count,
             )
             self._new_ids_d[parent_layer].append(parent_id)
+            self._update_id_lineage(parent_id, cc_ids, layer, parent_layer)
+            new_parent_ids.append(parent_id)
+
             self.cg.cache.children_cache[parent_id] = cc_ids
             cache_utils.update(
                 self.cg.cache.parents_cache,
@@ -465,11 +472,9 @@ def _create_new_parents(self, layer: int):
                 parent_id,
             )
 
-            children_cx_edges = [self._cross_edges_d[child] for child in cc_ids]
-            cx_edges = concatenate_cross_edge_dicts(children_cx_edges)
-            self.cg.cache.cross_chunk_edges_cache[parent_id] = cx_edges
-
-            self._update_id_lineage(parent_id, cc_ids, layer, parent_layer)
+        for new_id in new_parent_ids:
+            children = self.cg.get_children(new_id)
+            self._update_cross_edge_cache(new_id, children)
 
     def run(self) -> Iterable:
         """
@@ -492,9 +497,14 @@ def _update_root_id_lineage(self):
         new_root_ids = self._new_ids_d[self.cg.meta.layer_count]
         former_root_ids = self._get_old_ids(new_root_ids)
         former_root_ids = np.unique(former_root_ids)
+
+        print()
+        print(former_root_ids, "->", new_root_ids)
+        print(self.cg.get_children(former_root_ids))
+        print(self.cg.get_children(np.array(new_root_ids, dtype=np.uint64)))
         assert (
             len(former_root_ids) < 2 or len(new_root_ids) < 2
-        ), "Something went wrong."
+        ), "Result inconsistent with either split or merge effects."
         rows = []
         for new_root_id in new_root_ids:
             val_dict = {
@@ -524,10 +534,15 @@ def _update_root_id_lineage(self):
         return rows
 
     def _get_cross_edges_val_dict(self):
+        print("haha", self.cg.get_cross_chunk_edges([216172782113783809]))
         val_dicts = {}
         for layer in range(2, self.cg.meta.layer_count):
             new_ids = np.array(self._new_ids_d[layer], dtype=basetypes.NODE_ID)
             cross_edges_d = self.cg.get_cross_chunk_edges(new_ids)
+            print()
+            print(layer, new_ids)
+            print("cx", cross_edges_d)
+            print("ch", self.cg.get_children(new_ids))
             for id_ in new_ids:
                 val_dict = {}
                 for layer, edges in cross_edges_d[id_].items():

From 50bb03ba1e445f0dc6449e94f73239f3dfa44038 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 23 Aug 2023 02:59:22 +0000
Subject: [PATCH 036/105] fix: undo gcrule changes

---
 pychunkedgraph/graph/attributes.py            |  2 +-
 .../graph/client/bigtable/client.py           | 26 +++++++++----------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/pychunkedgraph/graph/attributes.py b/pychunkedgraph/graph/attributes.py
index 84283161d..33f675dc8 100644
--- a/pychunkedgraph/graph/attributes.py
+++ b/pychunkedgraph/graph/attributes.py
@@ -122,7 +122,7 @@ class Connectivity:
 
     FakeEdges = _Attribute(
         key=b"fake_edges",
-        family_id="5",
+        family_id="4",
         serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
     )
 
diff --git a/pychunkedgraph/graph/client/bigtable/client.py b/pychunkedgraph/graph/client/bigtable/client.py
index 1bd027255..6601b654e 100644
--- a/pychunkedgraph/graph/client/bigtable/client.py
+++ b/pychunkedgraph/graph/client/bigtable/client.py
@@ -72,6 +72,18 @@ def __init__(
         self._version = None
         self._max_row_key_count = config.MAX_ROW_KEY_COUNT
 
+    def _create_column_families(self):
+        f = self._table.column_family("0")
+        f.create()
+        f = self._table.column_family("1", gc_rule=MaxVersionsGCRule(1))
+        f.create()
+        f = self._table.column_family("2")
+        f.create()
+        f = self._table.column_family("3", gc_rule=MaxAgeGCRule(timedelta(days=365)))
+        f.create()
+        f = self._table.column_family("4")
+        f.create()
+
     @property
     def graph_meta(self):
         return self._graph_meta
@@ -629,20 +641,6 @@ def get_compatible_timestamp(
         return utils.get_google_compatible_time_stamp(time_stamp, round_up=round_up)
 
     # PRIVATE METHODS
-    def _create_column_families(self):
-        f = self._table.column_family("0")
-        f.create()
-        f = self._table.column_family("1", gc_rule=MaxVersionsGCRule(1))
-        f.create()
-        f = self._table.column_family("2")
-        f.create()
-        f = self._table.column_family("3", gc_rule=MaxAgeGCRule(timedelta(days=365)))
-        f.create()
-        f = self._table.column_family("4", gc_rule=MaxVersionsGCRule(1))
-        f.create()
-        f = self._table.column_family("5")
-        f.create()
-
     def _get_ids_range(self, key: bytes, size: int) -> typing.Tuple:
         """Returns a range (min, max) of IDs for a given `key`."""
         column = attributes.Concurrency.Counter

From 5967596eafc2c03cc27ca07e8ed719c74b867a30 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 23 Aug 2023 03:01:18 +0000
Subject: [PATCH 037/105] fix: add mock_edges; linting issues

---
 pychunkedgraph/debug/utils.py        | 4 +++-
 pychunkedgraph/graph/chunkedgraph.py | 2 ++
 pychunkedgraph/graph/types.py        | 3 +--
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/debug/utils.py b/pychunkedgraph/debug/utils.py
index 179f50aef..e194f4ee1 100644
--- a/pychunkedgraph/debug/utils.py
+++ b/pychunkedgraph/debug/utils.py
@@ -1,3 +1,5 @@
+# pylint: disable=invalid-name, missing-docstring, bare-except, unidiomatic-typecheck
+
 import numpy as np
 
 from ..graph import ChunkedGraph
@@ -27,7 +29,7 @@ def print_node(
     if cg.get_chunk_layer(node) <= stop_layer:
         return
     for child in children:
-        print_node(cg, child, indent=indent + 1, stop_layer=stop_layer)
+        print_node(cg, child, indent=indent + 4, stop_layer=stop_layer)
 
 
 def get_l2children(cg: ChunkedGraph, node: NODE_ID) -> np.ndarray:
diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index f4e87290c..a118d4c82 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -642,6 +642,8 @@ def get_l2_agglomerations(
             chain(edges_d.values(), fake_edges.values()),
             Edges([], []),
         )
+        if self.mock_edges is not None:
+            all_chunk_edges += self.mock_edges
 
         if edges_only:
             if self.mock_edges is not None:
diff --git a/pychunkedgraph/graph/types.py b/pychunkedgraph/graph/types.py
index 9a551f35c..1f35e5f6b 100644
--- a/pychunkedgraph/graph/types.py
+++ b/pychunkedgraph/graph/types.py
@@ -1,5 +1,4 @@
-from typing import Dict
-from typing import Iterable
+# pylint: disable=invalid-name, missing-docstring
 from collections import namedtuple
 
 import numpy as np

From 005b0278a7cb4d425d98a11df9e8d979ec0bd024 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 23 Aug 2023 22:55:50 +0000
Subject: [PATCH 038/105] feat: edits using cached cross edges

---
 pychunkedgraph/graph/edits.py | 248 +++++++++++++++++++++++-----------
 1 file changed, 172 insertions(+), 76 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 0086f00cd..ba9481139 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -221,6 +221,7 @@ def add_edges(
 
     # update cross chunk edges by replacing old_ids with new
     # this can be done only after all new IDs have been created
+    updated_entries = []
     for new_id, cc_indices in zip(new_l2_ids, components):
         l2ids_ = graph_ids[cc_indices]
         new_cx_edges_d = {}
@@ -230,8 +231,36 @@ def add_edges(
         for layer, edges in cx_edges_d.items():
             edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
             new_cx_edges_d[layer] = edges
+            assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
 
+        # must also update cross chunk edges in reverse (counterparts)
+        layer_edges = new_cx_edges_d.get(2, types.empty_2d)
+        counterparts = layer_edges[:, 1]
+        counterpart_cx_edges_d = cg.get_cross_chunk_edges(counterparts)
+        temp_map = {
+            old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
+        }
+        for counterpart, edges_d in counterpart_cx_edges_d.items():
+            val_dict = {}
+            for layer in range(2, cg.meta.layer_count):
+                edges = edges_d.get(layer, types.empty_2d)
+                if edges.size == 0:
+                    continue
+                assert np.all(edges[:, 0] == counterpart)
+                edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
+                edges_d[layer] = edges
+                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+            if not val_dict:
+                continue
+            cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
+            row = cg.client.mutate_row(
+                serialize_uint64(counterpart),
+                val_dict,
+                time_stamp=time_stamp,
+            )
+            updated_entries.append(row)
+
     create_parents = CreateParentNodes(
         cg,
         new_l2_ids=new_l2_ids,
@@ -244,10 +273,8 @@ def add_edges(
     )
 
     new_roots = create_parents.run()
-    print()
-    print("layers", cg.meta.layer_count, "new_roots", new_roots)
-    new_entries = create_parents.create_new_entries()
-    return new_roots, new_l2_ids, new_entries
+    create_parents.create_new_entries()
+    return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
 
 
 def _process_l2_agglomeration(agg: types.Agglomeration, removed_edges: np.ndarray):
@@ -257,12 +284,36 @@ def _process_l2_agglomeration(agg: types.Agglomeration, removed_edges: np.ndarra
     chunk_edges = agg.in_edges.get_pairs()
     chunk_edges = chunk_edges[~in2d(chunk_edges, removed_edges)]
 
+    # cross during edits refers to all edges crossing chunk boundary
+    cross_edges = [agg.out_edges.get_pairs(), agg.cross_edges.get_pairs()]
+    cross_edges = np.concatenate(cross_edges)
+    cross_edges = cross_edges[~in2d(cross_edges, removed_edges)]
+
     isolated_ids = agg.supervoxels[~np.in1d(agg.supervoxels, chunk_edges)]
     isolated_edges = np.column_stack((isolated_ids, isolated_ids))
     graph, _, _, graph_ids = flatgraph.build_gt_graph(
         np.concatenate([chunk_edges, isolated_edges]), make_directed=True
     )
-    return flatgraph.connected_components(graph), graph_ids
+    return flatgraph.connected_components(graph), graph_ids, cross_edges
+
+
+def _filter_component_cross_edges(
+    component_ids: np.ndarray, cross_edges: np.ndarray, cross_edge_layers: np.ndarray
+) -> Dict[int, np.ndarray]:
+    """
+    Filters cross edges for a connected component `cc_ids`
+    from `cross_edges` of the complete chunk.
+    """
+    mask = np.in1d(cross_edges[:, 0], component_ids)
+    cross_edges_ = cross_edges[mask]
+    cross_edge_layers_ = cross_edge_layers[mask]
+    edges_d = {}
+    for layer in np.unique(cross_edge_layers_):
+        edge_m = cross_edge_layers_ == layer
+        _cross_edges = cross_edges_[edge_m]
+        if _cross_edges.size:
+            edges_d[layer] = _cross_edges
+    return edges_d
 
 
 def remove_edges(
@@ -282,25 +333,67 @@ def remove_edges(
     new_old_id_d, old_new_id_d, old_hierarchy_d = _init_old_hierarchy(
         cg, l2ids, parent_ts=parent_ts
     )
-    l2id_chunk_id_d = dict(zip(l2ids.tolist(), cg.get_chunk_ids_from_node_ids(l2ids)))
-    cross_edges_d = cg.get_cross_chunk_edges(l2ids)
+    chunk_id_map = dict(zip(l2ids.tolist(), cg.get_chunk_ids_from_node_ids(l2ids)))
 
     removed_edges = np.concatenate([atomic_edges, atomic_edges[:, ::-1]], axis=0)
     new_l2_ids = []
     for id_ in l2ids:
-        l2_agg = l2id_agglomeration_d[id_]
-        ccs, graph_ids = _process_l2_agglomeration(l2_agg, removed_edges)
-        new_parent_ids = cg.id_client.create_node_ids(
-            l2id_chunk_id_d[l2_agg.node_id], len(ccs)
-        )
+        agg = l2id_agglomeration_d[id_]
+        ccs, graph_ids, cross_edges = _process_l2_agglomeration(agg, removed_edges)
+        new_parents = cg.id_client.create_node_ids(chunk_id_map[agg.node_id], len(ccs))
+
+        cross_edge_layers = cg.get_cross_chunk_edges_layer(cross_edges)
         for i_cc, cc in enumerate(ccs):
-            new_id = new_parent_ids[i_cc]
-            cg.cache.children_cache[new_id] = graph_ids[cc]
-            cg.cache.atomic_cx_edges_cache[new_id] = None
-            cache_utils.update(cg.cache.parents_cache, graph_ids[cc], new_id)
+            new_id = new_parents[i_cc]
             new_l2_ids.append(new_id)
             new_old_id_d[new_id].add(id_)
             old_new_id_d[id_].add(new_id)
+            cg.cache.children_cache[new_id] = graph_ids[cc]
+            cache_utils.update(cg.cache.parents_cache, graph_ids[cc], new_id)
+            cg.cache.cross_chunk_edges_cache[new_id] = _filter_component_cross_edges(
+                graph_ids[cc], cross_edges, cross_edge_layers
+            )
+
+    updated_entries = []
+    new_cx_edges_d = cg.get_cross_chunk_edges(new_l2_ids)
+    for new_id in new_l2_ids:
+        cx_edges_d = new_cx_edges_d.get(new_id, {})
+        for layer, edges in cx_edges_d.items():
+            svs = np.unique(edges)
+            parents = cg.get_parents(svs)
+            temp_map = dict(zip(svs, parents))
+
+            edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
+            edges = np.unique(edges, axis=0)
+            cx_edges_d[layer] = edges
+            assert np.all(edges[:, 0] == new_id)
+        cg.cache.cross_chunk_edges_cache[new_id] = cx_edges_d
+
+        layer_edges = cx_edges_d.get(2, types.empty_2d)
+        counterparts = layer_edges[:, 1]
+        counterpart_cx_edges_d = cg.get_cross_chunk_edges(counterparts)
+        temp_map = {
+            old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
+        }
+        for counterpart, edges_d in counterpart_cx_edges_d.items():
+            val_dict = {}
+            for layer in range(2, cg.meta.layer_count):
+                edges = edges_d.get(layer, types.empty_2d)
+                if edges.size == 0:
+                    continue
+                assert np.all(edges[:, 0] == counterpart)
+                edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
+                edges_d[layer] = edges
+                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+            if not val_dict:
+                continue
+            cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
+            row = cg.client.mutate_row(
+                serialize_uint64(counterpart),
+                val_dict,
+                time_stamp=time_stamp,
+            )
+            updated_entries.append(row)
 
     create_parents = CreateParentNodes(
         cg,
@@ -313,8 +406,16 @@ def remove_edges(
         parent_ts=parent_ts,
     )
     new_roots = create_parents.run()
-    new_entries = create_parents.create_new_entries()
-    return new_roots, new_l2_ids, new_entries
+    create_parents.create_new_entries()
+    return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
+
+
+def _get_flipped_ids(id_map, node_ids):
+    """
+    returns old or new ids according to the map
+    """
+    ids = [np.array(list(id_map[id_]), dtype=basetypes.NODE_ID) for id_ in node_ids]
+    return np.concatenate(ids)
 
 
 class CreateParentNodes:
@@ -331,6 +432,7 @@ def __init__(
         parent_ts: datetime.datetime = None,
     ):
         self.cg = cg
+        self.new_entries = []
         self._new_l2_ids = new_l2_ids
         self._old_hierarchy_d = old_hierarchy_d
         self._new_old_id_d = new_old_id_d
@@ -355,20 +457,6 @@ def _update_id_lineage(
                 self._new_old_id_d[parent].add(old_id)
                 self._old_new_id_d[old_id].add(parent)
 
-    def _get_old_ids(self, new_ids):
-        old_ids = [
-            np.array(list(self._new_old_id_d[id_]), dtype=basetypes.NODE_ID)
-            for id_ in new_ids
-        ]
-        return np.concatenate(old_ids)
-
-    def _get_new_ids(self, old_ids):
-        old_ids = [
-            np.array(list(self._old_new_id_d[id_]), dtype=basetypes.NODE_ID)
-            for id_ in old_ids
-        ]
-        return np.concatenate(old_ids)
-
     def _get_connected_components(self, node_ids: np.ndarray, layer: int):
         with TimeIt(
             f"get_cross_chunk_edges.{layer}",
@@ -381,10 +469,7 @@ def _get_connected_components(self, node_ids: np.ndarray, layer: int):
         for id_ in node_ids:
             edges_ = cross_edges_d[id_].get(layer, types.empty_2d)
             cx_edges.append(edges_)
-
         cx_edges = np.concatenate([*cx_edges, np.vstack([node_ids, node_ids]).T])
-        temp_map = {k: next(iter(v)) for k, v in self._old_new_id_d.items()}
-        cx_edges = fastremap.remap(cx_edges, temp_map, preserve_missing_labels=True)
         graph, _, _, graph_ids = flatgraph.build_gt_graph(cx_edges, make_directed=True)
         return flatgraph.connected_components(graph), graph_ids
 
@@ -392,14 +477,14 @@ def _get_layer_node_ids(
         self, new_ids: np.ndarray, layer: int
     ) -> Tuple[np.ndarray, np.ndarray]:
         # get old identities of new IDs
-        old_ids = self._get_old_ids(new_ids)
+        old_ids = _get_flipped_ids(self._new_old_id_d, new_ids)
         # get their parents, then children of those parents
-        parents = self.cg.get_parents(old_ids, time_stamp=self._last_successful_ts)
-        node_ids = self.cg.get_children(np.unique(parents), flatten=True)
+        old_parents = self.cg.get_parents(old_ids, time_stamp=self._last_successful_ts)
+        siblings = self.cg.get_children(np.unique(old_parents), flatten=True)
         # replace old identities with new IDs
-        mask = np.in1d(node_ids, old_ids)
+        mask = np.in1d(siblings, old_ids)
         node_ids = np.concatenate(
-            [self._get_new_ids(node_ids[mask]), node_ids[~mask], new_ids]
+            [_get_flipped_ids(self._old_new_id_d, old_ids), siblings[~mask], new_ids]
         )
         node_ids = np.unique(node_ids)
         layer_mask = self.cg.get_chunk_layers(node_ids) == layer
@@ -423,14 +508,40 @@ def _update_cross_edge_cache(self, parent, children):
 
         new_cx_edges_d = {}
         for layer in range(parent_layer, self.cg.meta.layer_count):
-            layer_edges = cx_edges_d.get(layer, types.empty_2d)
-            if len(layer_edges) == 0:
+            edges = cx_edges_d.get(layer, types.empty_2d)
+            if len(edges) == 0:
                 continue
-            new_cx_edges_d[layer] = fastremap.remap(
-                layer_edges, edge_parents_d, preserve_missing_labels=True
-            )
+            edges = fastremap.remap(edges, edge_parents_d, preserve_missing_labels=True)
+            new_cx_edges_d[layer] = np.unique(edges, axis=0)
+            assert np.all(edges[:, 0] == parent)
         self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
 
+        layer_edges = new_cx_edges_d.get(parent_layer, types.empty_2d)
+        counterparts = layer_edges[:, 1]
+        counterpart_cx_edges_d = self.cg.get_cross_chunk_edges(counterparts)
+        temp_map = {
+            old_id: parent for old_id in _get_flipped_ids(self._new_old_id_d, [parent])
+        }
+        for counterpart, edges_d in counterpart_cx_edges_d.items():
+            val_dict = {}
+            for layer in range(parent_layer, self.cg.meta.layer_count):
+                edges = edges_d.get(layer, types.empty_2d)
+                if edges.size == 0:
+                    continue
+                assert np.all(edges[:, 0] == counterpart)
+                edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
+                edges_d[layer] = edges
+                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+            if not val_dict:
+                continue
+            self.cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
+            row = self.cg.client.mutate_row(
+                serialize_uint64(counterpart),
+                val_dict,
+                time_stamp=self._time_stamp,
+            )
+            self.new_entries.append(row)
+
     def _create_new_parents(self, layer: int):
         """
         keep track of old IDs
@@ -444,7 +555,6 @@ def _create_new_parents(self, layer: int):
         parent_layer = layer + 1
         new_ids = self._new_ids_d[layer]
         layer_node_ids = self._get_layer_node_ids(new_ids, layer)
-        print(layer, layer_node_ids)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
         new_parent_ids = []
         for cc_indices in components:
@@ -494,24 +604,17 @@ def run(self) -> Iterable:
         return self._new_ids_d[self.cg.meta.layer_count]
 
     def _update_root_id_lineage(self):
-        new_root_ids = self._new_ids_d[self.cg.meta.layer_count]
-        former_root_ids = self._get_old_ids(new_root_ids)
-        former_root_ids = np.unique(former_root_ids)
-
-        print()
-        print(former_root_ids, "->", new_root_ids)
-        print(self.cg.get_children(former_root_ids))
-        print(self.cg.get_children(np.array(new_root_ids, dtype=np.uint64)))
-        assert (
-            len(former_root_ids) < 2 or len(new_root_ids) < 2
-        ), "Result inconsistent with either split or merge effects."
-        rows = []
-        for new_root_id in new_root_ids:
+        new_roots = self._new_ids_d[self.cg.meta.layer_count]
+        former_roots = _get_flipped_ids(self._new_old_id_d, new_roots)
+        former_roots = np.unique(former_roots)
+
+        assert len(former_roots) < 2 or len(new_roots) < 2, "new roots are inconsistent"
+        for new_root_id in new_roots:
             val_dict = {
-                attributes.Hierarchy.FormerParent: np.array(former_root_ids),
+                attributes.Hierarchy.FormerParent: np.array(former_roots),
                 attributes.OperationLogs.OperationID: self._operation_id,
             }
-            rows.append(
+            self.new_entries.append(
                 self.cg.client.mutate_row(
                     serialize_uint64(new_root_id),
                     val_dict,
@@ -519,30 +622,24 @@ def _update_root_id_lineage(self):
                 )
             )
 
-        for former_root_id in former_root_ids:
+        for former_root_id in former_roots:
             val_dict = {
-                attributes.Hierarchy.NewParent: np.array(new_root_ids),
+                attributes.Hierarchy.NewParent: np.array(new_roots),
                 attributes.OperationLogs.OperationID: self._operation_id,
             }
-            rows.append(
+            self.new_entries.append(
                 self.cg.client.mutate_row(
                     serialize_uint64(former_root_id),
                     val_dict,
                     time_stamp=self._time_stamp,
                 )
             )
-        return rows
 
-    def _get_cross_edges_val_dict(self):
-        print("haha", self.cg.get_cross_chunk_edges([216172782113783809]))
+    def _get_cross_edges_val_dicts(self):
         val_dicts = {}
         for layer in range(2, self.cg.meta.layer_count):
             new_ids = np.array(self._new_ids_d[layer], dtype=basetypes.NODE_ID)
             cross_edges_d = self.cg.get_cross_chunk_edges(new_ids)
-            print()
-            print(layer, new_ids)
-            print("cx", cross_edges_d)
-            print("ch", self.cg.get_children(new_ids))
             for id_ in new_ids:
                 val_dict = {}
                 for layer, edges in cross_edges_d[id_].items():
@@ -551,8 +648,7 @@ def _get_cross_edges_val_dict(self):
         return val_dicts
 
     def create_new_entries(self) -> List:
-        rows = []
-        val_dicts = self._get_cross_edges_val_dict()
+        val_dicts = self._get_cross_edges_val_dicts()
         for layer in range(2, self.cg.meta.layer_count + 1):
             new_ids = self._new_ids_d[layer]
             for id_ in new_ids:
@@ -562,7 +658,7 @@ def create_new_entries(self) -> List:
                     self.cg.get_chunk_layers(children)
                 ) < self.cg.get_chunk_layer(id_), "Parent layer less than children."
                 val_dict[attributes.Hierarchy.Child] = children
-                rows.append(
+                self.new_entries.append(
                     self.cg.client.mutate_row(
                         serialize_uint64(id_),
                         val_dict,
@@ -570,11 +666,11 @@ def create_new_entries(self) -> List:
                     )
                 )
                 for child_id in children:
-                    rows.append(
+                    self.new_entries.append(
                         self.cg.client.mutate_row(
                             serialize_uint64(child_id),
                             {attributes.Hierarchy.Parent: id_},
                             time_stamp=self._time_stamp,
                         )
                     )
-        return rows + self._update_root_id_lineage()
+        self._update_root_id_lineage()

From db3911d7d205955ff2c4e1d9257372fe2ccf7cc2 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Thu, 24 Aug 2023 00:41:22 +0000
Subject: [PATCH 039/105] fix: use function for dry code

---
 pychunkedgraph/graph/edits.py | 135 +++++++++++++---------------------
 1 file changed, 51 insertions(+), 84 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index ba9481139..7a2a03408 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -178,6 +178,40 @@ def check_fake_edges(
     return atomic_edges, rows
 
 
+def _update_neighbor_cross_edges(
+    cg, new_id: int, cx_edges_d: dict, new_old_id_d: dict, time_stamp
+) -> list:
+    updated_entries = []
+    node_layer = cg.get_chunk_layer(new_id)
+    for cx_layer in range(node_layer, cg.meta.layer_count):
+        layer_edges = cx_edges_d.get(cx_layer, types.empty_2d)
+        counterparts = layer_edges[:, 1]
+        counterpart_cx_edges_d = cg.get_cross_chunk_edges(counterparts)
+        temp_map = {
+            old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
+        }
+        for counterpart, edges_d in counterpart_cx_edges_d.items():
+            val_dict = {}
+            for layer in range(2, cg.meta.layer_count):
+                edges = edges_d.get(layer, types.empty_2d)
+                if edges.size == 0:
+                    continue
+                assert np.all(edges[:, 0] == counterpart)
+                edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
+                edges_d[layer] = edges
+                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+            if not val_dict:
+                continue
+            cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
+            row = cg.client.mutate_row(
+                serialize_uint64(counterpart),
+                val_dict,
+                time_stamp=time_stamp,
+            )
+            updated_entries.append(row)
+    return updated_entries
+
+
 def add_edges(
     cg,
     *,
@@ -233,33 +267,10 @@ def add_edges(
             new_cx_edges_d[layer] = edges
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
-
-        # must also update cross chunk edges in reverse (counterparts)
-        layer_edges = new_cx_edges_d.get(2, types.empty_2d)
-        counterparts = layer_edges[:, 1]
-        counterpart_cx_edges_d = cg.get_cross_chunk_edges(counterparts)
-        temp_map = {
-            old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
-        }
-        for counterpart, edges_d in counterpart_cx_edges_d.items():
-            val_dict = {}
-            for layer in range(2, cg.meta.layer_count):
-                edges = edges_d.get(layer, types.empty_2d)
-                if edges.size == 0:
-                    continue
-                assert np.all(edges[:, 0] == counterpart)
-                edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
-                edges_d[layer] = edges
-                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
-            if not val_dict:
-                continue
-            cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
-            row = cg.client.mutate_row(
-                serialize_uint64(counterpart),
-                val_dict,
-                time_stamp=time_stamp,
-            )
-            updated_entries.append(row)
+        entries = _update_neighbor_cross_edges(
+            cg, new_id, new_cx_edges_d, new_old_id_d, time_stamp
+        )
+        updated_entries.extend(entries)
 
     create_parents = CreateParentNodes(
         cg,
@@ -355,45 +366,23 @@ def remove_edges(
             )
 
     updated_entries = []
-    new_cx_edges_d = cg.get_cross_chunk_edges(new_l2_ids)
+    cx_edges_d = cg.get_cross_chunk_edges(new_l2_ids)
     for new_id in new_l2_ids:
-        cx_edges_d = new_cx_edges_d.get(new_id, {})
-        for layer, edges in cx_edges_d.items():
+        new_cx_edges_d = cx_edges_d.get(new_id, {})
+        for layer, edges in new_cx_edges_d.items():
             svs = np.unique(edges)
             parents = cg.get_parents(svs)
             temp_map = dict(zip(svs, parents))
 
             edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
             edges = np.unique(edges, axis=0)
-            cx_edges_d[layer] = edges
+            new_cx_edges_d[layer] = edges
             assert np.all(edges[:, 0] == new_id)
-        cg.cache.cross_chunk_edges_cache[new_id] = cx_edges_d
-
-        layer_edges = cx_edges_d.get(2, types.empty_2d)
-        counterparts = layer_edges[:, 1]
-        counterpart_cx_edges_d = cg.get_cross_chunk_edges(counterparts)
-        temp_map = {
-            old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
-        }
-        for counterpart, edges_d in counterpart_cx_edges_d.items():
-            val_dict = {}
-            for layer in range(2, cg.meta.layer_count):
-                edges = edges_d.get(layer, types.empty_2d)
-                if edges.size == 0:
-                    continue
-                assert np.all(edges[:, 0] == counterpart)
-                edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
-                edges_d[layer] = edges
-                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
-            if not val_dict:
-                continue
-            cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
-            row = cg.client.mutate_row(
-                serialize_uint64(counterpart),
-                val_dict,
-                time_stamp=time_stamp,
-            )
-            updated_entries.append(row)
+        cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
+        entries = _update_neighbor_cross_edges(
+            cg, new_id, new_cx_edges_d, new_old_id_d, time_stamp
+        )
+        updated_entries.extend(entries)
 
     create_parents = CreateParentNodes(
         cg,
@@ -515,32 +504,10 @@ def _update_cross_edge_cache(self, parent, children):
             new_cx_edges_d[layer] = np.unique(edges, axis=0)
             assert np.all(edges[:, 0] == parent)
         self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
-
-        layer_edges = new_cx_edges_d.get(parent_layer, types.empty_2d)
-        counterparts = layer_edges[:, 1]
-        counterpart_cx_edges_d = self.cg.get_cross_chunk_edges(counterparts)
-        temp_map = {
-            old_id: parent for old_id in _get_flipped_ids(self._new_old_id_d, [parent])
-        }
-        for counterpart, edges_d in counterpart_cx_edges_d.items():
-            val_dict = {}
-            for layer in range(parent_layer, self.cg.meta.layer_count):
-                edges = edges_d.get(layer, types.empty_2d)
-                if edges.size == 0:
-                    continue
-                assert np.all(edges[:, 0] == counterpart)
-                edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
-                edges_d[layer] = edges
-                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
-            if not val_dict:
-                continue
-            self.cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
-            row = self.cg.client.mutate_row(
-                serialize_uint64(counterpart),
-                val_dict,
-                time_stamp=self._time_stamp,
-            )
-            self.new_entries.append(row)
+        entries = _update_neighbor_cross_edges(
+            self.cg, parent, new_cx_edges_d, self._new_old_id_d, self._time_stamp
+        )
+        self.new_entries.extend(entries)
 
     def _create_new_parents(self, layer: int):
         """

From 7522de47c2999f55546692899cea30d8922b336c Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 28 Aug 2023 21:02:26 +0000
Subject: [PATCH 040/105] fix: mask skipped nodes

---
 pychunkedgraph/ingest/create/abstract_layers.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 9a339443f..df6375c5f 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -214,8 +214,10 @@ def _write(
 
                     layer_edges = node_cx_edges_d[layer]
                     edges_nodes = np.unique(layer_edges)
-                    edges_nodes_parents = cg.get_parents(edges_nodes)
-                    temp_map = dict(zip(edges_nodes, edges_nodes_parents))
+                    edges_nodes_layers = cg.get_chunk_layers(edges_nodes)
+                    mask = edges_nodes_layers < layer_id - 1
+                    edges_nodes_parents = cg.get_parents(edges_nodes[mask])
+                    temp_map = dict(zip(edges_nodes[mask], edges_nodes_parents))
 
                     layer_edges = fastremap.remap(
                         layer_edges, temp_map, preserve_missing_labels=True
@@ -230,7 +232,9 @@ def _write(
 
             row_id = serializers.serialize_uint64(parent_id)
             val_dict = {attributes.Hierarchy.Child: node_ids}
-            parent_cx_edges_d = concatenate_cross_edge_dicts(children_cx_edges, unique=True)
+            parent_cx_edges_d = concatenate_cross_edge_dicts(
+                children_cx_edges, unique=True
+            )
             for layer in range(parent_layer, cg.meta.layer_count):
                 if not layer in parent_cx_edges_d:
                     continue

From f7a60314a09602c67d639dff931ae4aa583313bf Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 28 Aug 2023 21:03:46 +0000
Subject: [PATCH 041/105] fix: use the correct layer variable

---
 pychunkedgraph/ingest/create/abstract_layers.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index df6375c5f..d65e225a3 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -190,8 +190,9 @@ def _write(
         for i_cc, node_ids in enumerate(cc_connections[parent_layer]):
             parent_id = reserved_parent_ids[i_cc]
 
-            if parent_layer == 3:
-                # children are from atomic chunks
+            if layer_id == 3:
+                # when layer 3 is being processed, children chunks are at layer 2
+                # layer 2 chunks at this time will only have atomic cross edges
                 cx_edges_d = cg.get_atomic_cross_edges(node_ids)
             else:
                 # children are from abstract chunks

From 815c22b456929d790b5a697a67c0bae5b0c872ef Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 29 Aug 2023 14:58:10 +0000
Subject: [PATCH 042/105] fix: redis pipeline for lower latency

---
 pychunkedgraph/ingest/cli.py    | 29 ++++++++++++++++++++++++++---
 pychunkedgraph/ingest/rq_cli.py |  4 ++--
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 2ad51ca18..997bf768a 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -121,9 +121,32 @@ def ingest_status():
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
     layers = range(2, imanager.cg_meta.layer_count + 1)
-    for layer, layer_count in zip(layers, imanager.cg_meta.layer_chunk_counts):
-        done = redis.scard(f"{layer}c")
-        print(f"{layer}\t: {done} / {layer_count}")
+    layer_counts = imanager.cg_meta.layer_chunk_counts
+
+    pipeline = redis.pipeline()
+    for layer in layers:
+        pipeline.scard(f"{layer}c")
+        queue = Queue(f"l{layer}")
+        pipeline.llen(queue.key)
+        pipeline.zcard(queue.failed_job_registry.key)
+
+    results = pipeline.execute()
+    completed = []
+    queued = []
+    failed = []
+    for i in range(0, len(results), 3):
+        result = results[i : i + 3]
+        completed.append(result[0])
+        queued.append(result[1])
+        failed.append(result[2])
+
+    print("layer status:")
+    for layer, done, count in zip(layers, completed, layer_counts):
+        print(f"{layer}\t: {done} / {count}")
+
+    print("\n\nqueue status:")
+    for layer, q, f in zip(layers, queued, failed):
+        print(f"l{layer}\t: queued {q}, failed {f}")
 
 
 @ingest_cli.command("chunk")
diff --git a/pychunkedgraph/ingest/rq_cli.py b/pychunkedgraph/ingest/rq_cli.py
index 27b9c865d..c9b21ae36 100644
--- a/pychunkedgraph/ingest/rq_cli.py
+++ b/pychunkedgraph/ingest/rq_cli.py
@@ -1,7 +1,8 @@
+# pylint: disable=invalid-name, missing-function-docstring
+
 """
 cli for redis jobs
 """
-import os
 import sys
 
 import click
@@ -14,7 +15,6 @@
 from rq.exceptions import NoSuchJobError
 from rq.registry import StartedJobRegistry
 from rq.registry import FailedJobRegistry
-from flask import current_app
 from flask.cli import AppGroup
 
 from ..utils.redis import REDIS_HOST

From b3ea907399330332957c121c152664af7ff17468 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 29 Aug 2023 15:15:24 +0000
Subject: [PATCH 043/105] fix: pass redis connection

---
 pychunkedgraph/ingest/cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 997bf768a..93bb328c1 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -126,7 +126,7 @@ def ingest_status():
     pipeline = redis.pipeline()
     for layer in layers:
         pipeline.scard(f"{layer}c")
-        queue = Queue(f"l{layer}")
+        queue = Queue(f"l{layer}", connection=redis)
         pipeline.llen(queue.key)
         pipeline.zcard(queue.failed_job_registry.key)
 
@@ -146,7 +146,7 @@ def ingest_status():
 
     print("\n\nqueue status:")
     for layer, q, f in zip(layers, queued, failed):
-        print(f"l{layer}\t: queued {q}, failed {f}")
+        print(f"l{layer}\t: queued\t {q}, failed\t {f}")
 
 
 @ingest_cli.command("chunk")

From c9281c82e6e5bbe61d58fd74266e46a73360516b Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 29 Aug 2023 17:51:17 +0000
Subject: [PATCH 044/105] fix: version update for deployment

---
 pychunkedgraph/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 819c3f307..528787cfc 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "2.18.3"
+__version__ = "3.0.0"

From 43971e23c241d5529e66cf39c4b7157dfbb7e0cd Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 29 Aug 2023 17:52:17 +0000
Subject: [PATCH 045/105] fix: status print padding

---
 pychunkedgraph/ingest/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 93bb328c1..0fe925d78 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -146,7 +146,7 @@ def ingest_status():
 
     print("\n\nqueue status:")
     for layer, q, f in zip(layers, queued, failed):
-        print(f"l{layer}\t: queued\t {q}, failed\t {f}")
+        print(f"l{layer}\t: queued\t {q}\t, failed\t {f}")
 
 
 @ingest_cli.command("chunk")

From 94cb711a3eeb94b8bc3a096287df01a785721e78 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 30 Aug 2023 02:37:55 +0000
Subject: [PATCH 046/105] fix: filter active edges for split, add timestamp for
 reading cross chunk edges

---
 pychunkedgraph/graph/cache.py        | 14 ++++--
 pychunkedgraph/graph/chunkedgraph.py | 14 +++++-
 pychunkedgraph/graph/edits.py        | 67 ++++++++++++++++++++++------
 3 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/pychunkedgraph/graph/cache.py b/pychunkedgraph/graph/cache.py
index 52fdfd022..d381baa7d 100644
--- a/pychunkedgraph/graph/cache.py
+++ b/pychunkedgraph/graph/cache.py
@@ -68,11 +68,11 @@ def children_decorated(node_id):
 
         return children_decorated(node_id)
 
-    def cross_chunk_edges(self, node_id):
+    def cross_chunk_edges(self, node_id, *, time_stamp: datetime = None):
         @cached(cache=self.cross_chunk_edges_cache, key=lambda node_id: node_id)
         def cross_edges_decorated(node_id):
             edges = self._cg.get_cross_chunk_edges(
-                np.array([node_id], dtype=NODE_ID), raw_only=True
+                np.array([node_id], dtype=NODE_ID), raw_only=True, time_stamp=time_stamp
             )
             return edges[node_id]
 
@@ -107,7 +107,9 @@ def children_multiple(self, node_ids: np.ndarray, *, flatten=False):
             return np.concatenate([*result.values()])
         return result
 
-    def cross_chunk_edges_multiple(self, node_ids: np.ndarray):
+    def cross_chunk_edges_multiple(
+        self, node_ids: np.ndarray, *, time_stamp: datetime = None
+    ):
         result = {}
         node_ids = np.array(node_ids, dtype=NODE_ID)
         if not node_ids.size:
@@ -119,7 +121,11 @@ def cross_chunk_edges_multiple(self, node_ids: np.ndarray):
         result.update(
             {id_: edges_ for id_, edges_ in zip(node_ids[mask], cached_edges_)}
         )
-        result.update(self._cg.get_cross_chunk_edges(node_ids[~mask], raw_only=True))
+        result.update(
+            self._cg.get_cross_chunk_edges(
+                node_ids[~mask], raw_only=True, time_stamp=time_stamp
+            )
+        )
         update(
             self.cross_chunk_edges_cache,
             node_ids[~mask],
diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index a118d4c82..049c7f683 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -318,12 +318,17 @@ def get_atomic_cross_edges(self, l2_ids: typing.Iterable) -> typing.Dict:
         return result
 
     def get_cross_chunk_edges(
-        self, node_ids: typing.Iterable, *, raw_only=False
+        self,
+        node_ids: typing.Iterable,
+        *,
+        raw_only=False,
+        time_stamp: typing.Optional[datetime.datetime] = None,
     ) -> typing.Dict:
         """
         Returns cross edges for `node_ids`.
         A dict of the form `{node_id: {layer: cross_edges}}`.
         """
+        time_stamp = misc_utils.get_valid_timestamp(time_stamp)
         if raw_only or not self.cache:
             result = {}
             node_ids = np.array(node_ids, dtype=basetypes.NODE_ID)
@@ -333,7 +338,12 @@ def get_cross_chunk_edges(
                 attributes.Connectivity.CrossChunkEdge[l]
                 for l in range(2, self.meta.layer_count)
             ]
-            node_edges_d_d = self.client.read_nodes(node_ids=node_ids, properties=attrs)
+            node_edges_d_d = self.client.read_nodes(
+                node_ids=node_ids,
+                properties=attrs,
+                end_time=time_stamp,
+                end_time_inclusive=True,
+            )
             for id_ in node_ids:
                 try:
                     result[id_] = {
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 7a2a03408..c7485a26e 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -179,14 +179,16 @@ def check_fake_edges(
 
 
 def _update_neighbor_cross_edges(
-    cg, new_id: int, cx_edges_d: dict, new_old_id_d: dict, time_stamp
+    cg, new_id: int, cx_edges_d: dict, new_old_id_d: dict, *, time_stamp, parent_ts
 ) -> list:
     updated_entries = []
     node_layer = cg.get_chunk_layer(new_id)
     for cx_layer in range(node_layer, cg.meta.layer_count):
         layer_edges = cx_edges_d.get(cx_layer, types.empty_2d)
         counterparts = layer_edges[:, 1]
-        counterpart_cx_edges_d = cg.get_cross_chunk_edges(counterparts)
+        counterpart_cx_edges_d = cg.get_cross_chunk_edges(
+            counterparts, time_stamp=parent_ts
+        )
         temp_map = {
             old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
         }
@@ -233,7 +235,7 @@ def add_edges(
     )
     atomic_children_d = cg.get_children(l2ids)
     cross_edges_d = merge_cross_edge_dicts(
-        cg.get_cross_chunk_edges(l2ids), l2_cross_edges_d
+        cg.get_cross_chunk_edges(l2ids, time_stamp=parent_ts), l2_cross_edges_d
     )
 
     graph, _, _, graph_ids = flatgraph.build_gt_graph(edges, make_directed=True)
@@ -268,7 +270,12 @@ def add_edges(
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
         entries = _update_neighbor_cross_edges(
-            cg, new_id, new_cx_edges_d, new_old_id_d, time_stamp
+            cg,
+            new_id,
+            new_cx_edges_d,
+            new_old_id_d,
+            time_stamp=time_stamp,
+            parent_ts=parent_ts,
         )
         updated_entries.extend(entries)
 
@@ -288,7 +295,12 @@ def add_edges(
     return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
 
 
-def _process_l2_agglomeration(agg: types.Agglomeration, removed_edges: np.ndarray):
+def _process_l2_agglomeration(
+    cg,
+    agg: types.Agglomeration,
+    removed_edges: np.ndarray,
+    parent_ts: datetime.datetime = None,
+):
     """
     For a given L2 id, remove given edges; calculate new connected components.
     """
@@ -298,6 +310,15 @@ def _process_l2_agglomeration(agg: types.Agglomeration, removed_edges: np.ndarra
     # cross during edits refers to all edges crossing chunk boundary
     cross_edges = [agg.out_edges.get_pairs(), agg.cross_edges.get_pairs()]
     cross_edges = np.concatenate(cross_edges)
+
+    parents = cg.get_parents(cross_edges[:, 0], time_stamp=parent_ts)
+    assert np.unique(parents).size == 1, "got cross edges from more than one l2 node"
+    root = cg.get_root(parents[0], time_stamp=parent_ts)
+
+    # inactive edges must be filtered out
+    neighbor_roots = cg.get_roots(cross_edges[:, 1], time_stamp=parent_ts)
+    active_mask = neighbor_roots == root
+    cross_edges = cross_edges[active_mask]
     cross_edges = cross_edges[~in2d(cross_edges, removed_edges)]
 
     isolated_ids = agg.supervoxels[~np.in1d(agg.supervoxels, chunk_edges)]
@@ -350,7 +371,9 @@ def remove_edges(
     new_l2_ids = []
     for id_ in l2ids:
         agg = l2id_agglomeration_d[id_]
-        ccs, graph_ids, cross_edges = _process_l2_agglomeration(agg, removed_edges)
+        ccs, graph_ids, cross_edges = _process_l2_agglomeration(
+            cg, agg, removed_edges, parent_ts
+        )
         new_parents = cg.id_client.create_node_ids(chunk_id_map[agg.node_id], len(ccs))
 
         cross_edge_layers = cg.get_cross_chunk_edges_layer(cross_edges)
@@ -366,7 +389,7 @@ def remove_edges(
             )
 
     updated_entries = []
-    cx_edges_d = cg.get_cross_chunk_edges(new_l2_ids)
+    cx_edges_d = cg.get_cross_chunk_edges(new_l2_ids, time_stamp=parent_ts)
     for new_id in new_l2_ids:
         new_cx_edges_d = cx_edges_d.get(new_id, {})
         for layer, edges in new_cx_edges_d.items():
@@ -380,7 +403,12 @@ def remove_edges(
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
         entries = _update_neighbor_cross_edges(
-            cg, new_id, new_cx_edges_d, new_old_id_d, time_stamp
+            cg,
+            new_id,
+            new_cx_edges_d,
+            new_old_id_d,
+            time_stamp=time_stamp,
+            parent_ts=parent_ts,
         )
         updated_entries.extend(entries)
 
@@ -452,7 +480,9 @@ def _get_connected_components(self, node_ids: np.ndarray, layer: int):
             self.cg.graph_id,
             self._operation_id,
         ):
-            cross_edges_d = self.cg.get_cross_chunk_edges(node_ids)
+            cross_edges_d = self.cg.get_cross_chunk_edges(
+                node_ids, time_stamp=self._last_successful_ts
+            )
 
         cx_edges = [types.empty_2d]
         for id_ in node_ids:
@@ -485,7 +515,9 @@ def _update_cross_edge_cache(self, parent, children):
         updates cross chunk edges in cache;
         this can only be done after all new components at a layer have IDs
         """
-        cx_edges_d = self.cg.get_cross_chunk_edges(children)
+        cx_edges_d = self.cg.get_cross_chunk_edges(
+            children, time_stamp=self._last_successful_ts
+        )
         cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values(), unique=True)
 
         parent_layer = self.cg.get_chunk_layer(parent)
@@ -505,7 +537,12 @@ def _update_cross_edge_cache(self, parent, children):
             assert np.all(edges[:, 0] == parent)
         self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
         entries = _update_neighbor_cross_edges(
-            self.cg, parent, new_cx_edges_d, self._new_old_id_d, self._time_stamp
+            self.cg,
+            parent,
+            new_cx_edges_d,
+            self._new_old_id_d,
+            time_stamp=self._time_stamp,
+            parent_ts=self._last_successful_ts,
         )
         self.new_entries.extend(entries)
 
@@ -530,7 +567,9 @@ def _create_new_parents(self, layer: int):
                 # skip connection
                 parent_layer = self.cg.meta.layer_count
                 for l in range(layer + 1, self.cg.meta.layer_count):
-                    cx_edges_d = self.cg.get_cross_chunk_edges([cc_ids[0]])
+                    cx_edges_d = self.cg.get_cross_chunk_edges(
+                        [cc_ids[0]], time_stamp=self._last_successful_ts
+                    )
                     if len(cx_edges_d[cc_ids[0]].get(l, types.empty_2d)) > 0:
                         parent_layer = l
                         break
@@ -606,7 +645,9 @@ def _get_cross_edges_val_dicts(self):
         val_dicts = {}
         for layer in range(2, self.cg.meta.layer_count):
             new_ids = np.array(self._new_ids_d[layer], dtype=basetypes.NODE_ID)
-            cross_edges_d = self.cg.get_cross_chunk_edges(new_ids)
+            cross_edges_d = self.cg.get_cross_chunk_edges(
+                new_ids, time_stamp=self._last_successful_ts
+            )
             for id_ in new_ids:
                 val_dict = {}
                 for layer, edges in cross_edges_d[id_].items():

From 51b592c8c96fcc2fcd600ae6b5dda9ef5a7076f0 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 30 Aug 2023 14:06:29 +0000
Subject: [PATCH 047/105] fix: get roots no cache flag

---
 pychunkedgraph/graph/chunkedgraph.py | 11 +++++++++--
 pychunkedgraph/graph/edits.py        |  5 ++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 049c7f683..a3c9aafc3 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -364,6 +364,7 @@ def get_roots(
         stop_layer: int = None,
         ceil: bool = True,
         fail_to_zero: bool = False,
+        raw_only=False,
         n_tries: int = 1,
     ) -> typing.Union[np.ndarray, typing.Dict[int, np.ndarray]]:
         """
@@ -387,7 +388,10 @@ def get_roots(
                 filtered_ids = parent_ids[layer_mask]
                 unique_ids, inverse = np.unique(filtered_ids, return_inverse=True)
                 temp_ids = self.get_parents(
-                    unique_ids, time_stamp=time_stamp, fail_to_zero=fail_to_zero
+                    unique_ids,
+                    time_stamp=time_stamp,
+                    fail_to_zero=fail_to_zero,
+                    raw_only=raw_only,
                 )
                 if not temp_ids.size:
                     break
@@ -442,6 +446,7 @@ def get_root(
         get_all_parents: bool = False,
         stop_layer: int = None,
         ceil: bool = True,
+        raw_only: bool = False,
         n_tries: int = 1,
     ) -> typing.Union[typing.List[np.uint64], np.uint64]:
         """Takes a node id and returns the associated agglomeration ids."""
@@ -459,7 +464,9 @@ def get_root(
         for _ in range(n_tries):
             parent_id = node_id
             for _ in range(self.get_chunk_layer(node_id), int(stop_layer + 1)):
-                temp_parent_id = self.get_parent(parent_id, time_stamp=time_stamp)
+                temp_parent_id = self.get_parent(
+                    parent_id, time_stamp=time_stamp, raw_only=raw_only
+                )
                 if temp_parent_id is None:
                     break
                 else:
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index c7485a26e..709c2dadc 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -316,7 +316,10 @@ def _process_l2_agglomeration(
     root = cg.get_root(parents[0], time_stamp=parent_ts)
 
     # inactive edges must be filtered out
-    neighbor_roots = cg.get_roots(cross_edges[:, 1], time_stamp=parent_ts)
+    # we must avoid the cache to read roots to get segment state before edit began
+    neighbor_roots = cg.get_roots(
+        cross_edges[:, 1], raw_only=True, time_stamp=parent_ts
+    )
     active_mask = neighbor_roots == root
     cross_edges = cross_edges[active_mask]
     cross_edges = cross_edges[~in2d(cross_edges, removed_edges)]

From 70abf729aef5d0a3cc20fc00f44a33d37457d050 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Thu, 31 Aug 2023 12:51:51 +0000
Subject: [PATCH 048/105] fix: parent and roots no cache

---
 pychunkedgraph/graph/edits.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 709c2dadc..dd53354d8 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -311,9 +311,9 @@ def _process_l2_agglomeration(
     cross_edges = [agg.out_edges.get_pairs(), agg.cross_edges.get_pairs()]
     cross_edges = np.concatenate(cross_edges)
 
-    parents = cg.get_parents(cross_edges[:, 0], time_stamp=parent_ts)
+    parents = cg.get_parents(cross_edges[:, 0], time_stamp=parent_ts, raw_only=True)
     assert np.unique(parents).size == 1, "got cross edges from more than one l2 node"
-    root = cg.get_root(parents[0], time_stamp=parent_ts)
+    root = cg.get_root(parents[0], time_stamp=parent_ts, raw_only=True)
 
     # inactive edges must be filtered out
     # we must avoid the cache to read roots to get segment state before edit began

From a2c027c1a82d8289e50b78a36569609db91fea81 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Thu, 31 Aug 2023 15:52:12 +0000
Subject: [PATCH 049/105] fix: out edges here don't refer to edges crossing
 chunk

---
 pychunkedgraph/graph/edits.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index dd53354d8..fd397d5a8 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -307,10 +307,7 @@ def _process_l2_agglomeration(
     chunk_edges = agg.in_edges.get_pairs()
     chunk_edges = chunk_edges[~in2d(chunk_edges, removed_edges)]
 
-    # cross during edits refers to all edges crossing chunk boundary
-    cross_edges = [agg.out_edges.get_pairs(), agg.cross_edges.get_pairs()]
-    cross_edges = np.concatenate(cross_edges)
-
+    cross_edges = agg.cross_edges.get_pairs()
     parents = cg.get_parents(cross_edges[:, 0], time_stamp=parent_ts, raw_only=True)
     assert np.unique(parents).size == 1, "got cross edges from more than one l2 node"
     root = cg.get_root(parents[0], time_stamp=parent_ts, raw_only=True)

From 2cab759867d00cf645583a65209a708f86ab4855 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 2 Sep 2023 19:42:49 +0000
Subject: [PATCH 050/105] fix: missing timestamps

---
 pychunkedgraph/graph/edits.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index fd397d5a8..76c708a38 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -394,7 +394,7 @@ def remove_edges(
         new_cx_edges_d = cx_edges_d.get(new_id, {})
         for layer, edges in new_cx_edges_d.items():
             svs = np.unique(edges)
-            parents = cg.get_parents(svs)
+            parents = cg.get_parents(svs, time_stamp=parent_ts)
             temp_map = dict(zip(svs, parents))
 
             edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
@@ -523,7 +523,10 @@ def _update_cross_edge_cache(self, parent, children):
         parent_layer = self.cg.get_chunk_layer(parent)
         edge_nodes = np.unique(np.concatenate([*cx_edges_d.values(), types.empty_2d]))
         edge_parents = self.cg.get_roots(
-            edge_nodes, stop_layer=parent_layer, ceil=False
+            edge_nodes,
+            stop_layer=parent_layer,
+            ceil=False,
+            time_stamp=self._last_successful_ts,
         )
         edge_parents_d = dict(zip(edge_nodes, edge_parents))
 

From 206e2823209b5a9c76b83cd026777cdf2ca85e08 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Fri, 8 Sep 2023 15:53:34 +0000
Subject: [PATCH 051/105] fix: consolidate neighbor nodes cx edge updates

---
 pychunkedgraph/graph/edits.py | 124 +++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 56 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 76c708a38..f08a5310d 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -179,38 +179,53 @@ def check_fake_edges(
 
 
 def _update_neighbor_cross_edges(
-    cg, new_id: int, cx_edges_d: dict, new_old_id_d: dict, *, time_stamp, parent_ts
-) -> list:
-    updated_entries = []
-    node_layer = cg.get_chunk_layer(new_id)
-    for cx_layer in range(node_layer, cg.meta.layer_count):
-        layer_edges = cx_edges_d.get(cx_layer, types.empty_2d)
-        counterparts = layer_edges[:, 1]
-        counterpart_cx_edges_d = cg.get_cross_chunk_edges(
-            counterparts, time_stamp=parent_ts
-        )
-        temp_map = {
+    cg, new_ids: List[int], new_old_id_d: dict, *, time_stamp, parent_ts
+) -> List:
+    temp_map = {}
+    for new_id in new_ids:
+        old_new_d = {
             old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
         }
-        for counterpart, edges_d in counterpart_cx_edges_d.items():
-            val_dict = {}
-            for layer in range(2, cg.meta.layer_count):
-                edges = edges_d.get(layer, types.empty_2d)
-                if edges.size == 0:
-                    continue
-                assert np.all(edges[:, 0] == counterpart)
-                edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
-                edges_d[layer] = edges
-                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
-            if not val_dict:
+        temp_map.update(old_new_d)
+    newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids)
+
+    def _get_counterparts(layer) -> set:
+        result = set()
+        for new_id in new_ids:
+            cx_edges_d = newid_cx_edges_d[new_id]
+            layer_edges = cx_edges_d.get(layer, types.empty_2d)
+            result.update(layer_edges[:, 1])
+        return result
+
+    start_layer = min(cg.get_chunk_layers(new_ids))
+    counterparts = set()
+    for cx_layer in range(start_layer, cg.meta.layer_count):
+        counterparts.update(_get_counterparts(cx_layer))
+
+    counterpart_cx_edges_d = cg.get_cross_chunk_edges(
+        counterparts, time_stamp=parent_ts
+    )
+
+    updated_entries = []
+    for counterpart, edges_d in counterpart_cx_edges_d.items():
+        val_dict = {}
+        for layer in range(2, cg.meta.layer_count):
+            edges = edges_d.get(layer, types.empty_2d)
+            if edges.size == 0:
                 continue
-            cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
-            row = cg.client.mutate_row(
-                serialize_uint64(counterpart),
-                val_dict,
-                time_stamp=time_stamp,
-            )
-            updated_entries.append(row)
+            assert np.all(edges[:, 0] == counterpart)
+            edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
+            edges_d[layer] = edges
+            val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+        if not val_dict:
+            continue
+        cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
+        row = cg.client.mutate_row(
+            serialize_uint64(counterpart),
+            val_dict,
+            time_stamp=time_stamp,
+        )
+        updated_entries.append(row)
     return updated_entries
 
 
@@ -269,15 +284,14 @@ def add_edges(
             new_cx_edges_d[layer] = edges
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
-        entries = _update_neighbor_cross_edges(
-            cg,
-            new_id,
-            new_cx_edges_d,
-            new_old_id_d,
-            time_stamp=time_stamp,
-            parent_ts=parent_ts,
-        )
-        updated_entries.extend(entries)
+    entries = _update_neighbor_cross_edges(
+        cg,
+        new_l2_ids,
+        new_old_id_d,
+        time_stamp=time_stamp,
+        parent_ts=parent_ts,
+    )
+    updated_entries.extend(entries)
 
     create_parents = CreateParentNodes(
         cg,
@@ -402,15 +416,14 @@ def remove_edges(
             new_cx_edges_d[layer] = edges
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
-        entries = _update_neighbor_cross_edges(
-            cg,
-            new_id,
-            new_cx_edges_d,
-            new_old_id_d,
-            time_stamp=time_stamp,
-            parent_ts=parent_ts,
-        )
-        updated_entries.extend(entries)
+    entries = _update_neighbor_cross_edges(
+        cg,
+        new_l2_ids,
+        new_old_id_d,
+        time_stamp=time_stamp,
+        parent_ts=parent_ts,
+    )
+    updated_entries.extend(entries)
 
     create_parents = CreateParentNodes(
         cg,
@@ -539,15 +552,6 @@ def _update_cross_edge_cache(self, parent, children):
             new_cx_edges_d[layer] = np.unique(edges, axis=0)
             assert np.all(edges[:, 0] == parent)
         self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
-        entries = _update_neighbor_cross_edges(
-            self.cg,
-            parent,
-            new_cx_edges_d,
-            self._new_old_id_d,
-            time_stamp=self._time_stamp,
-            parent_ts=self._last_successful_ts,
-        )
-        self.new_entries.extend(entries)
 
     def _create_new_parents(self, layer: int):
         """
@@ -594,6 +598,14 @@ def _create_new_parents(self, layer: int):
         for new_id in new_parent_ids:
             children = self.cg.get_children(new_id)
             self._update_cross_edge_cache(new_id, children)
+        entries = _update_neighbor_cross_edges(
+            self.cg,
+            new_parent_ids,
+            self._new_old_id_d,
+            time_stamp=self._time_stamp,
+            parent_ts=self._last_successful_ts,
+        )
+        self.new_entries.extend(entries)
 
     def run(self) -> Iterable:
         """

From 737e3ef37daa6791824133b1e659e0f3d8f296ff Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Fri, 8 Sep 2023 15:57:59 +0000
Subject: [PATCH 052/105] fix: set to list for np.array

---
 pychunkedgraph/graph/edits.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index f08a5310d..3797e2082 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -202,12 +202,9 @@ def _get_counterparts(layer) -> set:
     for cx_layer in range(start_layer, cg.meta.layer_count):
         counterparts.update(_get_counterparts(cx_layer))
 
-    counterpart_cx_edges_d = cg.get_cross_chunk_edges(
-        counterparts, time_stamp=parent_ts
-    )
-
+    cx_edges_d = cg.get_cross_chunk_edges(list(counterparts), time_stamp=parent_ts)
     updated_entries = []
-    for counterpart, edges_d in counterpart_cx_edges_d.items():
+    for counterpart, edges_d in cx_edges_d.items():
         val_dict = {}
         for layer in range(2, cg.meta.layer_count):
             edges = edges_d.get(layer, types.empty_2d)

From 39d16badc5b3c145478141c9c9bfdc7709ebad8c Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Fri, 8 Sep 2023 16:52:42 +0000
Subject: [PATCH 053/105] fix: use copy=False where possible; some cleanup

---
 pychunkedgraph/graph/cache.py               |  6 +-
 pychunkedgraph/graph/chunkedgraph.py        | 81 +--------------------
 pychunkedgraph/graph/chunks/utils.py        | 26 ++++---
 pychunkedgraph/graph/connectivity/search.py | 47 ------------
 pychunkedgraph/graph/edits.py               |  6 +-
 pychunkedgraph/graph/utils/flatgraph.py     | 15 +++-
 6 files changed, 35 insertions(+), 146 deletions(-)
 delete mode 100644 pychunkedgraph/graph/connectivity/search.py

diff --git a/pychunkedgraph/graph/cache.py b/pychunkedgraph/graph/cache.py
index d381baa7d..13fa962ae 100644
--- a/pychunkedgraph/graph/cache.py
+++ b/pychunkedgraph/graph/cache.py
@@ -79,7 +79,7 @@ def cross_edges_decorated(node_id):
         return cross_edges_decorated(node_id)
 
     def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None):
-        node_ids = np.array(node_ids, dtype=NODE_ID)
+        node_ids = np.array(node_ids, dtype=NODE_ID, copy=False)
         if not node_ids.size:
             return node_ids
         mask = np.in1d(node_ids, np.fromiter(self.parents_cache.keys(), dtype=NODE_ID))
@@ -93,7 +93,7 @@ def parents_multiple(self, node_ids: np.ndarray, *, time_stamp: datetime = None)
 
     def children_multiple(self, node_ids: np.ndarray, *, flatten=False):
         result = {}
-        node_ids = np.array(node_ids, dtype=NODE_ID)
+        node_ids = np.array(node_ids, dtype=NODE_ID, copy=False)
         if not node_ids.size:
             return result
         mask = np.in1d(node_ids, np.fromiter(self.children_cache.keys(), dtype=NODE_ID))
@@ -111,7 +111,7 @@ def cross_chunk_edges_multiple(
         self, node_ids: np.ndarray, *, time_stamp: datetime = None
     ):
         result = {}
-        node_ids = np.array(node_ids, dtype=NODE_ID)
+        node_ids = np.array(node_ids, dtype=NODE_ID, copy=False)
         if not node_ids.size:
             return result
         mask = np.in1d(
diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index a3c9aafc3..472257d1e 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -629,7 +629,7 @@ def get_fake_edges(
         )
         for id_, val in fake_edges_d.items():
             edges = np.concatenate(
-                [np.array(e.value, dtype=basetypes.NODE_ID) for e in val]
+                [np.array(e.value, dtype=basetypes.NODE_ID, copy=False) for e in val]
             )
             result[id_] = Edges(edges[:, 0], edges[:, 1], fake_edges=True)
         return result
@@ -827,82 +827,7 @@ def redo_operation(
             multicut_as_split=True,
         ).execute()
 
-    # PRIVATE
-
-    def _get_bounding_chunk_ids(
-        self,
-        parent_chunk_ids: typing.Iterable,
-        unique: bool = False,
-    ) -> typing.Dict:
-        """
-        Returns bounding chunk IDs at layers < parent_layer for all chunk IDs.
-        Dict[parent_chunk_id] = np.array(bounding_chunk_ids)
-        """
-        parent_chunk_coords = self.get_chunk_coordinates_multiple(parent_chunk_ids)
-        parents_layer = self.get_chunk_layer(parent_chunk_ids[0])
-        chunk_id_bchunk_ids_d = {}
-        for i, chunk_id in enumerate(parent_chunk_ids):
-            if chunk_id in chunk_id_bchunk_ids_d:
-                # `parent_chunk_ids` can have duplicates
-                # avoid redundant calculations
-                continue
-            parent_coord = parent_chunk_coords[i]
-            chunk_ids = [types.empty_1d]
-            for child_layer in range(2, parents_layer):
-                bcoords = chunk_utils.get_bounding_children_chunks(
-                    self.meta,
-                    parents_layer,
-                    parent_coord,
-                    child_layer,
-                    return_unique=False,
-                )
-                bchunks_ids = chunk_utils.get_chunk_ids_from_coords(
-                    self.meta, child_layer, bcoords
-                )
-                chunk_ids.append(bchunks_ids)
-            chunk_ids = np.concatenate(chunk_ids)
-            if unique:
-                chunk_ids = np.unique(chunk_ids)
-            chunk_id_bchunk_ids_d[chunk_id] = chunk_ids
-        return chunk_id_bchunk_ids_d
-
-    def _get_bounding_l2_children(self, parents: typing.Iterable) -> typing.Dict:
-        parent_chunk_ids = self.get_chunk_ids_from_node_ids(parents)
-        chunk_id_bchunk_ids_d = self._get_bounding_chunk_ids(
-            parent_chunk_ids, unique=len(parents) >= 200
-        )
-
-        parent_descendants_d = {
-            _id: np.array([_id], dtype=basetypes.NODE_ID) for _id in parents
-        }
-        descendants_all = np.concatenate(list(parent_descendants_d.values()))
-        descendants_layers = self.get_chunk_layers(descendants_all)
-        layer_mask = descendants_layers > 2
-        descendants_all = descendants_all[layer_mask]
-
-        while descendants_all.size:
-            descendant_children_d = self.get_children(descendants_all)
-            for i, parent_id in enumerate(parents):
-                _descendants = parent_descendants_d[parent_id]
-                _layers = self.get_chunk_layers(_descendants)
-                _l2mask = _layers == 2
-                descendants = [_descendants[_l2mask]]
-                for child in _descendants[~_l2mask]:
-                    descendants.append(descendant_children_d[child])
-                descendants = np.concatenate(descendants)
-                chunk_ids = self.get_chunk_ids_from_node_ids(descendants)
-                bchunk_ids = chunk_id_bchunk_ids_d[parent_chunk_ids[i]]
-                bounding_descendants = descendants[np.in1d(chunk_ids, bchunk_ids)]
-                parent_descendants_d[parent_id] = bounding_descendants
-
-            descendants_all = np.concatenate(list(parent_descendants_d.values()))
-            descendants_layers = self.get_chunk_layers(descendants_all)
-            layer_mask = descendants_layers > 2
-            descendants_all = descendants_all[layer_mask]
-        return parent_descendants_d
-
     # HELPERS / WRAPPERS
-
     def is_root(self, node_id: basetypes.NODE_ID) -> bool:
         return self.get_chunk_layer(node_id) == self.meta.layer_count
 
@@ -940,7 +865,9 @@ def get_chunk_coordinates(self, node_or_chunk_id: basetypes.NODE_ID):
         return chunk_utils.get_chunk_coordinates(self.meta, node_or_chunk_id)
 
     def get_chunk_coordinates_multiple(self, node_or_chunk_ids: typing.Sequence):
-        node_or_chunk_ids = np.array(node_or_chunk_ids, dtype=basetypes.NODE_ID)
+        node_or_chunk_ids = np.array(
+            node_or_chunk_ids, dtype=basetypes.NODE_ID, copy=False
+        )
         layers = self.get_chunk_layers(node_or_chunk_ids)
         assert np.all(layers == layers[0]), "All IDs must have the same layer."
         return chunk_utils.get_chunk_coordinates_multiple(self.meta, node_or_chunk_ids)
diff --git a/pychunkedgraph/graph/chunks/utils.py b/pychunkedgraph/graph/chunks/utils.py
index dc895bde4..4d01258bd 100644
--- a/pychunkedgraph/graph/chunks/utils.py
+++ b/pychunkedgraph/graph/chunks/utils.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 
+
 def get_chunks_boundary(voxel_boundary, chunk_size) -> np.ndarray:
     """returns number of chunks in each dimension"""
     return np.ceil((voxel_boundary / chunk_size)).astype(int)
@@ -43,7 +44,7 @@ def normalize_bounding_box(
 
 
 def get_chunk_layer(meta, node_or_chunk_id: np.uint64) -> int:
-    """ Extract Layer from Node ID or Chunk ID """
+    """Extract Layer from Node ID or Chunk ID"""
     return int(int(node_or_chunk_id) >> 64 - meta.graph_config.LAYER_ID_BITS)
 
 
@@ -75,9 +76,9 @@ def get_chunk_coordinates(meta, node_or_chunk_id: np.uint64) -> np.ndarray:
     y_offset = x_offset - bits_per_dim
     z_offset = y_offset - bits_per_dim
 
-    x = int(node_or_chunk_id) >> x_offset & 2 ** bits_per_dim - 1
-    y = int(node_or_chunk_id) >> y_offset & 2 ** bits_per_dim - 1
-    z = int(node_or_chunk_id) >> z_offset & 2 ** bits_per_dim - 1
+    x = int(node_or_chunk_id) >> x_offset & 2**bits_per_dim - 1
+    y = int(node_or_chunk_id) >> y_offset & 2**bits_per_dim - 1
+    z = int(node_or_chunk_id) >> z_offset & 2**bits_per_dim - 1
     return np.array([x, y, z])
 
 
@@ -86,7 +87,7 @@ def get_chunk_coordinates_multiple(meta, ids: np.ndarray) -> np.ndarray:
     Array version of get_chunk_coordinates.
     Assumes all given IDs are in same layer.
     """
-    if not len(ids):
+    if len(ids) == 0:
         return np.array([])
     layer = get_chunk_layer(meta, ids[0])
     bits_per_dim = meta.bitmasks[layer]
@@ -95,10 +96,10 @@ def get_chunk_coordinates_multiple(meta, ids: np.ndarray) -> np.ndarray:
     y_offset = x_offset - bits_per_dim
     z_offset = y_offset - bits_per_dim
 
-    ids = np.array(ids, dtype=int)
-    X = ids >> x_offset & 2 ** bits_per_dim - 1
-    Y = ids >> y_offset & 2 ** bits_per_dim - 1
-    Z = ids >> z_offset & 2 ** bits_per_dim - 1
+    ids = np.array(ids, dtype=int, copy=False)
+    X = ids >> x_offset & 2**bits_per_dim - 1
+    Y = ids >> y_offset & 2**bits_per_dim - 1
+    Z = ids >> z_offset & 2**bits_per_dim - 1
     return np.column_stack((X, Y, Z))
 
 
@@ -142,14 +143,15 @@ def get_chunk_ids_from_coords(meta, layer: int, coords: np.ndarray):
 
 
 def get_chunk_ids_from_node_ids(meta, ids: Iterable[np.uint64]) -> np.ndarray:
-    """ Extract Chunk IDs from Node IDs"""
+    """Extract Chunk IDs from Node IDs"""
     if len(ids) == 0:
         return np.array([], dtype=np.uint64)
 
     bits_per_dims = np.array([meta.bitmasks[l] for l in get_chunk_layers(meta, ids)])
     offsets = 64 - meta.graph_config.LAYER_ID_BITS - 3 * bits_per_dims
 
-    cids1 = np.array((np.array(ids, dtype=int) >> offsets) << offsets, dtype=np.uint64)
+    ids = np.array(ids, dtype=int, copy=False)
+    cids1 = np.array((ids >> offsets) << offsets, dtype=np.uint64)
     # cids2 = np.vectorize(get_chunk_id)(meta, ids)
     # assert np.all(cids1 == cids2)
     return cids1
@@ -164,7 +166,7 @@ def _compute_chunk_id(
 ) -> np.uint64:
     s_bits_per_dim = meta.bitmasks[layer]
     if not (
-        x < 2 ** s_bits_per_dim and y < 2 ** s_bits_per_dim and z < 2 ** s_bits_per_dim
+        x < 2**s_bits_per_dim and y < 2**s_bits_per_dim and z < 2**s_bits_per_dim
     ):
         raise ValueError(
             f"Coordinate is out of range \
diff --git a/pychunkedgraph/graph/connectivity/search.py b/pychunkedgraph/graph/connectivity/search.py
deleted file mode 100644
index bd3faf227..000000000
--- a/pychunkedgraph/graph/connectivity/search.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import random
-from typing import List
-
-import numpy as np
-from graph_tool.search import bfs_search
-from graph_tool.search import BFSVisitor
-from graph_tool.search import StopSearch
-
-from ..utils.basetypes import NODE_ID
-
-
-class TargetVisitor(BFSVisitor):
-    def __init__(self, target, reachable):
-        self.target = target
-        self.reachable = reachable
-
-    def discover_vertex(self, u):
-        if u == self.target:
-            self.reachable[u] = 1
-            raise StopSearch
-
-
-def check_reachability(g, sv1s: np.ndarray, sv2s: np.ndarray, original_ids: np.ndarray) -> np.ndarray:
-    """
-    g: graph tool Graph instance with ids 0 to N-1 where N = vertex count
-    original_ids: sorted ChunkedGraph supervoxel ids
-        (to identify corresponding ids in graph tool)
-    for each pair (sv1, sv2) check if a path exists (BFS)
-    """
-    # mapping from original ids to graph tool ids
-    original_ids_d = {
-        sv_id: index for sv_id, index in zip(original_ids, range(len(original_ids)))
-    }
-    reachable = g.new_vertex_property("int", val=0)
-
-    def _check_reachability(source, target):
-        bfs_search(g, source, TargetVisitor(target, reachable))
-        return reachable[target]
-
-    return np.array(
-        [
-            _check_reachability(original_ids_d[source], original_ids_d[target])
-            for source, target in zip(sv1s, sv2s)
-        ],
-        dtype=bool,
-    )
-
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 3797e2082..6792f2f7d 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -441,7 +441,7 @@ def _get_flipped_ids(id_map, node_ids):
     """
     returns old or new ids according to the map
     """
-    ids = [np.array(list(id_map[id_]), dtype=basetypes.NODE_ID) for id_ in node_ids]
+    ids = [np.array(list(id_map[id_]), dtype=basetypes.NODE_ID, copy=False) for id_ in node_ids]
     return np.concatenate(ids)
 
 
@@ -629,7 +629,7 @@ def _update_root_id_lineage(self):
         assert len(former_roots) < 2 or len(new_roots) < 2, "new roots are inconsistent"
         for new_root_id in new_roots:
             val_dict = {
-                attributes.Hierarchy.FormerParent: np.array(former_roots),
+                attributes.Hierarchy.FormerParent: former_roots,
                 attributes.OperationLogs.OperationID: self._operation_id,
             }
             self.new_entries.append(
@@ -642,7 +642,7 @@ def _update_root_id_lineage(self):
 
         for former_root_id in former_roots:
             val_dict = {
-                attributes.Hierarchy.NewParent: np.array(new_roots),
+                attributes.Hierarchy.NewParent: new_roots,
                 attributes.OperationLogs.OperationID: self._operation_id,
             }
             self.new_entries.append(
diff --git a/pychunkedgraph/graph/utils/flatgraph.py b/pychunkedgraph/graph/utils/flatgraph.py
index df469d728..03cb6e2d2 100644
--- a/pychunkedgraph/graph/utils/flatgraph.py
+++ b/pychunkedgraph/graph/utils/flatgraph.py
@@ -1,8 +1,11 @@
+# pylint: disable=invalid-name, missing-docstring, c-extension-no-member
+
+from itertools import combinations, chain
+
 import fastremap
 import numpy as np
-from itertools import combinations, chain
 from graph_tool import Graph, GraphView
-from graph_tool import topology, search
+from graph_tool import topology
 
 
 def build_gt_graph(
@@ -88,7 +91,10 @@ def team_paths_all_to_all(graph, capacity, team_vertex_ids):
 
 
 def neighboring_edges(graph, vertex_id):
-    """Returns vertex and edge lists of a seed vertex, in the same format as team_paths_all_to_all."""
+    """
+    Returns vertex and edge lists of a seed vertex,
+    in the same format as team_paths_all_to_all.
+    """
     add_v = []
     add_e = []
     v0 = graph.vertex(vertex_id)
@@ -124,7 +130,8 @@ def compute_filtered_paths(
         gfilt, capacity, team_vertex_ids
     )
 
-    # graph-tool will invalidate the vertex and edge properties if I don't rebase them on the main graph
+    # graph-tool will invalidate the vertex and
+    # edge properties if I don't rebase them on the main graph
     # before tearing down the GraphView
     new_paths_e = []
     for pth in paths_e:

From 97eee3e20a5e651dddfab458c8b163d64d17e68c Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Fri, 8 Sep 2023 16:56:09 +0000
Subject: [PATCH 054/105] fix: attribute type must be np.array

---
 pychunkedgraph/graph/edits.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 6792f2f7d..278cb92db 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -441,7 +441,10 @@ def _get_flipped_ids(id_map, node_ids):
     """
     returns old or new ids according to the map
     """
-    ids = [np.array(list(id_map[id_]), dtype=basetypes.NODE_ID, copy=False) for id_ in node_ids]
+    ids = [
+        np.array(list(id_map[id_]), dtype=basetypes.NODE_ID, copy=False)
+        for id_ in node_ids
+    ]
     return np.concatenate(ids)
 
 
@@ -642,7 +645,9 @@ def _update_root_id_lineage(self):
 
         for former_root_id in former_roots:
             val_dict = {
-                attributes.Hierarchy.NewParent: new_roots,
+                attributes.Hierarchy.NewParent: np.array(
+                    new_roots, dtype=basetypes.NODE_ID
+                ),
                 attributes.OperationLogs.OperationID: self._operation_id,
             }
             self.new_entries.append(

From 9138967e8846cb9d961f94903942f60d47d1fb3b Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 9 Sep 2023 19:00:17 +0000
Subject: [PATCH 055/105] fix(ingest): worker details in status

---
 pychunkedgraph/ingest/cli.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 0fe925d78..89106a097 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -11,6 +11,8 @@
 import yaml
 from flask.cli import AppGroup
 from rq import Queue
+from rq import Worker
+from rq.worker import WorkerStatus
 
 from .cluster import create_atomic_chunk
 from .cluster import create_parent_chunk
@@ -124,11 +126,14 @@ def ingest_status():
     layer_counts = imanager.cg_meta.layer_chunk_counts
 
     pipeline = redis.pipeline()
+    worker_busy = []
     for layer in layers:
         pipeline.scard(f"{layer}c")
         queue = Queue(f"l{layer}", connection=redis)
         pipeline.llen(queue.key)
         pipeline.zcard(queue.failed_job_registry.key)
+        workers = Worker.all(queue=queue)
+        worker_busy.append(sum([w.get_state() == WorkerStatus.BUSY for w in workers]))
 
     results = pipeline.execute()
     completed = []
@@ -140,13 +145,16 @@ def ingest_status():
         queued.append(result[1])
         failed.append(result[2])
 
-    print("layer status:")
+    print(f"version: \t{imanager.cg.version}")
+    print(f"graph_id: \t{imanager.cg.graph_id}")
+    print(f"chunk_size: \t{imanager.cg.meta.graph_config.CHUNK_SIZE}")
+    print("\nlayer status:")
     for layer, done, count in zip(layers, completed, layer_counts):
         print(f"{layer}\t: {done} / {count}")
 
     print("\n\nqueue status:")
-    for layer, q, f in zip(layers, queued, failed):
-        print(f"l{layer}\t: queued\t {q}\t, failed\t {f}")
+    for layer, q, f, wb in zip(layers, queued, failed, worker_busy):
+        print(f"l{layer}\t: queued: {q}\t\t failed: {f}\t\t busy: {wb}")
 
 
 @ingest_cli.command("chunk")

From 37b497da5b51bbe1634ee69a1adeb42d9945ba48 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 9 Sep 2023 19:44:07 +0000
Subject: [PATCH 056/105] fix: handle empty input

---
 pychunkedgraph/graph/edits.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 278cb92db..6c7176924 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -441,6 +441,8 @@ def _get_flipped_ids(id_map, node_ids):
     """
     returns old or new ids according to the map
     """
+    if len(node_ids) == 0:
+        return types.empty_1d
     ids = [
         np.array(list(id_map[id_]), dtype=basetypes.NODE_ID, copy=False)
         for id_ in node_ids

From 1fc55e4d6fdc39b003d15cd49314ebe277af0605 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sat, 9 Sep 2023 20:00:52 +0000
Subject: [PATCH 057/105] fix: use empty array instead

---
 pychunkedgraph/graph/edits.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 6c7176924..17502ddda 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -441,12 +441,11 @@ def _get_flipped_ids(id_map, node_ids):
     """
     returns old or new ids according to the map
     """
-    if len(node_ids) == 0:
-        return types.empty_1d
     ids = [
         np.array(list(id_map[id_]), dtype=basetypes.NODE_ID, copy=False)
         for id_ in node_ids
     ]
+    ids.append(types.empty_1d)  # concatenate needs at least one array
     return np.concatenate(ids)
 
 

From 9a4d2b274bb09a7d813e1ba40ab858780770e5fa Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 10 Sep 2023 18:59:55 +0000
Subject: [PATCH 058/105] fix: missed time_stamp

---
 pychunkedgraph/graph/chunkedgraph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 472257d1e..988dd5d89 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -353,7 +353,7 @@ def get_cross_chunk_edges(
                 except KeyError:
                     result[id_] = {}
             return result
-        return self.cache.cross_chunk_edges_multiple(node_ids)
+        return self.cache.cross_chunk_edges_multiple(node_ids, time_stamp=time_stamp)
 
     def get_roots(
         self,

From 68581f0a61188c3043dae43bf6093c33ca9cd41a Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 10 Sep 2023 19:15:28 +0000
Subject: [PATCH 059/105] fix: only consolidate cx_edge writes; update per
 new_id

---
 pychunkedgraph/graph/edits.py | 76 +++++++++++++++++------------------
 1 file changed, 37 insertions(+), 39 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 17502ddda..f835577e0 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -178,50 +178,53 @@ def check_fake_edges(
     return atomic_edges, rows
 
 
-def _update_neighbor_cross_edges(
-    cg, new_ids: List[int], new_old_id_d: dict, *, time_stamp, parent_ts
-) -> List:
-    temp_map = {}
-    for new_id in new_ids:
-        old_new_d = {
-            old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
-        }
-        temp_map.update(old_new_d)
-    newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids)
-
-    def _get_counterparts(layer) -> set:
-        result = set()
-        for new_id in new_ids:
-            cx_edges_d = newid_cx_edges_d[new_id]
-            layer_edges = cx_edges_d.get(layer, types.empty_2d)
-            result.update(layer_edges[:, 1])
-        return result
-
-    start_layer = min(cg.get_chunk_layers(new_ids))
-    counterparts = set()
-    for cx_layer in range(start_layer, cg.meta.layer_count):
-        counterparts.update(_get_counterparts(cx_layer))
-
-    cx_edges_d = cg.get_cross_chunk_edges(list(counterparts), time_stamp=parent_ts)
-    updated_entries = []
-    for counterpart, edges_d in cx_edges_d.items():
+def _update_neighbor_cross_edges_single(
+    cg, new_id: int, cx_edges_d: dict, node_map: dict, *, parent_ts
+) -> dict:
+    node_layer = cg.get_chunk_layer(new_id)
+    counterparts = []
+    for layer in range(node_layer, cg.meta.layer_count):
+        layer_edges = cx_edges_d.get(layer, types.empty_2d)
+        counterparts.extend(layer_edges[:, 1])
+
+    cp_cx_edges_d = cg.get_cross_chunk_edges(counterparts, time_stamp=parent_ts)
+    updated_counterparts = {}
+    for counterpart, edges_d in cp_cx_edges_d.items():
         val_dict = {}
         for layer in range(2, cg.meta.layer_count):
             edges = edges_d.get(layer, types.empty_2d)
             if edges.size == 0:
                 continue
             assert np.all(edges[:, 0] == counterpart)
-            edges = fastremap.remap(edges, temp_map, preserve_missing_labels=True)
+            edges = fastremap.remap(edges, node_map, preserve_missing_labels=True)
             edges_d[layer] = edges
             val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
         if not val_dict:
             continue
         cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
-        row = cg.client.mutate_row(
-            serialize_uint64(counterpart),
-            val_dict,
-            time_stamp=time_stamp,
+        updated_counterparts[counterpart] = val_dict
+    return updated_counterparts
+
+
+def _update_neighbor_cross_edges(
+    cg, new_ids: List[int], new_old_id_d: dict, *, time_stamp, parent_ts
+) -> List:
+    newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts)
+    updated_counterparts = {}
+    for new_id in new_ids:
+        cx_edges_d = newid_cx_edges_d[new_id]
+        temp_map = {
+            old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
+        }
+        result = _update_neighbor_cross_edges_single(
+            cg, new_id, cx_edges_d, temp_map, parent_ts=parent_ts
         )
+        updated_counterparts.update(result)
+
+    updated_entries = []
+    for node, val_dict in updated_counterparts.items():
+        rowkey = serialize_uint64(node)
+        row = cg.client.mutate_row(rowkey, val_dict, time_stamp=time_stamp)
         updated_entries.append(row)
     return updated_entries
 
@@ -269,7 +272,6 @@ def add_edges(
 
     # update cross chunk edges by replacing old_ids with new
     # this can be done only after all new IDs have been created
-    updated_entries = []
     for new_id, cc_indices in zip(new_l2_ids, components):
         l2ids_ = graph_ids[cc_indices]
         new_cx_edges_d = {}
@@ -281,14 +283,13 @@ def add_edges(
             new_cx_edges_d[layer] = edges
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
-    entries = _update_neighbor_cross_edges(
+    updated_entries = _update_neighbor_cross_edges(
         cg,
         new_l2_ids,
         new_old_id_d,
         time_stamp=time_stamp,
         parent_ts=parent_ts,
     )
-    updated_entries.extend(entries)
 
     create_parents = CreateParentNodes(
         cg,
@@ -399,7 +400,6 @@ def remove_edges(
                 graph_ids[cc], cross_edges, cross_edge_layers
             )
 
-    updated_entries = []
     cx_edges_d = cg.get_cross_chunk_edges(new_l2_ids, time_stamp=parent_ts)
     for new_id in new_l2_ids:
         new_cx_edges_d = cx_edges_d.get(new_id, {})
@@ -413,14 +413,13 @@ def remove_edges(
             new_cx_edges_d[layer] = edges
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
-    entries = _update_neighbor_cross_edges(
+    updated_entries = _update_neighbor_cross_edges(
         cg,
         new_l2_ids,
         new_old_id_d,
         time_stamp=time_stamp,
         parent_ts=parent_ts,
     )
-    updated_entries.extend(entries)
 
     create_parents = CreateParentNodes(
         cg,
@@ -595,7 +594,6 @@ def _create_new_parents(self, layer: int):
                 cc_ids,
                 parent_id,
             )
-
         for new_id in new_parent_ids:
             children = self.cg.get_children(new_id)
             self._update_cross_edge_cache(new_id, children)

From 0f95e0dc224d2306aa7d1339843a6e4856973a05 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 11 Sep 2023 15:05:21 +0000
Subject: [PATCH 060/105] fix: reset parent layer in loop

---
 pychunkedgraph/graph/edits.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index f835577e0..9e186c274 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -563,12 +563,12 @@ def _create_new_parents(self, layer: int):
         get cross edges of all, find connected components
         update parent old IDs
         """
-        parent_layer = layer + 1
         new_ids = self._new_ids_d[layer]
         layer_node_ids = self._get_layer_node_ids(new_ids, layer)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
         new_parent_ids = []
         for cc_indices in components:
+            parent_layer = layer + 1  # must be reset for each connected component
             cc_ids = graph_ids[cc_indices]
             if len(cc_ids) == 1:
                 # skip connection

From 8e14031e60d5e9d5065c063f332292cf9e36736d Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 11 Sep 2023 15:26:32 +0000
Subject: [PATCH 061/105] fix(ingest): use get_roots with ceil=False instead of
 get_parents

---
 pychunkedgraph/ingest/create/abstract_layers.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index d65e225a3..718ec74b7 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -212,16 +212,13 @@ def _write(
                 for layer in range(node_layer, cg.meta.layer_count):
                     if not layer in node_cx_edges_d:
                         continue
-
                     layer_edges = node_cx_edges_d[layer]
-                    edges_nodes = np.unique(layer_edges)
-                    edges_nodes_layers = cg.get_chunk_layers(edges_nodes)
-                    mask = edges_nodes_layers < layer_id - 1
-                    edges_nodes_parents = cg.get_parents(edges_nodes[mask])
-                    temp_map = dict(zip(edges_nodes[mask], edges_nodes_parents))
+                    nodes = np.unique(layer_edges)
+                    parents = cg.get_roots(nodes, stop_layer=parent_layer, ceil=False)
 
+                    edge_parents_d = dict(zip(nodes, parents))
                     layer_edges = fastremap.remap(
-                        layer_edges, temp_map, preserve_missing_labels=True
+                        layer_edges, edge_parents_d, preserve_missing_labels=True
                     )
                     layer_edges = np.unique(layer_edges, axis=0)
 

From 96a8a20e2c6487ce35a833ac7dac4742b6ac4b6c Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Mon, 11 Sep 2023 15:43:44 +0000
Subject: [PATCH 062/105] fix(ingest): incorrect stop_layer

---
 pychunkedgraph/ingest/create/abstract_layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/abstract_layers.py
index 718ec74b7..adbe4a5ab 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/abstract_layers.py
@@ -214,7 +214,7 @@ def _write(
                         continue
                     layer_edges = node_cx_edges_d[layer]
                     nodes = np.unique(layer_edges)
-                    parents = cg.get_roots(nodes, stop_layer=parent_layer, ceil=False)
+                    parents = cg.get_roots(nodes, stop_layer=node_layer, ceil=False)
 
                     edge_parents_d = dict(zip(nodes, parents))
                     layer_edges = fastremap.remap(

From c8498bc37ea91eb04a8a158e80131154d7bb3a65 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 12 Sep 2023 14:07:54 +0000
Subject: [PATCH 063/105] fix: add safeguard against data corruption

---
 pychunkedgraph/graph/chunkedgraph.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 988dd5d89..8c3e14166 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -676,6 +676,9 @@ def get_l2_agglomerations(
         sv_parent_d = {}
         for l2id in l2id_children_d:
             svs = l2id_children_d[l2id]
+            for sv in svs:
+                if sv in sv_parent_d:
+                    raise ValueError("Found conflicting parents.")
             sv_parent_d.update(dict(zip(svs.tolist(), [l2id] * len(svs))))
 
         in_edges, out_edges, cross_edges = edge_utils.categorize_edges_v2(

From 2963ff39b64c35abcbc074f21c4030e035e8bf25 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 12 Sep 2023 14:55:09 +0000
Subject: [PATCH 064/105] add another safeguard

---
 pychunkedgraph/graph/edits.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 9e186c274..5087f503d 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -567,9 +567,18 @@ def _create_new_parents(self, layer: int):
         layer_node_ids = self._get_layer_node_ids(new_ids, layer)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
         new_parent_ids = []
+        all_old_ids = []
+        for v in self._new_old_id_d.values():
+            all_old_ids.extend(v)
+        all_old_ids = np.array(all_old_ids, dtype=basetypes.NODE_ID)
+
         for cc_indices in components:
             parent_layer = layer + 1  # must be reset for each connected component
             cc_ids = graph_ids[cc_indices]
+            mask = np.isin(cc_ids, all_old_ids)
+            old_ids = cc_ids[mask]
+            new_ids = _get_flipped_ids(self._old_new_id_d, cc_ids[mask])
+            assert np.all(~mask), f"got old ids {old_ids} -> {new_ids}"
             if len(cc_ids) == 1:
                 # skip connection
                 parent_layer = self.cg.meta.layer_count

From 607e34d4fef128447ea35b134dd61b4b3b3a647c Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 12 Sep 2023 15:02:05 +0000
Subject: [PATCH 065/105] feat: log operation_id in errors

---
 pychunkedgraph/graph/edits.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 5087f503d..08792108e 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -309,6 +309,7 @@ def add_edges(
 
 def _process_l2_agglomeration(
     cg,
+    operation_id: int,
     agg: types.Agglomeration,
     removed_edges: np.ndarray,
     parent_ts: datetime.datetime = None,
@@ -321,7 +322,8 @@ def _process_l2_agglomeration(
 
     cross_edges = agg.cross_edges.get_pairs()
     parents = cg.get_parents(cross_edges[:, 0], time_stamp=parent_ts, raw_only=True)
-    assert np.unique(parents).size == 1, "got cross edges from more than one l2 node"
+    err = f"got cross edges from more than one l2 node; op {operation_id}"
+    assert np.unique(parents).size == 1, err
     root = cg.get_root(parents[0], time_stamp=parent_ts, raw_only=True)
 
     # inactive edges must be filtered out
@@ -384,7 +386,7 @@ def remove_edges(
     for id_ in l2ids:
         agg = l2id_agglomeration_d[id_]
         ccs, graph_ids, cross_edges = _process_l2_agglomeration(
-            cg, agg, removed_edges, parent_ts
+            cg, operation_id, agg, removed_edges, parent_ts
         )
         new_parents = cg.id_client.create_node_ids(chunk_id_map[agg.node_id], len(ccs))
 
@@ -432,6 +434,7 @@ def remove_edges(
         parent_ts=parent_ts,
     )
     new_roots = create_parents.run()
+    raise RuntimeError("haha")
     create_parents.create_new_entries()
     return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
 
@@ -578,7 +581,8 @@ def _create_new_parents(self, layer: int):
             mask = np.isin(cc_ids, all_old_ids)
             old_ids = cc_ids[mask]
             new_ids = _get_flipped_ids(self._old_new_id_d, cc_ids[mask])
-            assert np.all(~mask), f"got old ids {old_ids} -> {new_ids}"
+            err = f"got old ids {old_ids} -> {new_ids}; op {self._operation_id}"
+            assert np.all(~mask), err
             if len(cc_ids) == 1:
                 # skip connection
                 parent_layer = self.cg.meta.layer_count
@@ -637,7 +641,8 @@ def _update_root_id_lineage(self):
         former_roots = _get_flipped_ids(self._new_old_id_d, new_roots)
         former_roots = np.unique(former_roots)
 
-        assert len(former_roots) < 2 or len(new_roots) < 2, "new roots are inconsistent"
+        err = f"new roots are inconsistent; op {self._operation_id}"
+        assert len(former_roots) < 2 or len(new_roots) < 2, err
         for new_root_id in new_roots:
             val_dict = {
                 attributes.Hierarchy.FormerParent: former_roots,
@@ -687,9 +692,10 @@ def create_new_entries(self) -> List:
             for id_ in new_ids:
                 val_dict = val_dicts.get(id_, {})
                 children = self.cg.get_children(id_)
+                err = f"parent layer less than children; op {self._operation_id}"
                 assert np.max(
                     self.cg.get_chunk_layers(children)
-                ) < self.cg.get_chunk_layer(id_), "Parent layer less than children."
+                ) < self.cg.get_chunk_layer(id_), err
                 val_dict[attributes.Hierarchy.Child] = children
                 self.new_entries.append(
                     self.cg.client.mutate_row(

From e79b68942ee21ff60f604557216c83728b26de7d Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 12 Sep 2023 16:24:52 +0000
Subject: [PATCH 066/105] fix: remove temp error

---
 pychunkedgraph/graph/edits.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 08792108e..dd53f8538 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -434,7 +434,6 @@ def remove_edges(
         parent_ts=parent_ts,
     )
     new_roots = create_parents.run()
-    raise RuntimeError("haha")
     create_parents.create_new_entries()
     return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
 

From 2ba682762ee5143d57847253e51b07a832324384 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 12 Sep 2023 20:08:37 +0000
Subject: [PATCH 067/105] add more safeguards

---
 pychunkedgraph/graph/edits.py | 40 +++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index dd53f8538..da574db14 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -21,6 +21,7 @@
 from .utils.serializers import serialize_uint64
 from ..logging.log_db import TimeIt
 from ..utils.general import in2d
+from ..debug.utils import get_l2children
 
 
 def _init_old_hierarchy(cg, l2ids: np.ndarray, parent_ts: datetime.datetime = None):
@@ -187,7 +188,9 @@ def _update_neighbor_cross_edges_single(
         layer_edges = cx_edges_d.get(layer, types.empty_2d)
         counterparts.extend(layer_edges[:, 1])
 
-    cp_cx_edges_d = cg.get_cross_chunk_edges(counterparts, time_stamp=parent_ts)
+    cp_cx_edges_d = cg.get_cross_chunk_edges(
+        counterparts, time_stamp=parent_ts, raw_only=True
+    )
     updated_counterparts = {}
     for counterpart, edges_d in cp_cx_edges_d.items():
         val_dict = {}
@@ -207,17 +210,22 @@ def _update_neighbor_cross_edges_single(
 
 
 def _update_neighbor_cross_edges(
-    cg, new_ids: List[int], new_old_id_d: dict, *, time_stamp, parent_ts
+    cg, new_ids: List[int], new_old_id_d: dict, old_new_id_d, *, time_stamp, parent_ts
 ) -> List:
-    newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts)
+    node_map = {}
+    for k, v in old_new_id_d.items():
+        node_map[k] = next(iter(v))
+
     updated_counterparts = {}
+    newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts)
     for new_id in new_ids:
         cx_edges_d = newid_cx_edges_d[new_id]
         temp_map = {
             old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
         }
+        node_map.update(temp_map)
         result = _update_neighbor_cross_edges_single(
-            cg, new_id, cx_edges_d, temp_map, parent_ts=parent_ts
+            cg, new_id, cx_edges_d, node_map, parent_ts=parent_ts
         )
         updated_counterparts.update(result)
 
@@ -287,6 +295,7 @@ def add_edges(
         cg,
         new_l2_ids,
         new_old_id_d,
+        old_new_id_d,
         time_stamp=time_stamp,
         parent_ts=parent_ts,
     )
@@ -303,6 +312,9 @@ def add_edges(
     )
 
     new_roots = create_parents.run()
+    for new_root in new_roots:
+        l2c = get_l2children(cg, new_root)
+        assert len(l2c) == np.unique(l2c).size, f"inconsistent result op {operation_id}"
     create_parents.create_new_entries()
     return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
 
@@ -321,13 +333,13 @@ def _process_l2_agglomeration(
     chunk_edges = chunk_edges[~in2d(chunk_edges, removed_edges)]
 
     cross_edges = agg.cross_edges.get_pairs()
+    # we must avoid the cache to read roots to get segment state before edit began
     parents = cg.get_parents(cross_edges[:, 0], time_stamp=parent_ts, raw_only=True)
     err = f"got cross edges from more than one l2 node; op {operation_id}"
     assert np.unique(parents).size == 1, err
     root = cg.get_root(parents[0], time_stamp=parent_ts, raw_only=True)
 
     # inactive edges must be filtered out
-    # we must avoid the cache to read roots to get segment state before edit began
     neighbor_roots = cg.get_roots(
         cross_edges[:, 1], raw_only=True, time_stamp=parent_ts
     )
@@ -419,6 +431,7 @@ def remove_edges(
         cg,
         new_l2_ids,
         new_old_id_d,
+        old_new_id_d,
         time_stamp=time_stamp,
         parent_ts=parent_ts,
     )
@@ -434,6 +447,9 @@ def remove_edges(
         parent_ts=parent_ts,
     )
     new_roots = create_parents.run()
+    for new_root in new_roots:
+        l2c = get_l2children(cg, new_root)
+        assert len(l2c) == np.unique(l2c).size, f"inconsistent result op {operation_id}"
     create_parents.create_new_entries()
     return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
 
@@ -481,6 +497,7 @@ def _update_id_lineage(
         layer: int,
         parent_layer: int,
     ):
+        # update newly created children; mask others
         mask = np.in1d(children, self._new_ids_d[layer])
         for child_id in children[mask]:
             child_old_ids = self._new_old_id_d[child_id]
@@ -533,7 +550,7 @@ def _update_cross_edge_cache(self, parent, children):
         cx_edges_d = self.cg.get_cross_chunk_edges(
             children, time_stamp=self._last_successful_ts
         )
-        cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values(), unique=True)
+        cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values())
 
         parent_layer = self.cg.get_chunk_layer(parent)
         edge_nodes = np.unique(np.concatenate([*cx_edges_d.values(), types.empty_2d]))
@@ -569,19 +586,9 @@ def _create_new_parents(self, layer: int):
         layer_node_ids = self._get_layer_node_ids(new_ids, layer)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
         new_parent_ids = []
-        all_old_ids = []
-        for v in self._new_old_id_d.values():
-            all_old_ids.extend(v)
-        all_old_ids = np.array(all_old_ids, dtype=basetypes.NODE_ID)
-
         for cc_indices in components:
             parent_layer = layer + 1  # must be reset for each connected component
             cc_ids = graph_ids[cc_indices]
-            mask = np.isin(cc_ids, all_old_ids)
-            old_ids = cc_ids[mask]
-            new_ids = _get_flipped_ids(self._old_new_id_d, cc_ids[mask])
-            err = f"got old ids {old_ids} -> {new_ids}; op {self._operation_id}"
-            assert np.all(~mask), err
             if len(cc_ids) == 1:
                 # skip connection
                 parent_layer = self.cg.meta.layer_count
@@ -613,6 +620,7 @@ def _create_new_parents(self, layer: int):
             self.cg,
             new_parent_ids,
             self._new_old_id_d,
+            self._old_new_id_d,
             time_stamp=self._time_stamp,
             parent_ts=self._last_successful_ts,
         )

From 3c23f7edb66f612cdff0939ace90e47bca084226 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 12 Sep 2023 20:12:06 +0000
Subject: [PATCH 068/105] fix: circular import

---
 pychunkedgraph/debug/utils.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/pychunkedgraph/debug/utils.py b/pychunkedgraph/debug/utils.py
index e194f4ee1..53152ec6f 100644
--- a/pychunkedgraph/debug/utils.py
+++ b/pychunkedgraph/debug/utils.py
@@ -2,9 +2,6 @@
 
 import numpy as np
 
-from ..graph import ChunkedGraph
-from ..graph.utils.basetypes import NODE_ID
-
 
 def print_attrs(d):
     for k, v in d.items():
@@ -18,12 +15,7 @@ def print_attrs(d):
             print(v)
 
 
-def print_node(
-    cg: ChunkedGraph,
-    node: NODE_ID,
-    indent: int = 0,
-    stop_layer: int = 2,
-) -> None:
+def print_node(cg, node: np.uint64, indent: int = 0, stop_layer: int = 2) -> None:
     children = cg.get_children(node)
     print(f"{' ' * indent}{node}[{len(children)}]")
     if cg.get_chunk_layer(node) <= stop_layer:
@@ -32,8 +24,8 @@ def print_node(
         print_node(cg, child, indent=indent + 4, stop_layer=stop_layer)
 
 
-def get_l2children(cg: ChunkedGraph, node: NODE_ID) -> np.ndarray:
-    nodes = np.array([node], dtype=NODE_ID)
+def get_l2children(cg, node: np.uint64) -> np.ndarray:
+    nodes = np.array([node], dtype=np.uint64)
     layers = cg.get_chunk_layers(nodes)
     assert np.all(layers > 2), "nodes must be at layers > 2"
     l2children = []

From 0d9d090997713b79489ac6f6c3d6252dbc2b53c2 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 12 Sep 2023 20:28:02 +0000
Subject: [PATCH 069/105] fix: consider layer 2 as well

---
 pychunkedgraph/debug/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/debug/utils.py b/pychunkedgraph/debug/utils.py
index 53152ec6f..43562afd2 100644
--- a/pychunkedgraph/debug/utils.py
+++ b/pychunkedgraph/debug/utils.py
@@ -27,7 +27,7 @@ def print_node(cg, node: np.uint64, indent: int = 0, stop_layer: int = 2) -> Non
 def get_l2children(cg, node: np.uint64) -> np.ndarray:
     nodes = np.array([node], dtype=np.uint64)
     layers = cg.get_chunk_layers(nodes)
-    assert np.all(layers > 2), "nodes must be at layers > 2"
+    assert np.all(layers >= 2), "nodes must be at layers >= 2"
     l2children = []
     while nodes.size:
         children = cg.get_children(nodes, flatten=True)

From 399e09033fe1b392f6091f97245af5b0ece78013 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 13 Sep 2023 16:48:49 +0000
Subject: [PATCH 070/105] fix(edits): incorrect order of operations;
 documentation

---
 pychunkedgraph/graph/edits.py | 210 +++++++++++++++++-----------------
 1 file changed, 108 insertions(+), 102 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index da574db14..b9a07493a 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -5,6 +5,7 @@
 from typing import List
 from typing import Tuple
 from typing import Iterable
+from typing import Set
 from collections import defaultdict
 
 import fastremap
@@ -25,15 +26,13 @@
 
 
 def _init_old_hierarchy(cg, l2ids: np.ndarray, parent_ts: datetime.datetime = None):
-    new_old_id_d = defaultdict(set)
-    old_new_id_d = defaultdict(set)
     old_hierarchy_d = {id_: {2: id_} for id_ in l2ids}
     for id_ in l2ids:
         layer_parent_d = cg.get_all_parents_dict(id_, time_stamp=parent_ts)
         old_hierarchy_d[id_].update(layer_parent_d)
         for parent in layer_parent_d.values():
             old_hierarchy_d[parent] = old_hierarchy_d[id_]
-    return new_old_id_d, old_new_id_d, old_hierarchy_d
+    return old_hierarchy_d
 
 
 def _analyze_affected_edges(
@@ -179,64 +178,6 @@ def check_fake_edges(
     return atomic_edges, rows
 
 
-def _update_neighbor_cross_edges_single(
-    cg, new_id: int, cx_edges_d: dict, node_map: dict, *, parent_ts
-) -> dict:
-    node_layer = cg.get_chunk_layer(new_id)
-    counterparts = []
-    for layer in range(node_layer, cg.meta.layer_count):
-        layer_edges = cx_edges_d.get(layer, types.empty_2d)
-        counterparts.extend(layer_edges[:, 1])
-
-    cp_cx_edges_d = cg.get_cross_chunk_edges(
-        counterparts, time_stamp=parent_ts, raw_only=True
-    )
-    updated_counterparts = {}
-    for counterpart, edges_d in cp_cx_edges_d.items():
-        val_dict = {}
-        for layer in range(2, cg.meta.layer_count):
-            edges = edges_d.get(layer, types.empty_2d)
-            if edges.size == 0:
-                continue
-            assert np.all(edges[:, 0] == counterpart)
-            edges = fastremap.remap(edges, node_map, preserve_missing_labels=True)
-            edges_d[layer] = edges
-            val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
-        if not val_dict:
-            continue
-        cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
-        updated_counterparts[counterpart] = val_dict
-    return updated_counterparts
-
-
-def _update_neighbor_cross_edges(
-    cg, new_ids: List[int], new_old_id_d: dict, old_new_id_d, *, time_stamp, parent_ts
-) -> List:
-    node_map = {}
-    for k, v in old_new_id_d.items():
-        node_map[k] = next(iter(v))
-
-    updated_counterparts = {}
-    newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts)
-    for new_id in new_ids:
-        cx_edges_d = newid_cx_edges_d[new_id]
-        temp_map = {
-            old_id: new_id for old_id in _get_flipped_ids(new_old_id_d, [new_id])
-        }
-        node_map.update(temp_map)
-        result = _update_neighbor_cross_edges_single(
-            cg, new_id, cx_edges_d, node_map, parent_ts=parent_ts
-        )
-        updated_counterparts.update(result)
-
-    updated_entries = []
-    for node, val_dict in updated_counterparts.items():
-        rowkey = serialize_uint64(node)
-        row = cg.client.mutate_row(rowkey, val_dict, time_stamp=time_stamp)
-        updated_entries.append(row)
-    return updated_entries
-
-
 def add_edges(
     cg,
     *,
@@ -253,9 +194,10 @@ def add_edges(
     if not allow_same_segment_merge:
         roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts)
         assert np.unique(roots).size == 2, "L2 IDs must belong to different roots."
-    new_old_id_d, old_new_id_d, old_hierarchy_d = _init_old_hierarchy(
-        cg, l2ids, parent_ts=parent_ts
-    )
+
+    new_old_id_d = defaultdict(set)
+    old_new_id_d = defaultdict(set)
+    old_hierarchy_d = _init_old_hierarchy(cg, l2ids, parent_ts=parent_ts)
     atomic_children_d = cg.get_children(l2ids)
     cross_edges_d = merge_cross_edge_dicts(
         cg.get_cross_chunk_edges(l2ids, time_stamp=parent_ts), l2_cross_edges_d
@@ -291,14 +233,6 @@ def add_edges(
             new_cx_edges_d[layer] = edges
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
-    updated_entries = _update_neighbor_cross_edges(
-        cg,
-        new_l2_ids,
-        new_old_id_d,
-        old_new_id_d,
-        time_stamp=time_stamp,
-        parent_ts=parent_ts,
-    )
 
     create_parents = CreateParentNodes(
         cg,
@@ -316,7 +250,7 @@ def add_edges(
         l2c = get_l2children(cg, new_root)
         assert len(l2c) == np.unique(l2c).size, f"inconsistent result op {operation_id}"
     create_parents.create_new_entries()
-    return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
+    return new_roots, new_l2_ids, create_parents.new_entries
 
 
 def _process_l2_agglomeration(
@@ -388,9 +322,9 @@ def remove_edges(
     roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts)
     assert np.unique(roots).size == 1, "L2 IDs must belong to same root."
 
-    new_old_id_d, old_new_id_d, old_hierarchy_d = _init_old_hierarchy(
-        cg, l2ids, parent_ts=parent_ts
-    )
+    new_old_id_d = defaultdict(set)
+    old_new_id_d = defaultdict(set)
+    old_hierarchy_d = _init_old_hierarchy(cg, l2ids, parent_ts=parent_ts)
     chunk_id_map = dict(zip(l2ids.tolist(), cg.get_chunk_ids_from_node_ids(l2ids)))
 
     removed_edges = np.concatenate([atomic_edges, atomic_edges[:, ::-1]], axis=0)
@@ -427,14 +361,6 @@ def remove_edges(
             new_cx_edges_d[layer] = edges
             assert np.all(edges[:, 0] == new_id)
         cg.cache.cross_chunk_edges_cache[new_id] = new_cx_edges_d
-    updated_entries = _update_neighbor_cross_edges(
-        cg,
-        new_l2_ids,
-        new_old_id_d,
-        old_new_id_d,
-        time_stamp=time_stamp,
-        parent_ts=parent_ts,
-    )
 
     create_parents = CreateParentNodes(
         cg,
@@ -451,7 +377,7 @@ def remove_edges(
         l2c = get_l2children(cg, new_root)
         assert len(l2c) == np.unique(l2c).size, f"inconsistent result op {operation_id}"
     create_parents.create_new_entries()
-    return new_roots, new_l2_ids, updated_entries + create_parents.new_entries
+    return new_roots, new_l2_ids, create_parents.new_entries
 
 
 def _get_flipped_ids(id_map, node_ids):
@@ -466,6 +392,82 @@ def _get_flipped_ids(id_map, node_ids):
     return np.concatenate(ids)
 
 
+def _update_neighbor_cross_edges_single(
+    cg, new_id: int, cx_edges_d: dict, node_map: dict, *, parent_ts
+) -> dict:
+    """
+    For each new_id, get counterparts and update its cross chunk edges.
+    Some of them maybe updated multiple times so we need to collect them first
+    and then write to storage to consolidate the mutations.
+    Returns updated counterparts.
+    """
+    node_layer = cg.get_chunk_layer(new_id)
+    counterparts = []
+    for layer in range(node_layer, cg.meta.layer_count):
+        layer_edges = cx_edges_d.get(layer, types.empty_2d)
+        counterparts.extend(layer_edges[:, 1])
+
+    cp_cx_edges_d = cg.get_cross_chunk_edges(
+        counterparts, time_stamp=parent_ts, raw_only=True
+    )
+    updated_counterparts = {}
+    for counterpart, edges_d in cp_cx_edges_d.items():
+        val_dict = {}
+        for layer in range(2, cg.meta.layer_count):
+            edges = edges_d.get(layer, types.empty_2d)
+            if edges.size == 0:
+                continue
+            assert np.all(edges[:, 0] == counterpart)
+            edges = fastremap.remap(edges, node_map, preserve_missing_labels=True)
+            edges_d[layer] = edges
+            val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+        if not val_dict:
+            continue
+        cg.cache.cross_chunk_edges_cache[counterpart] = edges_d
+        updated_counterparts[counterpart] = val_dict
+    return updated_counterparts
+
+
+def _update_neighbor_cross_edges(
+    cg,
+    new_ids: List[int],
+    new_old_id: dict,
+    old_new_id,
+    *,
+    time_stamp,
+    parent_ts,
+) -> List:
+    """
+    For each new_id, get counterparts and update its cross chunk edges.
+    Some of them maybe updated multiple times so we need to collect them first
+    and then write to storage to consolidate the mutations.
+    Returns mutations to updated counterparts/partner nodes.
+    """
+    updated_counterparts = {}
+    newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts)
+
+    node_map = {}
+    for k, v in old_new_id.items():
+        if len(v) == 1:
+            node_map[k] = next(iter(v))
+
+    for new_id in new_ids:
+        cx_edges_d = newid_cx_edges_d[new_id]
+        m = {old_id: new_id for old_id in _get_flipped_ids(new_old_id, [new_id])}
+        node_map.update(m)
+        result = _update_neighbor_cross_edges_single(
+            cg, new_id, cx_edges_d, node_map, parent_ts=parent_ts
+        )
+        updated_counterparts.update(result)
+
+    updated_entries = []
+    for node, val_dict in updated_counterparts.items():
+        rowkey = serialize_uint64(node)
+        row = cg.client.mutate_row(rowkey, val_dict, time_stamp=time_stamp)
+        updated_entries.append(row)
+    return updated_entries
+
+
 class CreateParentNodes:
     def __init__(
         self,
@@ -474,8 +476,8 @@ def __init__(
         new_l2_ids: Iterable,
         operation_id: basetypes.OPERATION_ID,
         time_stamp: datetime.datetime,
-        new_old_id_d: Dict[np.uint64, Iterable[np.uint64]] = None,
-        old_new_id_d: Dict[np.uint64, Iterable[np.uint64]] = None,
+        new_old_id_d: Dict[np.uint64, Set[np.uint64]] = None,
+        old_new_id_d: Dict[np.uint64, Set[np.uint64]] = None,
         old_hierarchy_d: Dict[np.uint64, Dict[int, np.uint64]] = None,
         parent_ts: datetime.datetime = None,
     ):
@@ -547,12 +549,15 @@ def _update_cross_edge_cache(self, parent, children):
         updates cross chunk edges in cache;
         this can only be done after all new components at a layer have IDs
         """
+        parent_layer = self.cg.get_chunk_layer(parent)
+        if parent_layer == 2:
+            # l2 cross edges have already been updated by this point
+            return
         cx_edges_d = self.cg.get_cross_chunk_edges(
             children, time_stamp=self._last_successful_ts
         )
         cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values())
 
-        parent_layer = self.cg.get_chunk_layer(parent)
         edge_nodes = np.unique(np.concatenate([*cx_edges_d.values(), types.empty_2d]))
         edge_parents = self.cg.get_roots(
             edge_nodes,
@@ -603,28 +608,15 @@ def _create_new_parents(self, layer: int):
                 self.cg.get_parent_chunk_id(cc_ids[0], parent_layer),
                 root_chunk=parent_layer == self.cg.meta.layer_count,
             )
+            new_parent_ids.append(parent_id)
             self._new_ids_d[parent_layer].append(parent_id)
             self._update_id_lineage(parent_id, cc_ids, layer, parent_layer)
-            new_parent_ids.append(parent_id)
-
             self.cg.cache.children_cache[parent_id] = cc_ids
             cache_utils.update(
                 self.cg.cache.parents_cache,
                 cc_ids,
                 parent_id,
             )
-        for new_id in new_parent_ids:
-            children = self.cg.get_children(new_id)
-            self._update_cross_edge_cache(new_id, children)
-        entries = _update_neighbor_cross_edges(
-            self.cg,
-            new_parent_ids,
-            self._new_old_id_d,
-            self._old_new_id_d,
-            time_stamp=self._time_stamp,
-            parent_ts=self._last_successful_ts,
-        )
-        self.new_entries.extend(entries)
 
     def run(self) -> Iterable:
         """
@@ -640,6 +632,20 @@ def run(self) -> Iterable:
                 self.cg.graph_id,
                 self._operation_id,
             ):
+                # all new IDs in this layer have been created
+                # update their cross chunk edges and their neighbors'
+                for new_id in self._new_ids_d[layer]:
+                    children = self.cg.get_children(new_id)
+                    self._update_cross_edge_cache(new_id, children)
+                entries = _update_neighbor_cross_edges(
+                    self.cg,
+                    self._new_ids_d[layer],
+                    self._new_old_id_d,
+                    self._old_new_id_d,
+                    time_stamp=self._time_stamp,
+                    parent_ts=self._last_successful_ts,
+                )
+                self.new_entries.extend(entries)
                 self._create_new_parents(layer)
         return self._new_ids_d[self.cg.meta.layer_count]
 

From 6c707c0c68c50be316e19d2d8ee6867e26b5d64f Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Fri, 15 Sep 2023 14:51:57 +0000
Subject: [PATCH 071/105] feat(ingest): add tests command

---
 pychunkedgraph/debug/cross_edge_test.py |  60 --------
 pychunkedgraph/debug/existence_test.py  |  78 -----------
 pychunkedgraph/debug/family_test.py     |  54 -------
 pychunkedgraph/ingest/cli.py            |   9 ++
 pychunkedgraph/ingest/simple_tests.py   | 178 ++++++++++++++++++++++++
 5 files changed, 187 insertions(+), 192 deletions(-)
 delete mode 100644 pychunkedgraph/debug/cross_edge_test.py
 delete mode 100644 pychunkedgraph/debug/existence_test.py
 delete mode 100644 pychunkedgraph/debug/family_test.py
 create mode 100644 pychunkedgraph/ingest/simple_tests.py

diff --git a/pychunkedgraph/debug/cross_edge_test.py b/pychunkedgraph/debug/cross_edge_test.py
deleted file mode 100644
index 25bacfa0b..000000000
--- a/pychunkedgraph/debug/cross_edge_test.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import os
-from datetime import datetime
-import numpy as np
-
-from pychunkedgraph.graph import chunkedgraph
-from pychunkedgraph.graph import attributes
-
-#os.environ["GOOGLE_APPLICATION_CREDENTIALS"] =  "/home/svenmd/.cloudvolume/secrets/google-secret.json"
-
-layer = 2
-n_chunks = 1000
-n_segments_per_chunk = 200
-# timestamp = datetime.datetime.fromtimestamp(1588875769) 
-timestamp = datetime.utcnow()
-
-cg = chunkedgraph.ChunkedGraph(graph_id="pinky_nf_v2")
-
-np.random.seed(42)
-
-node_ids = []
-for _ in range(n_chunks):
-    c_x = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][0])
-    c_y = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][1])
-    c_z = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][2])
-
-    chunk_id = cg.get_chunk_id(layer=layer, x=c_x, y=c_y, z=c_z)
-
-    max_segment_id = cg.get_segment_id(cg.id_client.get_max_node_id(chunk_id))
-
-    if max_segment_id < 10:
-        continue
-
-    segment_ids = np.random.randint(1, max_segment_id, n_segments_per_chunk)
-
-    for segment_id in segment_ids:
-        node_ids.append(cg.get_node_id(np.uint64(segment_id), np.uint64(chunk_id)))
-
-rows = cg.client.read_nodes(node_ids=node_ids, end_time=timestamp, 
-                            properties=attributes.Hierarchy.Parent)
-valid_node_ids = []
-non_valid_node_ids = []
-for k in rows.keys():
-    if len(rows[k]) > 0:
-        valid_node_ids.append(k)
-    else:
-        non_valid_node_ids.append(k)
-
-cc_edges = cg.get_atomic_cross_edges(valid_node_ids)
-cc_ids = np.unique(np.concatenate([np.concatenate(list(v.values())) for v in list(cc_edges.values()) if len(v.values())]))
-
-roots = cg.get_roots(cc_ids)
-root_dict = dict(zip(cc_ids, roots)) 
-root_dict_vec = np.vectorize(root_dict.get)
-
-for k in cc_edges:
-    if len(cc_edges[k]) == 0:
-        continue
-    local_ids = np.unique(np.concatenate(list(cc_edges[k].values())))
-    
-    assert len(np.unique(root_dict_vec(local_ids)))
\ No newline at end of file
diff --git a/pychunkedgraph/debug/existence_test.py b/pychunkedgraph/debug/existence_test.py
deleted file mode 100644
index 757d3d542..000000000
--- a/pychunkedgraph/debug/existence_test.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import os
-from datetime import datetime
-import numpy as np
-
-from pychunkedgraph.graph import chunkedgraph
-from pychunkedgraph.graph import attributes
-
-#os.environ["GOOGLE_APPLICATION_CREDENTIALS"] =  "/home/svenmd/.cloudvolume/secrets/google-secret.json"
-
-layer = 2
-n_chunks = 100
-n_segments_per_chunk = 200
-# timestamp = datetime.datetime.fromtimestamp(1588875769) 
-timestamp = datetime.utcnow()
-
-cg = chunkedgraph.ChunkedGraph(graph_id="pinky_nf_v2")
-
-np.random.seed(42)
-
-node_ids = []
-for _ in range(n_chunks):
-    c_x = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][0])
-    c_y = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][1])
-    c_z = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][2])
-
-    chunk_id = cg.get_chunk_id(layer=layer, x=c_x, y=c_y, z=c_z)
-
-    max_segment_id = cg.get_segment_id(cg.id_client.get_max_node_id(chunk_id))
-
-    if max_segment_id < 10:
-        continue
-
-    segment_ids = np.random.randint(1, max_segment_id, n_segments_per_chunk)
-
-    for segment_id in segment_ids:
-        node_ids.append(cg.get_node_id(np.uint64(segment_id), np.uint64(chunk_id)))
-
-rows = cg.client.read_nodes(node_ids=node_ids, end_time=timestamp, 
-                            properties=attributes.Hierarchy.Parent)
-valid_node_ids = []
-non_valid_node_ids = []
-for k in rows.keys():
-    if len(rows[k]) > 0:
-        valid_node_ids.append(k)
-    else:
-        non_valid_node_ids.append(k)
-
-roots = cg.get_roots(valid_node_ids, time_stamp=timestamp)
-
-roots = []
-try:
-    roots = cg.get_roots(valid_node_ids)
-    assert len(roots) == len(valid_node_ids)
-    print(f"ALL {len(roots)} have been successful!")
-except:
-    print("At least one node failed. Checking nodes one by one now")
-
-if len(roots) != len(valid_node_ids):
-    log_dict = {}
-    success_dict = {}
-    for node_id in valid_node_ids:
-        try:
-            root = cg.get_root(node_id, time_stamp=timestamp)
-            print(f"Success: {node_id} from chunk {cg.get_chunk_id(node_id)}")
-            success_dict[node_id] = True
-        except Exception as e:
-            print(f"{node_id} from chunk {cg.get_chunk_id(node_id)} failed with {e}")
-            success_dict[node_id] = False
-
-            t_id = node_id
-
-            while t_id is not None:
-                last_working_chunk = cg.get_chunk_id(t_id)
-                t_id = cg.get_parent(t_id)
-
-            print(f"Failed on layer {cg.get_chunk_layer(last_working_chunk)} in chunk {last_working_chunk}")
-            log_dict[node_id] = last_working_chunk
-
diff --git a/pychunkedgraph/debug/family_test.py b/pychunkedgraph/debug/family_test.py
deleted file mode 100644
index 198351e74..000000000
--- a/pychunkedgraph/debug/family_test.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import os
-from datetime import datetime
-import numpy as np
-
-from pychunkedgraph.graph import chunkedgraph
-from pychunkedgraph.graph import attributes
-
-# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] =  "/home/svenmd/.cloudvolume/secrets/google-secret.json"
-
-layers = [2, 3, 4, 5, 6, 7]
-n_chunks = 10
-n_segments_per_chunk = 200
-# timestamp = datetime.datetime.fromtimestamp(1588875769) 
-timestamp = datetime.utcnow()
-
-cg = chunkedgraph.ChunkedGraph(graph_id="pinky_nf_v2")
-
-np.random.seed(42)
-
-node_ids = []
-
-for layer in layers:
-    for _ in range(n_chunks):
-        c_x = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][0])
-        c_y = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][1])
-        c_z = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][2])
-
-        chunk_id = cg.get_chunk_id(layer=layer, x=c_x, y=c_y, z=c_z)
-
-        max_segment_id = cg.get_segment_id(cg.id_client.get_max_node_id(chunk_id))
-
-        if max_segment_id < 10:
-            continue
-
-        segment_ids = np.random.randint(1, max_segment_id, n_segments_per_chunk)
-
-        for segment_id in segment_ids:
-            node_ids.append(cg.get_node_id(np.uint64(segment_id), np.uint64(chunk_id)))
-
-rows = cg.client.read_nodes(node_ids=node_ids, end_time=timestamp, 
-                            properties=attributes.Hierarchy.Parent)
-valid_node_ids = []
-non_valid_node_ids = []
-for k in rows.keys():
-    if len(rows[k]) > 0:
-        valid_node_ids.append(k)
-    else:
-        non_valid_node_ids.append(k)
-
-parents = cg.get_parents(valid_node_ids, time_stamp=timestamp)
-children_dict = cg.get_children(parents)
-
-for child, parent in zip(valid_node_ids, parents):
-    assert child in children_dict[parent]
\ No newline at end of file
diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 89106a097..67182fc81 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -21,6 +21,7 @@
 from .manager import IngestionManager
 from .utils import bootstrap
 from .utils import chunk_id_str
+from .simple_tests import run_all
 from .create.abstract_layers import add_layer
 from ..graph.chunkedgraph import ChunkedGraph
 from ..utils.redis import get_redis_connection
@@ -196,3 +197,11 @@ def ingest_chunk_local(graph_id: str, chunk_info, n_threads: int):
     else:
         cg = ChunkedGraph(graph_id=graph_id)
         add_layer(cg, chunk_info[0], chunk_info[1:], n_threads=n_threads)
+    cg = ChunkedGraph(graph_id=graph_id)
+    add_layer(cg, chunk_info[0], chunk_info[1:], n_threads=n_threads)
+
+
+@ingest_cli.command("run_tests")
+@click.argument("graph_id", type=str)
+def run_tests(graph_id):
+    run_all(ChunkedGraph(graph_id=graph_id))
diff --git a/pychunkedgraph/ingest/simple_tests.py b/pychunkedgraph/ingest/simple_tests.py
new file mode 100644
index 000000000..33946bcec
--- /dev/null
+++ b/pychunkedgraph/ingest/simple_tests.py
@@ -0,0 +1,178 @@
+# pylint: disable=invalid-name, missing-function-docstring, broad-exception-caught
+
+"""
+Some sanity tests to ensure chunkedgraph was created properly.
+"""
+
+from datetime import datetime
+import numpy as np
+
+from pychunkedgraph.graph import ChunkedGraph
+from pychunkedgraph.graph import attributes
+
+
+def family(cg: ChunkedGraph):
+    np.random.seed(42)
+    n_chunks = 100
+    n_segments_per_chunk = 200
+    timestamp = datetime.utcnow()
+
+    node_ids = []
+    for layer in range(2, cg.meta.layer_count - 1):
+        for _ in range(n_chunks):
+            c_x = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][0])
+            c_y = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][1])
+            c_z = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][2])
+            chunk_id = cg.get_chunk_id(layer=layer, x=c_x, y=c_y, z=c_z)
+            max_segment_id = cg.get_segment_id(cg.id_client.get_max_node_id(chunk_id))
+            if max_segment_id < 10:
+                continue
+
+            segment_ids = np.random.randint(1, max_segment_id, n_segments_per_chunk)
+            for segment_id in segment_ids:
+                node_ids.append(
+                    cg.get_node_id(np.uint64(segment_id), np.uint64(chunk_id))
+                )
+
+    rows = cg.client.read_nodes(
+        node_ids=node_ids, end_time=timestamp, properties=attributes.Hierarchy.Parent
+    )
+    valid_node_ids = []
+    non_valid_node_ids = []
+    for k in rows.keys():
+        if len(rows[k]) > 0:
+            valid_node_ids.append(k)
+        else:
+            non_valid_node_ids.append(k)
+
+    parents = cg.get_parents(valid_node_ids, time_stamp=timestamp)
+    children_dict = cg.get_children(parents)
+    for child, parent in zip(valid_node_ids, parents):
+        assert child in children_dict[parent]
+    print("success")
+
+
+def existence(cg: ChunkedGraph):
+    np.random.seed(42)
+    layer = 2
+    n_chunks = 100
+    n_segments_per_chunk = 200
+    timestamp = datetime.utcnow()
+    node_ids = []
+    for _ in range(n_chunks):
+        c_x = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][0])
+        c_y = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][1])
+        c_z = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][2])
+        chunk_id = cg.get_chunk_id(layer=layer, x=c_x, y=c_y, z=c_z)
+        max_segment_id = cg.get_segment_id(cg.id_client.get_max_node_id(chunk_id))
+        if max_segment_id < 10:
+            continue
+
+        segment_ids = np.random.randint(1, max_segment_id, n_segments_per_chunk)
+        for segment_id in segment_ids:
+            node_ids.append(cg.get_node_id(np.uint64(segment_id), np.uint64(chunk_id)))
+
+    rows = cg.client.read_nodes(
+        node_ids=node_ids, end_time=timestamp, properties=attributes.Hierarchy.Parent
+    )
+    valid_node_ids = []
+    non_valid_node_ids = []
+    for k in rows.keys():
+        if len(rows[k]) > 0:
+            valid_node_ids.append(k)
+        else:
+            non_valid_node_ids.append(k)
+
+    roots = []
+    try:
+        roots = cg.get_roots(valid_node_ids)
+        assert len(roots) == len(valid_node_ids)
+        print("success")
+    except Exception as e:
+        print(f"Something went wrong: {e}")
+        print("At least one node failed. Checking nodes one by one:")
+
+    if len(roots) != len(valid_node_ids):
+        log_dict = {}
+        success_dict = {}
+        for node_id in valid_node_ids:
+            try:
+                _ = cg.get_root(node_id, time_stamp=timestamp)
+                print(f"Success: {node_id} from chunk {cg.get_chunk_id(node_id)}")
+                success_dict[node_id] = True
+            except Exception as e:
+                print(f"{node_id} - chunk {cg.get_chunk_id(node_id)} failed: {e}")
+                success_dict[node_id] = False
+                t_id = node_id
+                while t_id is not None:
+                    last_working_chunk = cg.get_chunk_id(t_id)
+                    t_id = cg.get_parent(t_id)
+
+                layer = cg.get_chunk_layer(last_working_chunk)
+                print(f"Failed on layer {layer} in chunk {last_working_chunk}")
+                log_dict[node_id] = last_working_chunk
+
+
+def cross_edges(cg: ChunkedGraph):
+    np.random.seed(42)
+    layer = 2
+    n_chunks = 10
+    n_segments_per_chunk = 200
+    timestamp = datetime.utcnow()
+    node_ids = []
+    for _ in range(n_chunks):
+        c_x = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][0])
+        c_y = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][1])
+        c_z = np.random.randint(0, cg.meta.layer_chunk_bounds[layer][2])
+        chunk_id = cg.get_chunk_id(layer=layer, x=c_x, y=c_y, z=c_z)
+        max_segment_id = cg.get_segment_id(cg.id_client.get_max_node_id(chunk_id))
+        if max_segment_id < 10:
+            continue
+
+        segment_ids = np.random.randint(1, max_segment_id, n_segments_per_chunk)
+        for segment_id in segment_ids:
+            node_ids.append(cg.get_node_id(np.uint64(segment_id), np.uint64(chunk_id)))
+
+    rows = cg.client.read_nodes(
+        node_ids=node_ids, end_time=timestamp, properties=attributes.Hierarchy.Parent
+    )
+    valid_node_ids = []
+    non_valid_node_ids = []
+    for k in rows.keys():
+        if len(rows[k]) > 0:
+            valid_node_ids.append(k)
+        else:
+            non_valid_node_ids.append(k)
+
+    cc_edges = cg.get_atomic_cross_edges(valid_node_ids)
+    cc_ids = np.unique(
+        np.concatenate(
+            [
+                np.concatenate(list(v.values()))
+                for v in list(cc_edges.values())
+                if len(v.values())
+            ]
+        )
+    )
+
+    roots = cg.get_roots(cc_ids)
+    root_dict = dict(zip(cc_ids, roots))
+    root_dict_vec = np.vectorize(root_dict.get)
+
+    for k in cc_edges:
+        if len(cc_edges[k]) == 0:
+            continue
+        local_ids = np.unique(np.concatenate(list(cc_edges[k].values())))
+        assert len(np.unique(root_dict_vec(local_ids)))
+    print("success")
+
+
+def run_all(cg: ChunkedGraph):
+    print("Running family tests:")
+    family(cg)
+
+    print("\nRunning existence tests:")
+    existence(cg)
+
+    print("\nRunning cross_edges tests:")
+    cross_edges(cg)

From 9d9887bba417a451780442b57cb6c70d9f095ecf Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 26 Sep 2023 15:46:54 +0000
Subject: [PATCH 072/105] fix(edits): make sure to add reverse edges

---
 pychunkedgraph/graph/edits.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index b9a07493a..f4b6fc0ce 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -403,9 +403,12 @@ def _update_neighbor_cross_edges_single(
     """
     node_layer = cg.get_chunk_layer(new_id)
     counterparts = []
+    counterpart_layers = {}
     for layer in range(node_layer, cg.meta.layer_count):
         layer_edges = cx_edges_d.get(layer, types.empty_2d)
         counterparts.extend(layer_edges[:, 1])
+        layers_d = dict(zip(layer_edges[:, 1], [layer] * len(layer_edges[:, 1])))
+        counterpart_layers.update(layers_d)
 
     cp_cx_edges_d = cg.get_cross_chunk_edges(
         counterparts, time_stamp=parent_ts, raw_only=True
@@ -413,12 +416,18 @@ def _update_neighbor_cross_edges_single(
     updated_counterparts = {}
     for counterpart, edges_d in cp_cx_edges_d.items():
         val_dict = {}
+        counterpart_layer = counterpart_layers[counterpart]
         for layer in range(2, cg.meta.layer_count):
             edges = edges_d.get(layer, types.empty_2d)
             if edges.size == 0:
                 continue
             assert np.all(edges[:, 0] == counterpart)
             edges = fastremap.remap(edges, node_map, preserve_missing_labels=True)
+            if layer == counterpart_layer:
+                reverse_edge = np.array([counterpart, new_id], dtype=basetypes.NODE_ID)
+                edges = np.concatenate([edges, [reverse_edge]])
+                edges = np.unique(edges, axis=0)
+
             edges_d[layer] = edges
             val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
         if not val_dict:
@@ -445,7 +454,6 @@ def _update_neighbor_cross_edges(
     """
     updated_counterparts = {}
     newid_cx_edges_d = cg.get_cross_chunk_edges(new_ids, time_stamp=parent_ts)
-
     node_map = {}
     for k, v in old_new_id.items():
         if len(v) == 1:
@@ -459,7 +467,6 @@ def _update_neighbor_cross_edges(
             cg, new_id, cx_edges_d, node_map, parent_ts=parent_ts
         )
         updated_counterparts.update(result)
-
     updated_entries = []
     for node, val_dict in updated_counterparts.items():
         rowkey = serialize_uint64(node)
@@ -557,7 +564,6 @@ def _update_cross_edge_cache(self, parent, children):
             children, time_stamp=self._last_successful_ts
         )
         cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values())
-
         edge_nodes = np.unique(np.concatenate([*cx_edges_d.values(), types.empty_2d]))
         edge_parents = self.cg.get_roots(
             edge_nodes,
@@ -590,7 +596,6 @@ def _create_new_parents(self, layer: int):
         new_ids = self._new_ids_d[layer]
         layer_node_ids = self._get_layer_node_ids(new_ids, layer)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
-        new_parent_ids = []
         for cc_indices in components:
             parent_layer = layer + 1  # must be reset for each connected component
             cc_ids = graph_ids[cc_indices]
@@ -608,7 +613,6 @@ def _create_new_parents(self, layer: int):
                 self.cg.get_parent_chunk_id(cc_ids[0], parent_layer),
                 root_chunk=parent_layer == self.cg.meta.layer_count,
             )
-            new_parent_ids.append(parent_id)
             self._new_ids_d[parent_layer].append(parent_id)
             self._update_id_lineage(parent_id, cc_ids, layer, parent_layer)
             self.cg.cache.children_cache[parent_id] = cc_ids

From 4a0cba88d37323137e59bfc443521beee0b871e2 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 26 Sep 2023 19:47:24 +0000
Subject: [PATCH 073/105] fix(edits): read neighbor cx edges from cache

---
 pychunkedgraph/graph/edits.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index f4b6fc0ce..36188a03e 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -410,9 +410,7 @@ def _update_neighbor_cross_edges_single(
         layers_d = dict(zip(layer_edges[:, 1], [layer] * len(layer_edges[:, 1])))
         counterpart_layers.update(layers_d)
 
-    cp_cx_edges_d = cg.get_cross_chunk_edges(
-        counterparts, time_stamp=parent_ts, raw_only=True
-    )
+    cp_cx_edges_d = cg.get_cross_chunk_edges(counterparts, time_stamp=parent_ts)
     updated_counterparts = {}
     for counterpart, edges_d in cp_cx_edges_d.items():
         val_dict = {}

From 3b5c2bc2c09a61e899ccd5414d99e07884a82100 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 27 Sep 2023 16:17:45 +0000
Subject: [PATCH 074/105] fix(edits): check for no cx edges; comments

---
 pychunkedgraph/graph/edits.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 36188a03e..c348b4fcc 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -269,8 +269,11 @@ def _process_l2_agglomeration(
     cross_edges = agg.cross_edges.get_pairs()
     # we must avoid the cache to read roots to get segment state before edit began
     parents = cg.get_parents(cross_edges[:, 0], time_stamp=parent_ts, raw_only=True)
+
+    # if there are cross edges, there must be a single parent.
+    # if there aren't any, there must be no parents. XOR these 2 conditions.
     err = f"got cross edges from more than one l2 node; op {operation_id}"
-    assert np.unique(parents).size == 1, err
+    assert (np.unique(parents).size == 1) != (cross_edges.size == 0), err
     root = cg.get_root(parents[0], time_stamp=parent_ts, raw_only=True)
 
     # inactive edges must be filtered out

From 2d3441bc98a79bb4f8f39db88cdd578f8605160f Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Tue, 3 Oct 2023 21:13:11 +0000
Subject: [PATCH 075/105] fix(edits): update neighbor cx edges in a skipped
 layer

---
 pychunkedgraph/graph/edits.py | 54 ++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 7 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index c348b4fcc..9f96db786 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -623,6 +623,45 @@ def _create_new_parents(self, layer: int):
                 parent_id,
             )
 
+    def _update_skipped_neighbors(self, current_layer):
+        """
+        Update neighbor nodes in a skipped layer to reflect changes in their descendants.
+        Get neighbors of new ids at `current_layer - 1`.
+        Get their parents and update their cx edges.
+        """
+        neighbors = []
+        lower_new_ids = self._new_ids_d[current_layer - 1]
+        newid_cx_edges_d = self.cg.get_cross_chunk_edges(
+            lower_new_ids, time_stamp=self._last_successful_ts
+        )
+        for cx_edges_d in newid_cx_edges_d.values():
+            for edges in cx_edges_d.values():
+                neighbors.extend(edges[:, 1])
+
+        neighbor_parents = self.cg.get_parents(
+            neighbors, time_stamp=self._last_successful_ts
+        )
+        parents_layers = self.cg.get_chunk_layers(neighbor_parents)
+        neighbor_parents = neighbor_parents[parents_layers == current_layer]
+
+        updated_entries = []
+        children_d = self.cg.get_children(neighbor_parents)
+        for parent, children in children_d.items():
+            self._update_cross_edge_cache(parent, children)
+            edges_d = self.cg.cache.cross_chunk_edges_cache[parent]
+            val_dict = {}
+            for layer in range(2, self.cg.meta.layer_count):
+                edges = edges_d.get(layer, types.empty_2d)
+                if edges.size == 0:
+                    continue
+                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+            rowkey = serialize_uint64(parent)
+            row = self.cg.client.mutate_row(
+                rowkey, val_dict, time_stamp=self._time_stamp
+            )
+            updated_entries.append(row)
+        return updated_entries
+
     def run(self) -> Iterable:
         """
         After new level 2 IDs are created, create parents in higher layers.
@@ -631,14 +670,15 @@ def run(self) -> Iterable:
         self._new_ids_d[2] = self._new_l2_ids
         for layer in range(2, self.cg.meta.layer_count):
             if len(self._new_ids_d[layer]) == 0:
+                # if there are no new ids in a layer due to a skipped connection
+                # ensure updates to cx edges of parents of neighbors from previous layer
+                entries = self._update_skipped_neighbors(layer)
+                self.new_entries.extend(entries)
                 continue
-            with TimeIt(
-                f"create_new_parents_layer.{layer}",
-                self.cg.graph_id,
-                self._operation_id,
-            ):
-                # all new IDs in this layer have been created
-                # update their cross chunk edges and their neighbors'
+            # all new IDs in this layer have been created
+            # update their cross chunk edges and their neighbors'
+            m = f"create_new_parents_layer.{layer}"
+            with TimeIt(m, self.cg.graph_id, self._operation_id):
                 for new_id in self._new_ids_d[layer]:
                     children = self.cg.get_children(new_id)
                     self._update_cross_edge_cache(new_id, children)

From 3131a0d70635d94f562b4d7fc84da0d54b143056 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 11 Oct 2023 21:03:11 +0000
Subject: [PATCH 076/105] fix(edits): make sure to update all skipped neighbors

---
 pychunkedgraph/graph/edits.py | 96 ++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 53 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 9f96db786..2edfd3137 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -428,7 +428,6 @@ def _update_neighbor_cross_edges_single(
                 reverse_edge = np.array([counterpart, new_id], dtype=basetypes.NODE_ID)
                 edges = np.concatenate([edges, [reverse_edge]])
                 edges = np.unique(edges, axis=0)
-
             edges_d[layer] = edges
             val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
         if not val_dict:
@@ -584,6 +583,39 @@ def _update_cross_edge_cache(self, parent, children):
             assert np.all(edges[:, 0] == parent)
         self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
 
+    def _update_neighbor_parents(self, neighbor, ceil_layer, updated) -> list:
+        updated_parents = []
+        while True:
+            parent = self.cg.get_parent(neighbor, time_stamp=self._last_successful_ts)
+            parent_layer = self.cg.get_chunk_layer(parent)
+            if parent_layer >= ceil_layer or parent in updated:
+                break
+            children = self.cg.get_children(parent)
+            self._update_cross_edge_cache(parent, children)
+            updated_parents.append(parent)
+            neighbor = parent
+        return updated_parents
+
+    def _update_skipped_neighbors(self, node, layer, parent_layer):
+        updated_parents = set()
+        cx_edges_d = self.cg.cache.cross_chunk_edges_cache[node]
+        for l in range(layer, parent_layer + 1):
+            layer_edges = cx_edges_d.get(l, types.empty_2d)
+            neighbors = layer_edges[:, 1]
+            for n in neighbors:
+                res = self._update_neighbor_parents(n, parent_layer, updated_parents)
+                updated_parents.update(res)
+
+        updated_entries = []
+        for parent in updated_parents:
+            val_dict = {}
+            for layer, edges in self.cg.cache.cross_chunk_edges_cache[parent].items():
+                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+            rkey = serialize_uint64(parent)
+            row = self.cg.client.mutate_row(rkey, val_dict, time_stamp=self._time_stamp)
+            updated_entries.append(row)
+        return updated_entries
+
     def _create_new_parents(self, layer: int):
         """
         keep track of old IDs
@@ -598,6 +630,7 @@ def _create_new_parents(self, layer: int):
         layer_node_ids = self._get_layer_node_ids(new_ids, layer)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
         for cc_indices in components:
+            update_skipped_neighbors = False
             parent_layer = layer + 1  # must be reset for each connected component
             cc_ids = graph_ids[cc_indices]
             if len(cc_ids) == 1:
@@ -610,57 +643,18 @@ def _create_new_parents(self, layer: int):
                     if len(cx_edges_d[cc_ids[0]].get(l, types.empty_2d)) > 0:
                         parent_layer = l
                         break
-            parent_id = self.cg.id_client.create_node_id(
+                update_skipped_neighbors = cc_ids[0] in self._new_old_id_d
+            parent = self.cg.id_client.create_node_id(
                 self.cg.get_parent_chunk_id(cc_ids[0], parent_layer),
                 root_chunk=parent_layer == self.cg.meta.layer_count,
             )
-            self._new_ids_d[parent_layer].append(parent_id)
-            self._update_id_lineage(parent_id, cc_ids, layer, parent_layer)
-            self.cg.cache.children_cache[parent_id] = cc_ids
-            cache_utils.update(
-                self.cg.cache.parents_cache,
-                cc_ids,
-                parent_id,
-            )
-
-    def _update_skipped_neighbors(self, current_layer):
-        """
-        Update neighbor nodes in a skipped layer to reflect changes in their descendants.
-        Get neighbors of new ids at `current_layer - 1`.
-        Get their parents and update their cx edges.
-        """
-        neighbors = []
-        lower_new_ids = self._new_ids_d[current_layer - 1]
-        newid_cx_edges_d = self.cg.get_cross_chunk_edges(
-            lower_new_ids, time_stamp=self._last_successful_ts
-        )
-        for cx_edges_d in newid_cx_edges_d.values():
-            for edges in cx_edges_d.values():
-                neighbors.extend(edges[:, 1])
-
-        neighbor_parents = self.cg.get_parents(
-            neighbors, time_stamp=self._last_successful_ts
-        )
-        parents_layers = self.cg.get_chunk_layers(neighbor_parents)
-        neighbor_parents = neighbor_parents[parents_layers == current_layer]
-
-        updated_entries = []
-        children_d = self.cg.get_children(neighbor_parents)
-        for parent, children in children_d.items():
-            self._update_cross_edge_cache(parent, children)
-            edges_d = self.cg.cache.cross_chunk_edges_cache[parent]
-            val_dict = {}
-            for layer in range(2, self.cg.meta.layer_count):
-                edges = edges_d.get(layer, types.empty_2d)
-                if edges.size == 0:
-                    continue
-                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
-            rowkey = serialize_uint64(parent)
-            row = self.cg.client.mutate_row(
-                rowkey, val_dict, time_stamp=self._time_stamp
-            )
-            updated_entries.append(row)
-        return updated_entries
+            self._new_ids_d[parent_layer].append(parent)
+            self._update_id_lineage(parent, cc_ids, layer, parent_layer)
+            self.cg.cache.children_cache[parent] = cc_ids
+            cache_utils.update(self.cg.cache.parents_cache, cc_ids, parent)
+            if update_skipped_neighbors:
+                res = self._update_skipped_neighbors(cc_ids[0], layer, parent_layer)
+                self.new_entries.extend(res)
 
     def run(self) -> Iterable:
         """
@@ -670,10 +664,6 @@ def run(self) -> Iterable:
         self._new_ids_d[2] = self._new_l2_ids
         for layer in range(2, self.cg.meta.layer_count):
             if len(self._new_ids_d[layer]) == 0:
-                # if there are no new ids in a layer due to a skipped connection
-                # ensure updates to cx edges of parents of neighbors from previous layer
-                entries = self._update_skipped_neighbors(layer)
-                self.new_entries.extend(entries)
                 continue
             # all new IDs in this layer have been created
             # update their cross chunk edges and their neighbors'

From fbc78746000b6f186ebd119c553ad848a7a8c97a Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Wed, 11 Oct 2023 23:02:01 +0000
Subject: [PATCH 077/105] fix(edits): ignore new ids in neighbor update

---
 pychunkedgraph/graph/edits.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 2edfd3137..d2523715b 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -603,6 +603,9 @@ def _update_skipped_neighbors(self, node, layer, parent_layer):
             layer_edges = cx_edges_d.get(l, types.empty_2d)
             neighbors = layer_edges[:, 1]
             for n in neighbors:
+                if n in self._new_old_id_d:
+                    # ignore new ids
+                    continue
                 res = self._update_neighbor_parents(n, parent_layer, updated_parents)
                 updated_parents.update(res)
 

From 7e229abdc9b986e23540b262077fac99a521f45b Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Thu, 12 Oct 2023 17:17:44 +0000
Subject: [PATCH 078/105] add docs

---
 pychunkedgraph/graph/edits.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index d2523715b..839db48b9 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -494,7 +494,7 @@ def __init__(
         self._old_hierarchy_d = old_hierarchy_d
         self._new_old_id_d = new_old_id_d
         self._old_new_id_d = old_new_id_d
-        self._new_ids_d = defaultdict(list)  # new IDs in each layer
+        self._new_ids_d = defaultdict(list)
         self._operation_id = operation_id
         self._time_stamp = time_stamp
         self._last_successful_ts = parent_ts
@@ -572,7 +572,6 @@ def _update_cross_edge_cache(self, parent, children):
             time_stamp=self._last_successful_ts,
         )
         edge_parents_d = dict(zip(edge_nodes, edge_parents))
-
         new_cx_edges_d = {}
         for layer in range(parent_layer, self.cg.meta.layer_count):
             edges = cx_edges_d.get(layer, types.empty_2d)
@@ -583,8 +582,9 @@ def _update_cross_edge_cache(self, parent, children):
             assert np.all(edges[:, 0] == parent)
         self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
 
-    def _update_neighbor_parents(self, neighbor, ceil_layer, updated) -> list:
-        updated_parents = []
+    def _update_neighbor_parents(self, neighbor, ceil_layer: int, updated: set) -> list:
+        """helper for `_update_skipped_neighbors`"""
+        parents = []
         while True:
             parent = self.cg.get_parent(neighbor, time_stamp=self._last_successful_ts)
             parent_layer = self.cg.get_chunk_layer(parent)
@@ -592,15 +592,22 @@ def _update_neighbor_parents(self, neighbor, ceil_layer, updated) -> list:
                 break
             children = self.cg.get_children(parent)
             self._update_cross_edge_cache(parent, children)
-            updated_parents.append(parent)
+            parents.append(parent)
             neighbor = parent
-        return updated_parents
+        return parents
 
     def _update_skipped_neighbors(self, node, layer, parent_layer):
+        """
+        Updates cross edges of neighbors of a skip connection node.
+        Neighbors of such nodes can have parents at contiguous layers.
+
+        This method updates cross edges of all such parents
+        from `layer` through `parent_layer`.
+        """
         updated_parents = set()
         cx_edges_d = self.cg.cache.cross_chunk_edges_cache[node]
-        for l in range(layer, parent_layer + 1):
-            layer_edges = cx_edges_d.get(l, types.empty_2d)
+        for _layer in range(layer, parent_layer + 1):
+            layer_edges = cx_edges_d.get(_layer, types.empty_2d)
             neighbors = layer_edges[:, 1]
             for n in neighbors:
                 if n in self._new_old_id_d:
@@ -608,12 +615,11 @@ def _update_skipped_neighbors(self, node, layer, parent_layer):
                     continue
                 res = self._update_neighbor_parents(n, parent_layer, updated_parents)
                 updated_parents.update(res)
-
         updated_entries = []
         for parent in updated_parents:
             val_dict = {}
-            for layer, edges in self.cg.cache.cross_chunk_edges_cache[parent].items():
-                val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
+            for _layer, edges in self.cg.cache.cross_chunk_edges_cache[parent].items():
+                val_dict[attributes.Connectivity.CrossChunkEdge[_layer]] = edges
             rkey = serialize_uint64(parent)
             row = self.cg.client.mutate_row(rkey, val_dict, time_stamp=self._time_stamp)
             updated_entries.append(row)

From e04a7eb166b9a4aa82102a1a4efbddc89eb068bc Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 14 Jan 2024 16:53:06 +0000
Subject: [PATCH 079/105] fix: resolve column filter ambiguity

---
 pychunkedgraph/graph/chunkedgraph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 8c3e14166..7b3c5d8f4 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -303,7 +303,7 @@ def get_atomic_cross_edges(self, l2_ids: typing.Iterable) -> typing.Dict:
             node_ids=l2_ids,
             properties=[
                 attributes.Connectivity.AtomicCrossChunkEdge[l]
-                for l in range(2, self.meta.layer_count)
+                for l in range(2, max(3, self.meta.layer_count))
             ],
         )
         result = {}

From 4e1ce089ad925e26c289cebfa833d03e674d4320 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhilesh@zetta.ai>
Date: Sun, 14 Jan 2024 20:20:18 +0000
Subject: [PATCH 080/105] fix: resolve column filter ambiguity(2)

---
 pychunkedgraph/graph/chunkedgraph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 7b3c5d8f4..7edc538df 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -336,7 +336,7 @@ def get_cross_chunk_edges(
                 return result
             attrs = [
                 attributes.Connectivity.CrossChunkEdge[l]
-                for l in range(2, self.meta.layer_count)
+                for l in range(2, max(3, self.meta.layer_count))
             ]
             node_edges_d_d = self.client.read_nodes(
                 node_ids=node_ids,

From 475cd4289893912548e7666b7c8583fc81bd7846 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sun, 12 May 2024 10:35:48 -0500
Subject: [PATCH 081/105] V3 migration (#484)

* feat: convert edges to ocdbt

* feat: worker function to convert edges to ocdbt

* feat: ocdbt option, consolidate ingest cli

* fix(ingest): move fn to utils

* fix(ingest): move ocdbt setup to a fn

* add tensorstore req, fix build kaniko cache

* feat: copy fake_edges to column family 4

* feat: upgrade atomic chunks

* fix: rename abstract module to parent

* feat: upgrade higher layers, docs

* feat: upgrade cli, move common fns to utils

* add copy_fake_edges in upgrade fn

* handle earliest_timestamp, add test flag to upgrade

* fix: fake_edges serialize np.uint64

* add get_operation method, fix timestamp in repair, check for parent

* check for l2 ids invalidated by edit retries

* remove unnecessary parent assert

* remove unused vars

* ignore invalid ids, assert parent after earliest_ts

* check for ids invalidated by retries in higher layers

* parallelize update_cross_edges

* overwrite graph version, create col family 4

* improve status print formatting

* remove unused code, consolidate small common module

* efficient check for chunks not done

* check for empty chunks, use get_parents

* efficient get_edit_ts call by batching all children

* reduce earliest_ts calls

* combine bigtable calls, use numpy unique

* add completion rate command

* fix: ignore children without cross edges

* add span option to rate calculation

* reduce mem usage with global vars

* optimize cross edge reading

* use existing layer var

* limit cx edge reading above given layer

* fix: read for earliest_ts check only if true

* filter cross edges fn with timestamps

* remove git from dockerignore, print stats

* shuffle for better distribution of ids

* fix: use different var name for layer

* increase bigtable read timeout

* add message with assert

* fix: make span option int

* handle skipped connections

* fix: read cross edges at layer >= node_layer

* handle another case of skipped nodes

* check for unique parent count

* update graph_id in meta

* uncomment line

* make repair easier to use

* add sanity check for edits

* add sanity check for each layer

* add layers flag for cx edges

* use better names for functions and vars, update types, fix docs
---
 pychunkedgraph/app/__init__.py                |   2 +
 pychunkedgraph/debug/utils.py                 |  23 ++
 pychunkedgraph/graph/attributes.py            |   6 +
 pychunkedgraph/graph/chunkedgraph.py          |  78 ++++--
 pychunkedgraph/graph/client/base.py           |   2 +-
 .../graph/client/bigtable/client.py           |  37 ++-
 pychunkedgraph/graph/edges/__init__.py        |  94 ++++++-
 pychunkedgraph/graph/edits.py                 |  13 +-
 pychunkedgraph/ingest/__init__.py             |  22 +-
 pychunkedgraph/ingest/cli.py                  | 131 ++++------
 pychunkedgraph/ingest/cli_upgrade.py          | 143 +++++++++++
 pychunkedgraph/ingest/cluster.py              | 243 +++++++++++++-----
 pychunkedgraph/ingest/common.py               |  61 -----
 pychunkedgraph/ingest/create/atomic_layer.py  |   8 +-
 .../{abstract_layers.py => parent_layer.py}   |  10 +-
 pychunkedgraph/ingest/ran_agglomeration.py    |   8 +-
 pychunkedgraph/ingest/rq_cli.py               |  28 +-
 pychunkedgraph/ingest/simple_tests.py         |   3 +-
 pychunkedgraph/ingest/upgrade/__init__.py     |   0
 pychunkedgraph/ingest/upgrade/atomic_layer.py | 119 +++++++++
 pychunkedgraph/ingest/upgrade/parent_layer.py | 170 ++++++++++++
 pychunkedgraph/ingest/upgrade/utils.py        |  13 +
 pychunkedgraph/ingest/utils.py                | 135 +++++++++-
 pychunkedgraph/repair/edits.py                |   6 +-
 pychunkedgraph/tests/helpers.py               |  45 ++--
 pychunkedgraph/tests/test_uncategorized.py    |  84 +++---
 pychunkedgraph/utils/general.py               |  12 +-
 requirements.in                               |   1 +
 requirements.txt                              |   6 +
 29 files changed, 1100 insertions(+), 403 deletions(-)
 create mode 100644 pychunkedgraph/ingest/cli_upgrade.py
 delete mode 100644 pychunkedgraph/ingest/common.py
 rename pychunkedgraph/ingest/create/{abstract_layers.py => parent_layer.py} (98%)
 create mode 100644 pychunkedgraph/ingest/upgrade/__init__.py
 create mode 100644 pychunkedgraph/ingest/upgrade/atomic_layer.py
 create mode 100644 pychunkedgraph/ingest/upgrade/parent_layer.py
 create mode 100644 pychunkedgraph/ingest/upgrade/utils.py

diff --git a/pychunkedgraph/app/__init__.py b/pychunkedgraph/app/__init__.py
index 3e938628b..262849258 100644
--- a/pychunkedgraph/app/__init__.py
+++ b/pychunkedgraph/app/__init__.py
@@ -105,6 +105,8 @@ def configure_app(app):
         with app.app_context():
             from ..ingest.rq_cli import init_rq_cmds
             from ..ingest.cli import init_ingest_cmds
+            from ..ingest.cli_upgrade import init_upgrade_cmds
 
             init_rq_cmds(app)
             init_ingest_cmds(app)
+            init_upgrade_cmds(app)
diff --git a/pychunkedgraph/debug/utils.py b/pychunkedgraph/debug/utils.py
index 43562afd2..b1bdbc2be 100644
--- a/pychunkedgraph/debug/utils.py
+++ b/pychunkedgraph/debug/utils.py
@@ -35,3 +35,26 @@ def get_l2children(cg, node: np.uint64) -> np.ndarray:
         l2children.append(children[layers == 2])
         nodes = children[layers > 2]
     return np.concatenate(l2children)
+
+
+def sanity_check(cg, new_roots, operation_id):
+    """
+    Check for duplicates in hierarchy, useful for debugging.
+    """
+    print(f"{len(new_roots)} new ids from {operation_id}")
+    l2c_d = {}
+    for new_root in new_roots:
+        l2c_d[new_root] = get_l2children(cg, new_root)
+    success = True
+    for k, v in l2c_d.items():
+        success = success and (len(v) == np.unique(v).size)
+        print(f"{k}: {np.unique(v).size}, {len(v)}")
+    if not success:
+        raise RuntimeError("Some ids are not valid.")
+
+
+def sanity_check_single(cg, node, operation_id):
+    v = get_l2children(cg, node)
+    msg = f"invalid node {node}:"
+    msg += f" found {len(v)} l2 ids, must be {np.unique(v).size}"
+    assert np.unique(v).size == len(v), f"{msg}, from {operation_id}."
diff --git a/pychunkedgraph/graph/attributes.py b/pychunkedgraph/graph/attributes.py
index 33f675dc8..b431a159b 100644
--- a/pychunkedgraph/graph/attributes.py
+++ b/pychunkedgraph/graph/attributes.py
@@ -120,6 +120,12 @@ class Connectivity:
         ),
     )
 
+    FakeEdgesCF3 = _Attribute(
+        key=b"fake_edges",
+        family_id="3",
+        serializer=serializers.NumPyArray(dtype=basetypes.NODE_ID, shape=(-1, 2)),
+    )
+
     FakeEdges = _Attribute(
         key=b"fake_edges",
         family_id="4",
diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 7edc538df..7d1a24cc3 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -19,6 +19,7 @@
 from .meta import ChunkedGraphMeta
 from .utils import basetypes
 from .utils import id_helpers
+from .utils import serializers
 from .utils import generic as misc_utils
 from .edges import Edges
 from .edges import utils as edge_utils
@@ -76,7 +77,7 @@ def version(self) -> str:
         return self.client.read_graph_version()
 
     @property
-    def client(self) -> base.SimpleClient:
+    def client(self) -> BigTableClient:
         return self._client
 
     @property
@@ -287,9 +288,11 @@ def _get_children_multiple(
                 node_ids=node_ids, properties=attributes.Hierarchy.Child
             )
             return {
-                x: node_children_d[x][0].value
-                if x in node_children_d
-                else types.empty_1d.copy()
+                x: (
+                    node_children_d[x][0].value
+                    if x in node_children_d
+                    else types.empty_1d.copy()
+                )
                 for x in node_ids
             }
         return self.cache.children_multiple(node_ids)
@@ -322,6 +325,7 @@ def get_cross_chunk_edges(
         node_ids: typing.Iterable,
         *,
         raw_only=False,
+        all_layers=True,
         time_stamp: typing.Optional[datetime.datetime] = None,
     ) -> typing.Dict:
         """
@@ -334,21 +338,24 @@ def get_cross_chunk_edges(
             node_ids = np.array(node_ids, dtype=basetypes.NODE_ID)
             if node_ids.size == 0:
                 return result
-            attrs = [
-                attributes.Connectivity.CrossChunkEdge[l]
-                for l in range(2, max(3, self.meta.layer_count))
-            ]
+            layers = range(2, max(3, self.meta.layer_count))
+            attrs = [attributes.Connectivity.CrossChunkEdge[l] for l in layers]
             node_edges_d_d = self.client.read_nodes(
                 node_ids=node_ids,
                 properties=attrs,
                 end_time=time_stamp,
                 end_time_inclusive=True,
             )
-            for id_ in node_ids:
+            layers = self.get_chunk_layers(node_ids)
+            valid_layer = lambda x, y: x >= y
+            if not all_layers:
+                valid_layer = lambda x, y: x == y
+            for layer, id_ in zip(layers, node_ids):
                 try:
                     result[id_] = {
                         prop.index: val[0].value.copy()
                         for prop, val in node_edges_d_d[id_].items()
+                        if valid_layer(prop.index, layer)
                     }
                 except KeyError:
                     result[id_] = {}
@@ -631,9 +638,24 @@ def get_fake_edges(
             edges = np.concatenate(
                 [np.array(e.value, dtype=basetypes.NODE_ID, copy=False) for e in val]
             )
-            result[id_] = Edges(edges[:, 0], edges[:, 1], fake_edges=True)
+            result[id_] = Edges(edges[:, 0], edges[:, 1])
         return result
 
+    def copy_fake_edges(self, chunk_id: np.uint64) -> None:
+        _edges = self.client.read_node(
+            node_id=chunk_id,
+            properties=attributes.Connectivity.FakeEdgesCF3,
+            end_time_inclusive=True,
+            fake_edges=True,
+        )
+        mutations = []
+        _id = serializers.serialize_uint64(chunk_id, fake_edges=True)
+        for e in _edges:
+            val_dict = {attributes.Connectivity.FakeEdges: e.value}
+            row = self.client.mutate_row(_id, val_dict, time_stamp=e.timestamp)
+            mutations.append(row)
+        self.client.write(mutations)
+
     def get_l2_agglomerations(
         self, level2_ids: np.ndarray, edges_only: bool = False
     ) -> typing.Tuple[typing.Dict[int, types.Agglomeration], typing.Tuple[Edges]]:
@@ -690,13 +712,15 @@ def get_l2_agglomerations(
         )
         return (
             agglomeration_d,
-            (self.mock_edges,)
-            if self.mock_edges is not None
-            else (in_edges, out_edges, cross_edges),
+            (
+                (self.mock_edges,)
+                if self.mock_edges is not None
+                else (in_edges, out_edges, cross_edges)
+            ),
         )
 
     def get_node_timestamps(
-        self, node_ids: typing.Sequence[np.uint64], return_numpy=True
+        self, node_ids: typing.Sequence[np.uint64], return_numpy=True, normalize=False
     ) -> typing.Iterable:
         """
         The timestamp of the children column can be assumed
@@ -710,17 +734,22 @@ def get_node_timestamps(
             if return_numpy:
                 return np.array([], dtype=np.datetime64)
             return []
+        result = []
+        earliest_ts = self.get_earliest_timestamp()
+        for n in node_ids:
+            ts = children[n][0].timestamp
+            if normalize:
+                ts = earliest_ts if ts < earliest_ts else ts
+            result.append(ts)
         if return_numpy:
-            return np.array(
-                [children[x][0].timestamp for x in node_ids], dtype=np.datetime64
-            )
-        return [children[x][0].timestamp for x in node_ids]
+            return np.array(result, dtype=np.datetime64)
+        return result
 
     # OPERATIONS
     def add_edges(
         self,
         user_id: str,
-        atomic_edges: typing.Sequence[np.uint64],
+        atomic_edges: typing.Sequence[typing.Sequence[np.uint64]],
         *,
         affinities: typing.Sequence[np.float32] = None,
         source_coords: typing.Sequence[int] = None,
@@ -935,3 +964,14 @@ def get_earliest_timestamp(self):
             _, timestamp = self.client.read_log_entry(op_id)
             if timestamp is not None:
                 return timestamp - timedelta(milliseconds=500)
+
+    def get_operation_ids(self, node_ids: typing.Sequence):
+        response = self.client.read_nodes(node_ids=node_ids)
+        result = {}
+        for node in node_ids:
+            try:
+                operations = response[node][attributes.OperationLogs.OperationID]
+                result[node] = [(x.value, x.timestamp) for x in operations]
+            except KeyError:
+                ...
+        return result
diff --git a/pychunkedgraph/graph/client/base.py b/pychunkedgraph/graph/client/base.py
index a66602a6a..953734670 100644
--- a/pychunkedgraph/graph/client/base.py
+++ b/pychunkedgraph/graph/client/base.py
@@ -13,7 +13,7 @@ def create_graph(self) -> None:
         """Initialize the graph and store associated meta."""
 
     @abstractmethod
-    def add_graph_version(self, version):
+    def add_graph_version(self, version: str, overwrite: bool = False):
         """Add a version to the graph."""
 
     @abstractmethod
diff --git a/pychunkedgraph/graph/client/bigtable/client.py b/pychunkedgraph/graph/client/bigtable/client.py
index 6601b654e..52ec9a856 100644
--- a/pychunkedgraph/graph/client/bigtable/client.py
+++ b/pychunkedgraph/graph/client/bigtable/client.py
@@ -19,7 +19,7 @@
 from google.cloud.bigtable.column_family import MaxVersionsGCRule
 from google.cloud.bigtable.table import Table
 from google.cloud.bigtable.row_set import RowSet
-from google.cloud.bigtable.row_data import PartialRowData
+from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS, PartialRowData
 from google.cloud.bigtable.row_filters import RowFilter
 
 from . import utils
@@ -97,8 +97,9 @@ def create_graph(self, meta: ChunkedGraphMeta, version: str) -> None:
         self.add_graph_version(version)
         self.update_graph_meta(meta)
 
-    def add_graph_version(self, version: str):
-        assert self.read_graph_version() is None, "Graph has already been versioned."
+    def add_graph_version(self, version: str, overwrite: bool = False):
+        if not overwrite:
+            assert self.read_graph_version() is None, self.read_graph_version()
         self._version = version
         row = self.mutate_row(
             attributes.GraphVersion.key,
@@ -160,18 +161,25 @@ def read_nodes(
             # when all IDs in a block are within a range
             node_ids = np.sort(node_ids)
         rows = self._read_byte_rows(
-            start_key=serialize_uint64(start_id, fake_edges=fake_edges)
-            if start_id is not None
-            else None,
-            end_key=serialize_uint64(end_id, fake_edges=fake_edges)
-            if end_id is not None
-            else None,
+            start_key=(
+                serialize_uint64(start_id, fake_edges=fake_edges)
+                if start_id is not None
+                else None
+            ),
+            end_key=(
+                serialize_uint64(end_id, fake_edges=fake_edges)
+                if end_id is not None
+                else None
+            ),
             end_key_inclusive=end_id_inclusive,
             row_keys=(
-                serialize_uint64(node_id, fake_edges=fake_edges) for node_id in node_ids
-            )
-            if node_ids is not None
-            else None,
+                (
+                    serialize_uint64(node_id, fake_edges=fake_edges)
+                    for node_id in node_ids
+                )
+                if node_ids is not None
+                else None
+            ),
             columns=properties,
             start_time=start_time,
             end_time=end_time,
@@ -819,7 +827,8 @@ def _execute_read_thread(self, args: typing.Tuple[Table, RowSet, RowFilter]):
             # Check for everything falsy, because Bigtable considers even empty
             # lists of row_keys as no upper/lower bound!
             return {}
-        range_read = table.read_rows(row_set=row_set, filter_=row_filter)
+        retry = DEFAULT_RETRY_READ_ROWS.with_timeout(180)
+        range_read = table.read_rows(row_set=row_set, filter_=row_filter, retry=retry)
         res = {v.row_key: utils.partial_row_data_to_column_dict(v) for v in range_read}
         return res
 
diff --git a/pychunkedgraph/graph/edges/__init__.py b/pychunkedgraph/graph/edges/__init__.py
index b0e488d05..430ab9fa7 100644
--- a/pychunkedgraph/graph/edges/__init__.py
+++ b/pychunkedgraph/graph/edges/__init__.py
@@ -2,10 +2,14 @@
 Classes and types for edges
 """
 
-from typing import Optional
 from collections import namedtuple
+from os import environ
+from typing import Optional
 
 import numpy as np
+import tensorstore as ts
+import zstandard as zstd
+from graph_tool import Graph
 
 from ..utils import basetypes
 
@@ -18,6 +22,14 @@
 
 DEFAULT_AFFINITY = np.finfo(np.float32).tiny
 DEFAULT_AREA = np.finfo(np.float32).tiny
+ADJACENCY_DTYPE = np.dtype(
+    [
+        ("node", basetypes.NODE_ID),
+        ("aff", basetypes.EDGE_AFFINITY),
+        ("area", basetypes.EDGE_AREA),
+    ]
+)
+ZSTD_EDGE_COMPRESSION = 17
 
 
 class Edges:
@@ -28,17 +40,17 @@ def __init__(
         *,
         affinities: Optional[np.ndarray] = None,
         areas: Optional[np.ndarray] = None,
-        fake_edges=False,
     ):
         self.node_ids1 = np.array(node_ids1, dtype=basetypes.NODE_ID, copy=False)
         self.node_ids2 = np.array(node_ids2, dtype=basetypes.NODE_ID, copy=False)
         assert self.node_ids1.size == self.node_ids2.size
 
         self._as_pairs = None
-        self._fake_edges = fake_edges
 
         if affinities is not None and len(affinities) > 0:
-            self._affinities = np.array(affinities, dtype=basetypes.EDGE_AFFINITY, copy=False)
+            self._affinities = np.array(
+                affinities, dtype=basetypes.EDGE_AFFINITY, copy=False
+            )
             assert self.node_ids1.size == self._affinities.size
         else:
             self._affinities = np.full(len(self.node_ids1), DEFAULT_AFFINITY)
@@ -103,3 +115,77 @@ def get_pairs(self) -> np.ndarray:
             return self._as_pairs
         self._as_pairs = np.column_stack((self.node_ids1, self.node_ids2))
         return self._as_pairs
+
+
+def put_edges(destination: str, nodes: np.ndarray, edges: Edges) -> None:
+    """
+    Store one zstd-compressed adjacency list (`ADJACENCY_DTYPE`) per node in
+    `nodes`, keyed by `str(node_id)`, in the ocdbt kv store at `destination`.
+    Nodes without any edge in `edges` get an empty list instead of a KeyError.
+    The coordinator address is read from `OCDBT_COORDINATOR_HOST` / `_PORT`.
+    """
+    graph_ids, _edges = np.unique(edges.get_pairs(), return_inverse=True)
+    graph_ids_reverse = {n: i for i, n in enumerate(graph_ids)}
+    graph = Graph(directed=False)
+    graph.add_edge_list(_edges.reshape(-1, 2))
+    e_aff = graph.new_edge_property("double", vals=edges.affinities)
+    # NOTE(review): graph-tool "int" is 32-bit; confirm areas always fit.
+    e_area = graph.new_edge_property("int", vals=edges.areas)
+    cctx = zstd.ZstdCompressor(level=ZSTD_EDGE_COMPRESSION)
+    ocdbt_host = environ["OCDBT_COORDINATOR_HOST"]
+    ocdbt_port = environ["OCDBT_COORDINATOR_PORT"]
+    coordinator = {"address": f"{ocdbt_host}:{ocdbt_port}"}
+    spec = {"driver": "ocdbt", "base": destination, "coordinator": coordinator}
+    dataset = ts.KvStore.open(spec).result()
+    with ts.Transaction() as txn:
+        store = dataset.with_transaction(txn)
+        for _node in nodes:
+            node = graph_ids_reverse.get(_node)
+            # nodes absent from `edges` have no graph vertex and no neighbors
+            nbrs = graph.get_all_neighbors(node) if node is not None else []
+            adjacency_list = np.zeros(len(nbrs), dtype=ADJACENCY_DTYPE)
+            if len(nbrs):
+                adjacency_list["node"] = graph_ids[nbrs]
+                adjacency_list["aff"] = [e_aff[(node, nbr)] for nbr in nbrs]
+                adjacency_list["area"] = [e_area[(node, nbr)] for nbr in nbrs]
+            store[str(_node)] = cctx.compress(adjacency_list.tobytes())
+
+
+def get_edges(source: str, nodes: np.ndarray) -> Edges:
+    """
+    Read the adjacency lists of `nodes` from the ocdbt kv store at `source` and
+    return them as `Edges`, one entry per stored (node, neighbor) pair.
+    zstd thread count comes from the `ZSTD_THREADS` env var (falls back to 1).
+    """
+    spec = {"driver": "ocdbt", "base": source}
+    dataset = ts.KvStore.open(spec).result()
+    zdc = zstd.ZstdDecompressor()
+    # issue all reads before waiting on any of them
+    read_futures = [dataset.read(str(n)) for n in nodes]
+    compressed = [rf.result().value for rf in read_futures]
+    try:
+        n_threads = int(environ.get("ZSTD_THREADS", 1))
+    except ValueError:
+        n_threads = 1
+
+    try:
+        decompressed = zdc.multi_decompress_to_buffer(compressed, threads=n_threads)
+    except ValueError:
+        decompressed = [zdc.decompressobj().decompress(c) for c in compressed]
+
+    node_ids1 = [np.empty(0, dtype=basetypes.NODE_ID)]
+    node_ids2 = [np.empty(0, dtype=basetypes.NODE_ID)]
+    affinities = [np.empty(0, dtype=basetypes.EDGE_AFFINITY)]
+    areas = [np.empty(0, dtype=basetypes.EDGE_AREA)]
+    for n, content in zip(nodes, decompressed):
+        # parse the decompressed bytes; `compressed` holds raw zstd frames
+        adjacency_list = np.frombuffer(content, dtype=ADJACENCY_DTYPE)
+        # typed fill: a plain `[n] * 0` list would promote the IDs to float64
+        node_ids1.append(np.full(adjacency_list.size, n, dtype=basetypes.NODE_ID))
+        node_ids2.append(adjacency_list["node"])
+        affinities.append(adjacency_list["aff"])
+        areas.append(adjacency_list["area"])
+
+    ids1, ids2 = np.concatenate(node_ids1), np.concatenate(node_ids2)
+    affinities, areas = np.concatenate(affinities), np.concatenate(areas)
+    return Edges(ids1, ids2, affinities=affinities, areas=areas)
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 839db48b9..ee7e643c3 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -22,7 +22,7 @@
 from .utils.serializers import serialize_uint64
 from ..logging.log_db import TimeIt
 from ..utils.general import in2d
-from ..debug.utils import get_l2children
+from ..debug.utils import sanity_check, sanity_check_single
 
 
 def _init_old_hierarchy(cg, l2ids: np.ndarray, parent_ts: datetime.datetime = None):
@@ -246,9 +246,7 @@ def add_edges(
     )
 
     new_roots = create_parents.run()
-    for new_root in new_roots:
-        l2c = get_l2children(cg, new_root)
-        assert len(l2c) == np.unique(l2c).size, f"inconsistent result op {operation_id}"
+    sanity_check(cg, new_roots, operation_id)
     create_parents.create_new_entries()
     return new_roots, new_l2_ids, create_parents.new_entries
 
@@ -376,9 +374,7 @@ def remove_edges(
         parent_ts=parent_ts,
     )
     new_roots = create_parents.run()
-    for new_root in new_roots:
-        l2c = get_l2children(cg, new_root)
-        assert len(l2c) == np.unique(l2c).size, f"inconsistent result op {operation_id}"
+    sanity_check(cg, new_roots, operation_id)
     create_parents.create_new_entries()
     return new_roots, new_l2_ids, create_parents.new_entries
 
@@ -579,7 +575,7 @@ def _update_cross_edge_cache(self, parent, children):
                 continue
             edges = fastremap.remap(edges, edge_parents_d, preserve_missing_labels=True)
             new_cx_edges_d[layer] = np.unique(edges, axis=0)
-            assert np.all(edges[:, 0] == parent)
+            assert np.all(edges[:, 0] == parent), f"{parent}, {np.unique(edges[:, 0])}"
         self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
 
     def _update_neighbor_parents(self, neighbor, ceil_layer: int, updated: set) -> list:
@@ -661,6 +657,7 @@ def _create_new_parents(self, layer: int):
             self._update_id_lineage(parent, cc_ids, layer, parent_layer)
             self.cg.cache.children_cache[parent] = cc_ids
             cache_utils.update(self.cg.cache.parents_cache, cc_ids, parent)
+            sanity_check_single(self.cg, parent, self._operation_id)
             if update_skipped_neighbors:
                 res = self._update_skipped_neighbors(cc_ids[0], layer, parent_layer)
                 self.new_entries.extend(res)
diff --git a/pychunkedgraph/ingest/__init__.py b/pychunkedgraph/ingest/__init__.py
index b3d832d5e..55c10ca5f 100644
--- a/pychunkedgraph/ingest/__init__.py
+++ b/pychunkedgraph/ingest/__init__.py
@@ -1,32 +1,16 @@
+import logging
 from collections import namedtuple
 
-
-_cluster_ingest_config_fields = (
-    "ATOMIC_Q_NAME",
-    "ATOMIC_Q_LIMIT",
-    "ATOMIC_Q_INTERVAL",
-)
-_cluster_ingest_defaults = (
-    "l2",
-    100000,
-    120,
-)
-ClusterIngestConfig = namedtuple(
-    "ClusterIngestConfig",
-    _cluster_ingest_config_fields,
-    defaults=_cluster_ingest_defaults,
-)
-
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 
 _ingestconfig_fields = (
-    "CLUSTER",  # cluster config
     "AGGLOMERATION",
     "WATERSHED",
     "USE_RAW_EDGES",
     "USE_RAW_COMPONENTS",
     "TEST_RUN",
 )
-_ingestconfig_defaults = (None, None, None, False, False, False)
+_ingestconfig_defaults = (None, None, False, False, False)
 IngestConfig = namedtuple(
     "IngestConfig", _ingestconfig_fields, defaults=_ingestconfig_defaults
 )
diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 67182fc81..928e1852f 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -4,29 +4,25 @@
 cli for running ingest
 """
 
-from os import environ
-from time import sleep
+import logging
 
 import click
 import yaml
 from flask.cli import AppGroup
-from rq import Queue
-from rq import Worker
-from rq.worker import WorkerStatus
-
-from .cluster import create_atomic_chunk
-from .cluster import create_parent_chunk
-from .cluster import enqueue_atomic_tasks
-from .cluster import randomize_grid_points
+
+from .cluster import create_atomic_chunk, create_parent_chunk, enqueue_l2_tasks
 from .manager import IngestionManager
-from .utils import bootstrap
-from .utils import chunk_id_str
+from .utils import (
+    bootstrap,
+    chunk_id_str,
+    print_completion_rate,
+    print_ingest_status,
+    queue_layer_helper,
+)
 from .simple_tests import run_all
-from .create.abstract_layers import add_layer
+from .create.parent_layer import add_parent_chunk
 from ..graph.chunkedgraph import ChunkedGraph
-from ..utils.redis import get_redis_connection
-from ..utils.redis import keys as r_keys
-from ..utils.general import chunked
+from ..utils.redis import get_redis_connection, keys as r_keys
 
 ingest_cli = AppGroup("ingest")
 
@@ -45,9 +41,9 @@ def flush_redis():
 @ingest_cli.command("graph")
 @click.argument("graph_id", type=str)
 @click.argument("dataset", type=click.Path(exists=True))
-@click.option("--raw", is_flag=True)
-@click.option("--test", is_flag=True)
-@click.option("--retry", is_flag=True)
+@click.option("--raw", is_flag=True, help="Read edges from agglomeration output.")
+@click.option("--test", is_flag=True, help="Test 8 chunks at the center of dataset.")
+@click.option("--retry", is_flag=True, help="Rerun without creating a new table.")
 def ingest_graph(
     graph_id: str, dataset: click.Path, raw: bool, test: bool, retry: bool
 ):
@@ -58,16 +54,16 @@ def ingest_graph(
     with open(dataset, "r") as stream:
         config = yaml.safe_load(stream)
 
-    meta, ingest_config, client_info = bootstrap(
-        graph_id,
-        config=config,
-        raw=raw,
-        test_run=test,
-    )
+    if test:
+        logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
+
+    meta, ingest_config, client_info = bootstrap(graph_id, config, raw, test)
     cg = ChunkedGraph(meta=meta, client_info=client_info)
     if not retry:
         cg.create()
-    enqueue_atomic_tasks(IngestionManager(ingest_config, meta))
+
+    imanager = IngestionManager(ingest_config, meta)
+    enqueue_l2_tasks(imanager, create_atomic_chunk)
 
 
 @ingest_cli.command("imanager")
@@ -100,22 +96,7 @@ def queue_layer(parent_layer):
     assert parent_layer > 2, "This command is for layers 3 and above."
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-
-    if parent_layer == imanager.cg_meta.layer_count:
-        chunk_coords = [(0, 0, 0)]
-    else:
-        bounds = imanager.cg_meta.layer_chunk_bounds[parent_layer]
-        chunk_coords = randomize_grid_points(*bounds)
-
-    for coords in chunk_coords:
-        task_q = imanager.get_task_queue(f"l{parent_layer}")
-        task_q.enqueue(
-            create_parent_chunk,
-            job_id=chunk_id_str(parent_layer, coords),
-            job_timeout=f"{int(parent_layer * parent_layer)}m",
-            result_ttl=0,
-            args=(parent_layer, coords),
-        )
+    queue_layer_helper(parent_layer, imanager, create_parent_chunk)
 
 
 @ingest_cli.command("status")
@@ -123,39 +104,7 @@ def ingest_status():
     """Print ingest status to console by layer."""
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-    layers = range(2, imanager.cg_meta.layer_count + 1)
-    layer_counts = imanager.cg_meta.layer_chunk_counts
-
-    pipeline = redis.pipeline()
-    worker_busy = []
-    for layer in layers:
-        pipeline.scard(f"{layer}c")
-        queue = Queue(f"l{layer}", connection=redis)
-        pipeline.llen(queue.key)
-        pipeline.zcard(queue.failed_job_registry.key)
-        workers = Worker.all(queue=queue)
-        worker_busy.append(sum([w.get_state() == WorkerStatus.BUSY for w in workers]))
-
-    results = pipeline.execute()
-    completed = []
-    queued = []
-    failed = []
-    for i in range(0, len(results), 3):
-        result = results[i : i + 3]
-        completed.append(result[0])
-        queued.append(result[1])
-        failed.append(result[2])
-
-    print(f"version: \t{imanager.cg.version}")
-    print(f"graph_id: \t{imanager.cg.graph_id}")
-    print(f"chunk_size: \t{imanager.cg.meta.graph_config.CHUNK_SIZE}")
-    print("\nlayer status:")
-    for layer, done, count in zip(layers, completed, layer_counts):
-        print(f"{layer}\t: {done} / {count}")
-
-    print("\n\nqueue status:")
-    for layer, q, f, wb in zip(layers, queued, failed, worker_busy):
-        print(f"l{layer}\t: queued: {q}\t\t failed: {f}\t\t busy: {wb}")
+    print_ingest_status(imanager, redis)
 
 
 @ingest_cli.command("chunk")
@@ -165,15 +114,14 @@ def ingest_chunk(queue: str, chunk_info):
     """Manually queue chunk when a job is stuck for whatever reason."""
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-    layer = chunk_info[0]
-    coords = chunk_info[1:]
-    queue = imanager.get_task_queue(queue)
+    layer, coords = chunk_info[0], chunk_info[1:]
+
+    func = create_parent_chunk
+    args = (layer, coords)
     if layer == 2:
         func = create_atomic_chunk
         args = (coords,)
-    else:
-        func = create_parent_chunk
-        args = (layer, coords)
+    queue = imanager.get_task_queue(queue)
     queue.enqueue(
         func,
         job_id=chunk_id_str(layer, coords),
@@ -189,16 +137,22 @@ def ingest_chunk(queue: str, chunk_info):
 @click.option("--n_threads", type=int, default=1)
 def ingest_chunk_local(graph_id: str, chunk_info, n_threads: int):
     """Manually ingest a chunk on a local machine."""
-    from .create.abstract_layers import add_layer
-    from .cluster import _create_atomic_chunk
-
-    if chunk_info[0] == 2:
-        _create_atomic_chunk(chunk_info[1:])
+    # layer 2 is ingested as an atomic chunk, any other layer as a parent chunk
+    layer, coords = chunk_info[0], chunk_info[1:]
+    if layer == 2:
+        create_atomic_chunk(coords)
     else:
         cg = ChunkedGraph(graph_id=graph_id)
-        add_layer(cg, chunk_info[0], chunk_info[1:], n_threads=n_threads)
+        add_parent_chunk(cg, layer, coords, n_threads=n_threads)
-    cg = ChunkedGraph(graph_id=graph_id)
-    add_layer(cg, chunk_info[0], chunk_info[1:], n_threads=n_threads)
+
+
+@ingest_cli.command("rate")
+@click.argument("layer", type=int)
+@click.option("--span", default=10, help="Time span to calculate rate.")
+def rate(layer: int, span: int):
+    """Print the completion rate of `layer` chunks over the given time span."""
+    pickled = get_redis_connection().get(r_keys.INGESTION_MANAGER)
+    print_completion_rate(IngestionManager.from_pickle(pickled), layer, span=span)
 
 
 @ingest_cli.command("run_tests")
diff --git a/pychunkedgraph/ingest/cli_upgrade.py b/pychunkedgraph/ingest/cli_upgrade.py
new file mode 100644
index 000000000..c77c0be64
--- /dev/null
+++ b/pychunkedgraph/ingest/cli_upgrade.py
@@ -0,0 +1,143 @@
+# pylint: disable=invalid-name, missing-function-docstring, unspecified-encoding
+
+"""
+cli for running upgrade
+"""
+
+import logging
+from time import sleep
+
+import click
+import tensorstore as ts
+from flask.cli import AppGroup
+from pychunkedgraph import __version__
+from pychunkedgraph.graph.meta import GraphConfig
+
+from . import IngestConfig
+from .cluster import (
+    convert_to_ocdbt,
+    enqueue_l2_tasks,
+    upgrade_atomic_chunk,
+    upgrade_parent_chunk,
+)
+from .manager import IngestionManager
+from .utils import (
+    chunk_id_str,
+    print_completion_rate,
+    print_ingest_status,
+    queue_layer_helper,
+    start_ocdbt_server,
+)
+from ..graph.chunkedgraph import ChunkedGraph, ChunkedGraphMeta
+from ..utils.redis import get_redis_connection
+from ..utils.redis import keys as r_keys
+
+upgrade_cli = AppGroup("upgrade")
+
+
+def init_upgrade_cmds(app):
+    app.cli.add_command(upgrade_cli)  # register the "upgrade" command group on `app`
+
+
+@upgrade_cli.command("flush_redis")
+def flush_redis():
+    """Flush redis db."""
+    redis = get_redis_connection()
+    redis.flushdb()
+
+
+@upgrade_cli.command("graph")
+@click.argument("graph_id", type=str)
+@click.option("--test", is_flag=True, help="Test 8 chunks at the center of dataset.")
+@click.option("--ocdbt", is_flag=True, help="Store edges using ts ocdbt kv store.")
+def upgrade_graph(graph_id: str, test: bool, ocdbt: bool):
+    """
+    Main upgrade command.
+    Queues layer 2 chunk tasks for `graph_id`; with `--ocdbt` the tasks convert
+    edges to ocdbt and this process stays alive to host the coordinator server.
+    """
+    ingest_config = IngestConfig(TEST_RUN=test)
+    cg = ChunkedGraph(graph_id=graph_id)
+    cg.client.add_graph_version(__version__, overwrite=True)
+
+    if graph_id != cg.graph_id:
+        gc = cg.meta.graph_config._asdict()
+        gc["ID"] = graph_id
+        new_meta = ChunkedGraphMeta(
+            GraphConfig(**gc), cg.meta.data_source, cg.meta.custom_data
+        )
+        cg.update_meta(new_meta, overwrite=True)
+        cg = ChunkedGraph(graph_id=graph_id)
+
+    try:
+        # create new column family for cross chunk edges
+        cg.client._table.column_family("4").create()
+    except Exception as err:  # best effort; presumably fails if it already exists
+        logging.info(f"Column family 4 not created: {err}")
+
+    imanager = IngestionManager(ingest_config, cg.meta)
+    if not ocdbt:
+        enqueue_l2_tasks(imanager, upgrade_atomic_chunk)
+        return
+
+    # keep a reference to the server: it has to stay alive while tasks run
+    server = ts.ocdbt.DistributedCoordinatorServer()
+    start_ocdbt_server(imanager, server)
+    enqueue_l2_tasks(imanager, convert_to_ocdbt)
+    logging.info("All tasks queued. Keep this alive for ocdbt coordinator server.")
+    while True:
+        sleep(60)
+
+
+@upgrade_cli.command("layer")
+@click.argument("parent_layer", type=int)
+def queue_layer(parent_layer):
+    """
+    Queue the upgrade tasks of all chunks at `parent_layer` (3 or above).
+    Run this only once all the chunks at `parent_layer - 1` have completed.
+    """
+    assert parent_layer > 2, "This command is for layers 3 and above."
+    pickled = get_redis_connection().get(r_keys.INGESTION_MANAGER)
+    manager = IngestionManager.from_pickle(pickled)
+    queue_layer_helper(parent_layer, manager, upgrade_parent_chunk)
+
+
+@upgrade_cli.command("status")
+def ingest_status():
+    """Print the status of the upgrade to the console."""
+    conn = get_redis_connection()
+    pickled = conn.get(r_keys.INGESTION_MANAGER)
+    print_ingest_status(IngestionManager.from_pickle(pickled), conn, upgrade=True)
+
+
+@upgrade_cli.command("chunk")
+@click.argument("queue", type=str)
+@click.argument("chunk_info", nargs=4, type=int)
+def ingest_chunk(queue: str, chunk_info):
+    """Manually queue a single chunk, e.g. when its job is stuck."""
+    pickled = get_redis_connection().get(r_keys.INGESTION_MANAGER)
+    manager = IngestionManager.from_pickle(pickled)
+    layer = chunk_info[0]
+    coords = chunk_info[1:]
+    # layer 2 tasks take only the coordinates; parent tasks also take the layer
+    if layer == 2:
+        task, task_args = upgrade_atomic_chunk, (coords,)
+    else:
+        task, task_args = upgrade_parent_chunk, (layer, coords)
+    task_queue = manager.get_task_queue(queue)
+    task_queue.enqueue(
+        task,
+        job_id=chunk_id_str(layer, coords),
+        job_timeout=f"{int(layer * layer)}m",
+        result_ttl=0,
+        args=task_args,
+    )
+
+
+@upgrade_cli.command("rate")
+@click.argument("layer", type=int)
+@click.option("--span", default=10, help="Time span to calculate rate.")
+def rate(layer: int, span: int):
+    """Print the completion rate of `layer` chunks over the given time span."""
+    pickled = get_redis_connection().get(r_keys.INGESTION_MANAGER)
+    print_completion_rate(IngestionManager.from_pickle(pickled), layer, span=span)
diff --git a/pychunkedgraph/ingest/cluster.py b/pychunkedgraph/ingest/cluster.py
index a5c6a9861..485251568 100644
--- a/pychunkedgraph/ingest/cluster.py
+++ b/pychunkedgraph/ingest/cluster.py
@@ -1,23 +1,37 @@
 # pylint: disable=invalid-name, missing-function-docstring, import-outside-toplevel
 
 """
-Ingest / create chunkedgraph with workers.
+Ingest / create chunkedgraph with workers on a cluster.
 """
 
-from typing import Sequence, Tuple
+import logging
+from os import environ
+from time import sleep
+from typing import Callable, Dict, Iterable, Tuple, Sequence
 
 import numpy as np
+from rq import Queue as RQueue
 
-from .utils import chunk_id_str
+
+from .utils import chunk_id_str, get_chunks_not_done, randomize_grid_points
 from .manager import IngestionManager
-from .common import get_atomic_chunk_data
-from .ran_agglomeration import get_active_edges
-from .create.atomic_layer import add_atomic_edges
-from .create.abstract_layers import add_layer
-from ..graph.meta import ChunkedGraphMeta
+from .ran_agglomeration import (
+    get_active_edges,
+    read_raw_edge_data,
+    read_raw_agglomeration_data,
+)
+from .create.atomic_layer import add_atomic_chunk
+from .create.parent_layer import add_parent_chunk
+from .upgrade.atomic_layer import update_chunk as update_atomic_chunk
+from .upgrade.parent_layer import update_chunk as update_parent_chunk
+from ..graph.edges import EDGE_TYPES, Edges, put_edges
+from ..graph import ChunkedGraph, ChunkedGraphMeta
 from ..graph.chunks.hierarchy import get_children_chunk_coords
-from ..utils.redis import keys as r_keys
-from ..utils.redis import get_redis_connection
+from ..graph.utils.basetypes import NODE_ID
+from ..io.edges import get_chunk_edges
+from ..io.components import get_chunk_components
+from ..utils.redis import keys as r_keys, get_redis_connection
+from ..utils.general import chunked
 
 
 def _post_task_completion(
@@ -36,7 +50,7 @@ def create_parent_chunk(
 ) -> None:
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-    add_layer(
+    add_parent_chunk(
         imanager.cg,
         parent_layer,
         parent_coords,
@@ -49,54 +63,61 @@ def create_parent_chunk(
     _post_task_completion(imanager, parent_layer, parent_coords)
 
 
-def randomize_grid_points(X: int, Y: int, Z: int) -> Tuple[int, int, int]:
-    indices = np.arange(X * Y * Z)
-    np.random.shuffle(indices)
-    for index in indices:
-        yield np.unravel_index(index, (X, Y, Z))
+def upgrade_parent_chunk(parent_layer: int, parent_coords: Sequence[int]) -> None:
+    """Upgrade one parent chunk, then call `_post_task_completion` for it."""
+    pickled = get_redis_connection().get(r_keys.INGESTION_MANAGER)
+    manager = IngestionManager.from_pickle(pickled)
+    graph = manager.cg
+    update_parent_chunk(graph, parent_coords, layer=parent_layer)
+    # reached only when the update returned without raising
+    _post_task_completion(manager, parent_layer, parent_coords)
 
 
-def enqueue_atomic_tasks(imanager: IngestionManager):
-    from os import environ
-    from time import sleep
-    from rq import Queue as RQueue
+def _get_atomic_chunk_data(
+    imanager: IngestionManager, coord: Sequence[int]
+) -> Tuple[Dict, Dict]:
+    """
+    Helper to read either raw data or processed data
+    If reading from raw data, save it as processed data
+    """
+    chunk_edges = (
+        read_raw_edge_data(imanager, coord)
+        if imanager.config.USE_RAW_EDGES
+        else get_chunk_edges(imanager.cg_meta.data_source.EDGES, [coord])
+    )
 
-    chunk_coords = _get_test_chunks(imanager.cg.meta)
-    chunk_count = len(chunk_coords)
-    if not imanager.config.TEST_RUN:
-        atomic_chunk_bounds = imanager.cg_meta.layer_chunk_bounds[2]
-        chunk_coords = randomize_grid_points(*atomic_chunk_bounds)
-        chunk_count = imanager.cg_meta.layer_chunk_counts[0]
-    print(f"total chunk count: {chunk_count}, queuing...")
+    _check_edges_direction(chunk_edges, imanager.cg, coord)
+
+    mapping = (
+        read_raw_agglomeration_data(imanager, coord)
+        if imanager.config.USE_RAW_COMPONENTS
+        else get_chunk_components(imanager.cg_meta.data_source.COMPONENTS, coord)
+    )
+    return chunk_edges, mapping
 
-    queue_name = f"{imanager.config.CLUSTER.ATOMIC_Q_NAME}"
-    q = imanager.get_task_queue(queue_name)
-    job_datas = []
-    batch_size = int(environ.get("L2JOB_BATCH_SIZE", 1000))
-    for chunk_coord in chunk_coords:
-        # buffer for optimal use of redis memory
-        if len(q) > imanager.config.CLUSTER.ATOMIC_Q_LIMIT:
-            print(f"Sleeping {imanager.config.CLUSTER.ATOMIC_Q_INTERVAL}s...")
-            sleep(imanager.config.CLUSTER.ATOMIC_Q_INTERVAL)
-
-        x, y, z = chunk_coord
-        chunk_str = f"{x}_{y}_{z}"
-        if imanager.redis.sismember("2c", chunk_str):
-            # already done, skip
-            continue
-        job_datas.append(
-            RQueue.prepare_data(
-                create_atomic_chunk,
-                args=(chunk_coord,),
-                timeout=environ.get("L2JOB_TIMEOUT", "3m"),
-                result_ttl=0,
-                job_id=chunk_id_str(2, chunk_coord),
-            )
-        )
-        if len(job_datas) % batch_size == 0:
-            q.enqueue_many(job_datas)
-            job_datas = []
-    q.enqueue_many(job_datas)
+
+def _check_edges_direction(
+    chunk_edges: dict, cg: ChunkedGraph, coord: Sequence[int]
+) -> None:
+    """
+    For between and cross chunk edges of the chunk at `coord`:
+    flips edges in place so that nodes1 (`node_ids1`) always lie within the chunk;
+    asserts that this holds for every edge afterwards. Mutates `chunk_edges`.
+    """
+    x, y, z = coord
+    chunk_id = cg.get_chunk_id(layer=1, x=x, y=y, z=z)
+    for edge_type in [EDGE_TYPES.between_chunk, EDGE_TYPES.cross_chunk]:
+        edges = chunk_edges[edge_type]
+        e1 = edges.node_ids1
+        e2 = edges.node_ids2
+        # flip, in place, every edge whose second node lies in this chunk
+        e2_chunk_ids = cg.get_chunk_ids_from_node_ids(e2)
+        mask = e2_chunk_ids == chunk_id
+        e1[mask], e2[mask] = e2[mask], e1[mask]
+        # after flipping, every first node must lie in this chunk
+        e1_chunk_ids = cg.get_chunk_ids_from_node_ids(e1)
+        mask = e1_chunk_ids == chunk_id
+        assert np.all(mask), "all IDs must belong to same chunk"
 
 
 def create_atomic_chunk(coords: Sequence[int]):
@@ -105,22 +126,110 @@ def create_atomic_chunk(coords: Sequence[int]):
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
     coords = np.array(list(coords), dtype=int)
 
-    chunk_edges_all, mapping = get_atomic_chunk_data(imanager, coords)
+    chunk_edges_all, mapping = _get_atomic_chunk_data(imanager, coords)
     chunk_edges_active, isolated_ids = get_active_edges(chunk_edges_all, mapping)
-    add_atomic_edges(imanager.cg, coords, chunk_edges_active, isolated=isolated_ids)
-
-    if imanager.config.TEST_RUN:
-        # print for debugging
-        for k, v in chunk_edges_all.items():
-            print(k, len(v))
-        for k, v in chunk_edges_active.items():
-            print(f"active_{k}", len(v))
+    add_atomic_chunk(imanager.cg, coords, chunk_edges_active, isolated=isolated_ids)
+
+    for k, v in chunk_edges_all.items():
+        logging.debug(f"{k}: {len(v)}")
+    for k, v in chunk_edges_active.items():
+        logging.debug(f"active_{k}: {len(v)}")
+    _post_task_completion(imanager, 2, coords)
+
+
+def upgrade_atomic_chunk(coords: Sequence[int]):
+    """Upgrade the layer 2 chunk at `coords`, then call `_post_task_completion`."""
+    pickled = get_redis_connection().get(r_keys.INGESTION_MANAGER)
+    manager = IngestionManager.from_pickle(pickled)
+    chunk_coords = np.array(list(coords), dtype=int)
+    update_atomic_chunk(manager.cg, chunk_coords, layer=2)
+    _post_task_completion(manager, 2, chunk_coords)
+
+
+def convert_to_ocdbt(coords: Sequence[int]):
+    """
+    Convert edges stored per chunk to adjacency list in the tensorstore ocdbt kv store.
+    """
+    redis = get_redis_connection()
+    imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
+    coords = np.array(list(coords), dtype=int)
+    chunk_edges_all, mapping = _get_atomic_chunk_data(imanager, coords)
+    # merge the edges of all edge types into one flat set of edge lists
+    node_ids1 = []
+    node_ids2 = []
+    affinities = []
+    areas = []
+    for edges in chunk_edges_all.values():
+        node_ids1.extend(edges.node_ids1)
+        node_ids2.extend(edges.node_ids2)
+        affinities.extend(edges.affinities)
+        areas.extend(edges.areas)
+    # candidate nodes: both edge endpoints plus every key of `mapping`
+    edges = Edges(node_ids1, node_ids2, affinities=affinities, areas=areas)
+    nodes = np.concatenate(
+        [edges.node_ids1, edges.node_ids2, np.fromiter(mapping.keys(), dtype=NODE_ID)]
+    )
+    nodes = np.unique(nodes)
+    # chunk IDs are compared below to keep only the nodes of this chunk
+    chunk_id = imanager.cg.get_chunk_id(layer=1, x=coords[0], y=coords[1], z=coords[2])
+    chunk_ids = imanager.cg.get_chunk_ids_from_node_ids(nodes)
+    # put_edges reads the coordinator address from these environment variables
+    host = imanager.redis.get("OCDBT_COORDINATOR_HOST").decode()
+    port = imanager.redis.get("OCDBT_COORDINATOR_PORT").decode()
+    environ["OCDBT_COORDINATOR_HOST"] = host
+    environ["OCDBT_COORDINATOR_PORT"] = port
+    logging.info(f"OCDBT Coordinator address {host}:{port}")
+
+    put_edges(
+        f"{imanager.cg.meta.data_source.EDGES}/ocdbt",
+        nodes[chunk_ids == chunk_id],
+        edges,
+    )
     _post_task_completion(imanager, 2, coords)
 
 
 def _get_test_chunks(meta: ChunkedGraphMeta):
-    """Chunks at center of the dataset most likely not to be empty"""
+    """Chunks at the center most likely not to be empty"""
     parent_coords = np.array(meta.layer_chunk_bounds[3]) // 2
     return get_children_chunk_coords(meta, 3, parent_coords)
-    # f = lambda r1, r2, r3: np.array(np.meshgrid(r1, r2, r3), dtype=int).T.reshape(-1, 3)
-    # return f((x, x + 1), (y, y + 1), (z, z + 1))
+
+
+def _queue_tasks(imanager: IngestionManager, chunk_fn: Callable, coords: Iterable):
+    """
+    Enqueue a `chunk_fn` job on the "l2" queue for each chunk in `coords` that is
+    not done yet, in batches of `JOB_BATCH_SIZE`. Before each batch, waits in
+    `QUEUE_INTERVAL` second steps until the queue is no longer above `QUEUE_SIZE`.
+    """
+    q = imanager.get_task_queue("l2")
+    batch_size = int(environ.get("JOB_BATCH_SIZE", 100000))
+    queue_limit = int(environ.get("QUEUE_SIZE", 100000))
+    interval = int(environ.get("QUEUE_INTERVAL", 300))
+    timeout = environ.get("L2JOB_TIMEOUT", "3m")
+    for batch in chunked(coords, batch_size):
+        _coords = get_chunks_not_done(imanager, 2, batch)
+        # buffer for optimal use of redis memory; re-check after every sleep
+        while len(q) > queue_limit:
+            logging.info(f"Queue full; sleeping {interval}s...")
+            sleep(interval)
+        job_datas = [
+            RQueue.prepare_data(
+                chunk_fn, args=(c,), timeout=timeout, result_ttl=0,
+                job_id=chunk_id_str(2, c),
+            )
+            for c in _coords
+        ]
+        q.enqueue_many(job_datas)
+
+
+def enqueue_l2_tasks(imanager: IngestionManager, chunk_fn: Callable):
+    """
+    Queue `chunk_fn` for layer 2 chunks; only the test chunks when `TEST_RUN`.
+    """
+    coords = _get_test_chunks(imanager.cg.meta)
+    total = len(coords)
+    if not imanager.config.TEST_RUN:
+        meta = imanager.cg_meta
+        coords = randomize_grid_points(*meta.layer_chunk_bounds[2])
+        total = meta.layer_chunk_counts[0]
+    logging.info(f"Chunk count: {total}, queuing...")
+    _queue_tasks(imanager, chunk_fn, coords)
diff --git a/pychunkedgraph/ingest/common.py b/pychunkedgraph/ingest/common.py
deleted file mode 100644
index dccf58602..000000000
--- a/pychunkedgraph/ingest/common.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from typing import Dict
-from typing import Tuple
-from typing import Sequence
-
-from .manager import IngestionManager
-from .ran_agglomeration import read_raw_edge_data
-from .ran_agglomeration import read_raw_agglomeration_data
-from ..graph import ChunkedGraph
-from ..io.edges import get_chunk_edges
-from ..io.components import get_chunk_components
-
-
-def get_atomic_chunk_data(
-    imanager: IngestionManager, coord: Sequence[int]
-) -> Tuple[Dict, Dict]:
-    """
-    Helper to read either raw data or processed data
-    If reading from raw data, save it as processed data
-    """
-    chunk_edges = (
-        read_raw_edge_data(imanager, coord)
-        if imanager.config.USE_RAW_EDGES
-        else get_chunk_edges(imanager.cg_meta.data_source.EDGES, [coord])
-    )
-
-    _check_edges_direction(chunk_edges, imanager.cg, coord)
-
-    mapping = (
-        read_raw_agglomeration_data(imanager, coord)
-        if imanager.config.USE_RAW_COMPONENTS
-        else get_chunk_components(imanager.cg_meta.data_source.COMPONENTS, coord)
-    )
-    return chunk_edges, mapping
-
-
-def _check_edges_direction(
-    chunk_edges: dict, cg: ChunkedGraph, coord: Sequence[int]
-) -> None:
-    """
-    For between and cross chunk edges:
-    Checks and flips edges such that nodes1 are always within a chunk and nodes2 outside the chunk.
-    Where nodes1 = edges[:,0] and nodes2 = edges[:,1].
-    """
-    import numpy as np
-    from ..graph.edges import Edges
-    from ..graph.edges import EDGE_TYPES
-
-    x, y, z = coord
-    chunk_id = cg.get_chunk_id(layer=1, x=x, y=y, z=z)
-    for edge_type in [EDGE_TYPES.between_chunk, EDGE_TYPES.cross_chunk]:
-        edges = chunk_edges[edge_type]
-        e1 = edges.node_ids1
-        e2 = edges.node_ids2
-
-        e2_chunk_ids = cg.get_chunk_ids_from_node_ids(e2)
-        mask = e2_chunk_ids == chunk_id
-        e1[mask], e2[mask] = e2[mask], e1[mask]
-
-        e1_chunk_ids = cg.get_chunk_ids_from_node_ids(e1)
-        mask = e1_chunk_ids == chunk_id
-        assert np.all(mask), "all IDs must belong to same chunk"
diff --git a/pychunkedgraph/ingest/create/atomic_layer.py b/pychunkedgraph/ingest/create/atomic_layer.py
index 054a82840..0a7aae728 100644
--- a/pychunkedgraph/ingest/create/atomic_layer.py
+++ b/pychunkedgraph/ingest/create/atomic_layer.py
@@ -23,9 +23,9 @@
 from ...graph.utils.flatgraph import connected_components
 
 
-def add_atomic_edges(
+def add_atomic_chunk(
     cg: ChunkedGraph,
-    chunk_coord: np.ndarray,
+    coords: Sequence[int],
     chunk_edges_d: Dict[str, Edges],
     isolated: Sequence[int],
     time_stamp: Optional[datetime.datetime] = None,
@@ -40,9 +40,7 @@ def add_atomic_edges(
     graph, _, _, unique_ids = build_gt_graph(chunk_edge_ids, make_directed=True)
     ccs = connected_components(graph)
 
-    parent_chunk_id = cg.get_chunk_id(
-        layer=2, x=chunk_coord[0], y=chunk_coord[1], z=chunk_coord[2]
-    )
+    parent_chunk_id = cg.get_chunk_id(layer=2, x=coords[0], y=coords[1], z=coords[2])
     parent_ids = cg.id_client.create_node_ids(parent_chunk_id, size=len(ccs))
 
     sparse_indices, remapping = _get_remapping(chunk_edges_d)
diff --git a/pychunkedgraph/ingest/create/abstract_layers.py b/pychunkedgraph/ingest/create/parent_layer.py
similarity index 98%
rename from pychunkedgraph/ingest/create/abstract_layers.py
rename to pychunkedgraph/ingest/create/parent_layer.py
index adbe4a5ab..09be61407 100644
--- a/pychunkedgraph/ingest/create/abstract_layers.py
+++ b/pychunkedgraph/ingest/create/parent_layer.py
@@ -29,20 +29,20 @@
 from .cross_edges import get_chunk_nodes_cross_edge_layer
 
 
-def add_layer(
+def add_parent_chunk(
     cg: ChunkedGraph,
     layer_id: int,
-    parent_coords: Sequence[int],
+    coords: Sequence[int],
     children_coords: Sequence[Sequence[int]] = np.array([]),
     *,
     time_stamp: Optional[datetime.datetime] = None,
     n_threads: int = 4,
 ) -> None:
     if not children_coords.size:
-        children_coords = get_children_chunk_coords(cg.meta, layer_id, parent_coords)
+        children_coords = get_children_chunk_coords(cg.meta, layer_id, coords)
     children_ids = _read_children_chunks(cg, layer_id, children_coords, n_threads > 1)
     cx_edges = get_children_chunk_cross_edges(
-        cg, layer_id, parent_coords, use_threads=n_threads > 1
+        cg, layer_id, coords, use_threads=n_threads > 1
     )
 
     node_layers = cg.get_chunk_layers(children_ids)
@@ -59,7 +59,7 @@ def add_layer(
     _write_connected_components(
         cg,
         layer_id,
-        parent_coords,
+        coords,
         connected_components,
         get_valid_timestamp(time_stamp),
         n_threads > 1,
diff --git a/pychunkedgraph/ingest/ran_agglomeration.py b/pychunkedgraph/ingest/ran_agglomeration.py
index 7c4af51f7..a0ca42d54 100644
--- a/pychunkedgraph/ingest/ran_agglomeration.py
+++ b/pychunkedgraph/ingest/ran_agglomeration.py
@@ -5,10 +5,7 @@
 
 from collections import defaultdict
 from itertools import product
-from typing import Dict
-from typing import Iterable
-from typing import Tuple
-from typing import Union
+from typing import Dict, Iterable, Tuple, Union
 from binascii import crc32
 
 
@@ -23,8 +20,7 @@
 from ..io.edges import put_chunk_edges
 from ..io.components import put_chunk_components
 from ..graph.utils import basetypes
-from ..graph.edges import Edges
-from ..graph.edges import EDGE_TYPES
+from ..graph.edges import EDGE_TYPES, Edges
 from ..graph.types import empty_2d
 from ..graph.chunks.utils import get_chunk_id
 
diff --git a/pychunkedgraph/ingest/rq_cli.py b/pychunkedgraph/ingest/rq_cli.py
index c9b21ae36..6a1a4882d 100644
--- a/pychunkedgraph/ingest/rq_cli.py
+++ b/pychunkedgraph/ingest/rq_cli.py
@@ -8,8 +8,6 @@
 import click
 from redis import Redis
 from rq import Queue
-from rq import Worker
-from rq.worker import WorkerStatus
 from rq.job import Job
 from rq.exceptions import InvalidJobOperationError
 from rq.exceptions import NoSuchJobError
@@ -27,23 +25,6 @@
 connection = Redis(host=REDIS_HOST, port=REDIS_PORT, db=0, password=REDIS_PASSWORD)
 
 
-@rq_cli.command("status")
-@click.argument("queues", nargs=-1, type=str)
-@click.option("--show-busy", is_flag=True)
-def get_status(queues, show_busy):
-    print("NOTE: Use --show-busy to display count of non idle workers\n")
-    for queue in queues:
-        q = Queue(queue, connection=connection)
-        print(f"Queue name \t: {queue}")
-        print(f"Jobs queued \t: {len(q)}")
-        print(f"Workers total \t: {Worker.count(queue=q)}")
-        if show_busy:
-            workers = Worker.all(queue=q)
-            count = sum([worker.get_state() == WorkerStatus.BUSY for worker in workers])
-            print(f"Workers busy \t: {count}")
-        print(f"Jobs failed \t: {q.failed_job_registry.count}\n")
-
-
 @rq_cli.command("failed")
 @click.argument("queue", type=str)
 @click.argument("job_ids", nargs=-1)
@@ -129,9 +110,14 @@ def clean_start_registry(queue):
 def clear_failed_registry(queue):
     failed_job_registry = FailedJobRegistry(queue, connection=connection)
     job_ids = failed_job_registry.get_job_ids()
+    count = 0
     for job_id in job_ids:
-        failed_job_registry.remove(job_id, delete_job=True)
-    print(f"Deleted {len(job_ids)} jobs from the failed job registry.")
+        try:
+            failed_job_registry.remove(job_id, delete_job=True)
+            count += 1
+        except Exception as err:  # best effort: keep clearing the remaining jobs
+            print(f"Could not delete job {job_id}: {err}")
+    print(f"Deleted {count} jobs from the failed job registry.")
 
 
 def init_rq_cmds(app):
diff --git a/pychunkedgraph/ingest/simple_tests.py b/pychunkedgraph/ingest/simple_tests.py
index 33946bcec..07a60f5f3 100644
--- a/pychunkedgraph/ingest/simple_tests.py
+++ b/pychunkedgraph/ingest/simple_tests.py
@@ -7,8 +7,7 @@
 from datetime import datetime
 import numpy as np
 
-from pychunkedgraph.graph import ChunkedGraph
-from pychunkedgraph.graph import attributes
+from pychunkedgraph.graph import attributes, ChunkedGraph
 
 
 def family(cg: ChunkedGraph):
diff --git a/pychunkedgraph/ingest/upgrade/__init__.py b/pychunkedgraph/ingest/upgrade/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/pychunkedgraph/ingest/upgrade/atomic_layer.py b/pychunkedgraph/ingest/upgrade/atomic_layer.py
new file mode 100644
index 000000000..96f7f71bd
--- /dev/null
+++ b/pychunkedgraph/ingest/upgrade/atomic_layer.py
@@ -0,0 +1,119 @@
+# pylint: disable=invalid-name, missing-docstring, c-extension-no-member
+from datetime import timedelta
+
+import fastremap
+import numpy as np
+from pychunkedgraph.graph import ChunkedGraph
+from pychunkedgraph.graph.attributes import Connectivity
+from pychunkedgraph.graph.attributes import Hierarchy
+from pychunkedgraph.graph.utils import serializers
+
+from .utils import exists_as_parent
+
+
+def get_parent_timestamps(cg, supervoxels, start_time=None, end_time=None) -> set:
+    """
+    Timestamps of when the given supervoxels were edited, in the given time range.
+    A bound left as `None` is not checked; `end_time` is exclusive.
+    """
+    response = cg.client.read_nodes(
+        node_ids=supervoxels,
+        start_time=start_time,
+        end_time=end_time,
+        end_time_inclusive=False,
+    )
+    result = {c.timestamp for v in response.values() for c in v[Hierarchy.Parent]}
+    for ts in result:
+        valid = start_time is None or ts >= start_time
+        valid = valid and (end_time is None or ts < end_time)
+        assert valid, f"{ts}, {start_time}, {end_time}"
+    return result
+
+
+def get_edit_timestamps(cg: ChunkedGraph, edges_d, start_ts, end_ts) -> list:
+    """
+    Sorted timestamps, in the range from `start_ts` to `end_ts` (exclusive), at
+    which post-side supervoxels were involved in an edit; `start_ts` is always
+    included. Post-side - supervoxels in the neighbor chunk (second edge column).
+    This is required because we need to update edges from both sides.
+    """
+    all_edges = np.concatenate(list(edges_d.values()))
+    post_side = all_edges[:, 1]
+    edit_ts = get_parent_timestamps(cg, post_side, start_time=start_ts, end_time=end_ts)
+    edit_ts.add(start_ts)
+    return sorted(edit_ts)
+
+
+def update_cross_edges(cg: ChunkedGraph, node, cx_edges_d, node_ts, end_ts) -> list:
+    """
+    Build the cross chunk edge mutations for a single L2 ID.
+    Returns one mutation per timestamp, or an empty list if `node` is invalid.
+    """
+    all_edges = np.concatenate(list(cx_edges_d.values()))
+    own_svs, partner_svs = all_edges[:, 0], all_edges[:, 1]
+    uparents = np.unique(cg.get_parents(own_svs, time_stamp=node_ts))
+    assert uparents.size <= 1, f"{node}, {node_ts}, {uparents}"
+    if uparents.size == 0 or node != uparents[0]:
+        # if node is not the parent at this ts, it must be invalid
+        assert not exists_as_parent(cg, node, own_svs)
+        return []
+
+    if node_ts == end_ts:
+        timestamps = [node_ts]
+    else:
+        timestamps = get_edit_timestamps(cg, cx_edges_d, node_ts, end_ts)
+
+    mutations = []
+    row_id = serializers.serialize_uint64(node)
+    for ts in timestamps:
+        partner_parents = cg.get_parents(partner_svs, time_stamp=ts)
+        sv_to_parent = dict(zip(partner_svs, partner_parents))
+        val_dict = {}
+        for layer, layer_edges in cx_edges_d.items():
+            remapped = fastremap.remap(
+                layer_edges, sv_to_parent, preserve_missing_labels=True
+            )
+            remapped[:, 0] = node
+            val_dict[Connectivity.CrossChunkEdge[layer]] = np.unique(remapped, axis=0)
+        mutations.append(cg.client.mutate_row(row_id, val_dict, time_stamp=ts))
+    return mutations
+
+
+def update_chunk(cg: ChunkedGraph, chunk_coords: list[int], layer: int = 2):
+    """
+    Update the cross chunk edges of every L2 ID in the chunk at `chunk_coords`,
+    within the periods they were valid/active.
+    All resulting mutations are written in a single call at the end.
+    """
+    x, y, z = chunk_coords
+    chunk_id = cg.get_chunk_id(layer=layer, x=x, y=y, z=z)
+    cg.copy_fake_edges(chunk_id)
+    rr = cg.range_read_chunk(chunk_id)
+    nodes = list(rr.keys())
+
+    # start_ts is when a node becomes valid
+    nodes_ts = cg.get_node_timestamps(nodes, return_numpy=False, normalize=True)
+    cx_edges_d = cg.get_atomic_cross_edges(nodes)
+    children_d = cg.get_children(nodes)
+
+    # bigtable resolution is in ms, so later edits are searched from start_ts + 1ms
+    one_ms = timedelta(milliseconds=1)
+    rows = []
+    for node, start_ts in zip(nodes, nodes_ts):
+        if cg.get_parent(node) is None:
+            # invalid id caused by failed ingest task
+            continue
+        node_cx_edges_d = cx_edges_d.get(node, {})
+        if not node_cx_edges_d:
+            continue
+
+        # end_ts is when the node becomes invalid: the earliest later edit of
+        # its children. No such edit means there has been no edit involving
+        # this node, so start_ts is the only timestamp (end_ts == start_ts).
+        edit_ts = get_parent_timestamps(
+            cg, children_d[node], start_time=start_ts + one_ms
+        )
+        end_ts = min(edit_ts, default=start_ts)
+        # for each timestamp until end_ts, update cross chunk edges of node
+        rows += update_cross_edges(cg, node, node_cx_edges_d, start_ts, end_ts)
+    cg.client.write(rows)
diff --git a/pychunkedgraph/ingest/upgrade/parent_layer.py b/pychunkedgraph/ingest/upgrade/parent_layer.py
new file mode 100644
index 000000000..8674e45b7
--- /dev/null
+++ b/pychunkedgraph/ingest/upgrade/parent_layer.py
@@ -0,0 +1,170 @@
+# pylint: disable=invalid-name, missing-docstring, c-extension-no-member
+
+import math, random, time
+import multiprocessing as mp
+from collections import defaultdict
+
+import fastremap
+import numpy as np
+from multiwrapper import multiprocessing_utils as mu
+
+from pychunkedgraph.graph import ChunkedGraph
+from pychunkedgraph.graph.attributes import Connectivity, Hierarchy
+from pychunkedgraph.graph.utils import serializers
+from pychunkedgraph.graph.types import empty_2d
+from pychunkedgraph.utils.general import chunked
+
+from .utils import exists_as_parent
+
+
+CHILDREN = {}
+CX_EDGES = {}
+
+
+def _populate_nodes_and_children(
+    cg: ChunkedGraph, chunk_id: np.uint64, nodes: list = None
+) -> None:
+    """Rebind the module-level CHILDREN map (node ID -> children) for this call."""
+    global CHILDREN
+    if nodes:
+        CHILDREN = cg.get_children(nodes)
+        return
+    response = cg.range_read_chunk(chunk_id, properties=Hierarchy.Child)
+    CHILDREN = {k: v[0].value for k, v in response.items()}
+
+
+def _get_cx_edges_at_timestamp(node, response, ts):
+    """Per-layer cross edges of the children of `node`, as in effect at `ts`."""
+    per_layer = defaultdict(list)
+    present = (c for c in CHILDREN[node] if c in response)
+    for child in present:
+        for key, cells in response[child].items():
+            # cells are sorted in descending order of timestamps,
+            # so the first one not newer than `ts` is the one to use
+            current = next((c for c in cells if ts >= c.timestamp), None)
+            if current is not None:
+                per_layer[key.index].append(current.value)
+    for layer in list(per_layer):
+        per_layer[layer] = np.concatenate(per_layer[layer])
+    return per_layer
+
+
+def _populate_cx_edges_with_timestamps(
+    cg: ChunkedGraph, layer: int, nodes: list, nodes_ts: list
+):
+    """
+    Fill CX_EDGES[node][ts] with the children's cross edges at each relevant `ts`.
+    The same timestamp is used for all IDs involved in an edit, so the times at
+    which cross edges of children were updated serve as the node's edit times.
+    """
+    global CX_EDGES
+    attrs = [Connectivity.CrossChunkEdge[l] for l in range(layer, cg.meta.layer_count)]
+    all_children = np.concatenate(list(CHILDREN.values()))
+    response = cg.client.read_nodes(node_ids=all_children, properties=attrs)
+    for node, node_ts in zip(nodes, nodes_ts):
+        timestamps = {node_ts}
+        known_children = [c for c in CHILDREN[node] if c in response]
+        for child in known_children:
+            for cells in response[child].values():
+                timestamps.update(c.timestamp for c in cells if c.timestamp > node_ts)
+        CX_EDGES[node] = {
+            ts: _get_cx_edges_at_timestamp(node, response, ts)
+            for ts in sorted(timestamps)
+        }
+
+
+def update_cross_edges(cg: ChunkedGraph, layer, node, node_ts, earliest_ts) -> list:
+    """
+    Helper function to update a single ID of the given parent `layer`.
+    Returns one mutation per timestamp in CX_EDGES[node] that has cross edges.
+    """
+    rows = []
+    if node_ts > earliest_ts:
+        try:
+            cx_edges_d = CX_EDGES[node][node_ts]
+        except KeyError as err:
+            raise KeyError(f"{node}:{node_ts}") from err
+        edges = np.concatenate([empty_2d] + list(cx_edges_d.values()))
+        if edges.size:
+            parents = cg.get_roots(
+                edges[:, 0], time_stamp=node_ts, stop_layer=layer, ceil=False
+            )
+            uparents = np.unique(parents)
+            layers = cg.get_chunk_layers(uparents)
+            uparents = uparents[layers == layer]
+            assert uparents.size <= 1, f"{node}, {node_ts}, {uparents}"
+            if uparents.size == 0 or node != uparents[0]:
+                # if node is not the parent at this ts, it must be invalid
+                assert not exists_as_parent(cg, node, edges[:, 0]), f"{node}, {node_ts}"
+                return rows
+
+    for ts, cx_edges_d in CX_EDGES[node].items():
+        edges = np.concatenate([empty_2d] + list(cx_edges_d.values()))
+        if edges.size == 0:
+            continue
+        nodes = np.unique(edges[:, 1])
+        parents = cg.get_roots(nodes, time_stamp=ts, stop_layer=layer, ceil=False)
+        edge_parents_d = dict(zip(nodes, parents))
+        val_dict = {}
+        for _layer, layer_edges in cx_edges_d.items():
+            layer_edges = fastremap.remap(
+                layer_edges, edge_parents_d, preserve_missing_labels=True
+            )
+            layer_edges[:, 0] = node
+            layer_edges = np.unique(layer_edges, axis=0)
+            col = Connectivity.CrossChunkEdge[_layer]
+            val_dict[col] = layer_edges
+        row_id = serializers.serialize_uint64(node)
+        rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp=ts))
+    return rows
+
+
+def _update_cross_edges_helper(args):
+    """Task body: update cross edges for one batch of nodes and write the rows."""
+    cg_info, layer, nodes, nodes_ts, earliest_ts = args
+    cg = ChunkedGraph(**cg_info)
+    parents = cg.get_parents(nodes, fail_to_zero=True)
+    # a zero parent marks an invalid id caused by failed ingest task;
+    # such ids are skipped
+    valid = [(n, ts) for n, p, ts in zip(nodes, parents, nodes_ts) if p != 0]
+    rows = []
+    for node, node_ts in valid:
+        rows.extend(update_cross_edges(cg, layer, node, node_ts, earliest_ts))
+    cg.client.write(rows)
+
+
+def update_chunk(
+    cg: ChunkedGraph, chunk_coords: list[int], layer: int, nodes: list = None
+):
+    """
+    Update the cross chunk edges of all `layer` IDs in the given chunk,
+    or of `nodes` only when given. Batches are run via `mu.multiprocess_func`.
+    """
+    begin = time.time()
+    x, y, z = chunk_coords
+    chunk_id = cg.get_chunk_id(layer=layer, x=x, y=y, z=z)
+    _populate_nodes_and_children(cg, chunk_id, nodes=nodes)
+    if not CHILDREN:
+        return
+    nodes = list(CHILDREN.keys())
+    random.shuffle(nodes)
+    nodes_ts = cg.get_node_timestamps(nodes, return_numpy=False, normalize=True)
+    _populate_cx_edges_with_timestamps(cg, layer, nodes, nodes_ts)
+
+    # task size aims at two tasks per cpu;
+    # ceil keeps it >= 1 because `nodes` is not empty here
+    task_size = int(math.ceil(len(nodes) / mp.cpu_count() / 2))
+    cg_info = cg.get_serialized_info()
+    earliest_ts = cg.get_earliest_timestamp()
+    node_batches = chunked(nodes, task_size)
+    ts_batches = chunked(nodes_ts, task_size)
+    # each task carries the serialized graph info so the helper can rebuild `cg`
+    multi_args = [
+        (cg_info, layer, batch, ts_batch, earliest_ts)
+        for batch, ts_batch in zip(node_batches, ts_batches)
+    ]
+
+    print(f"nodes: {len(nodes)}, tasks: {len(multi_args)}, size: {task_size}")
+    n_procs = min(len(multi_args), mp.cpu_count())
+    mu.multiprocess_func(_update_cross_edges_helper, multi_args, n_threads=n_procs)
+    print(f"total elaspsed time: {time.time() - begin}")
diff --git a/pychunkedgraph/ingest/upgrade/utils.py b/pychunkedgraph/ingest/upgrade/utils.py
new file mode 100644
index 000000000..43c9a3034
--- /dev/null
+++ b/pychunkedgraph/ingest/upgrade/utils.py
@@ -0,0 +1,13 @@
+from pychunkedgraph.graph import ChunkedGraph
+from pychunkedgraph.graph.attributes import Hierarchy
+
+
+def exists_as_parent(cg: ChunkedGraph, parent, nodes) -> bool:
+    """
+    Check if `parent` is in the parent history of any of the given nodes,
+    i.e. among the `Hierarchy.Parent` cells read for them (no time filter).
+    Callers use it for L2 parents as well as for parents in higher layers.
+    """
+    response = cg.client.read_nodes(node_ids=nodes, properties=Hierarchy.Parent)
+    past_parents = {cell.value for cells in response.values() for cell in cells}
+    return parent in past_parents
diff --git a/pychunkedgraph/ingest/utils.py b/pychunkedgraph/ingest/utils.py
index 1c3236561..3d573ce37 100644
--- a/pychunkedgraph/ingest/utils.py
+++ b/pychunkedgraph/ingest/utils.py
@@ -1,14 +1,21 @@
 # pylint: disable=invalid-name, missing-docstring
-from typing import Tuple
 
-from . import ClusterIngestConfig
-from . import IngestConfig
-from ..graph.meta import ChunkedGraphMeta
-from ..graph.meta import DataSource
-from ..graph.meta import GraphConfig
+import logging
+from os import environ
+from time import sleep
+from typing import Any, Generator, Tuple
+
+import numpy as np
+import tensorstore as ts
+from rq import Queue, Worker
+from rq.worker import WorkerStatus
 
+from . import IngestConfig
+from .manager import IngestionManager
+from ..graph.meta import ChunkedGraphMeta, DataSource, GraphConfig
 from ..graph.client import BackendClientInfo
 from ..graph.client.bigtable import BigTableConfig
+from ..utils.general import chunked
 
 chunk_id_str = lambda layer, coords: f"{layer}_{'_'.join(map(str, coords))}"
 
@@ -16,14 +23,12 @@
 def bootstrap(
     graph_id: str,
     config: dict,
-    overwrite: bool = False,
     raw: bool = False,
     test_run: bool = False,
 ) -> Tuple[ChunkedGraphMeta, IngestConfig, BackendClientInfo]:
     """Parse config loaded from a yaml file."""
     ingest_config = IngestConfig(
         **config.get("ingest_config", {}),
-        CLUSTER=ClusterIngestConfig(),
         USE_RAW_EDGES=raw,
         USE_RAW_COMPONENTS=raw,
         TEST_RUN=test_run,
@@ -33,7 +38,7 @@ def bootstrap(
 
     graph_config = GraphConfig(
         ID=f"{graph_id}",
-        OVERWRITE=overwrite,
+        OVERWRITE=False,
         **config["graph_config"],
     )
     data_source = DataSource(**config["data_source"])
@@ -73,3 +78,115 @@ def postprocess_edge_data(im, edge_dict):
         return new_edge_dict
     else:
         raise ValueError(f"Unknown data_version: {data_version}")
+
+
+def start_ocdbt_server(imanager: IngestionManager, server: Any) -> None:
+    spec = {"driver": "ocdbt", "base": f"{imanager.cg.meta.data_source.EDGES}/ocdbt"}
+    spec["coordinator"] = {"address": f"localhost:{server.port}"}
+    ts.KvStore.open(spec).result()  # presumably blocks until the store is open
+    imanager.redis.set("OCDBT_COORDINATOR_PORT", str(server.port))
+    ocdbt_host = environ.get("MY_POD_IP", "localhost")  # localhost when unset
+    imanager.redis.set("OCDBT_COORDINATOR_HOST", ocdbt_host)
+    logging.info(f"OCDBT Coordinator address {ocdbt_host}:{server.port}")
+
+
+def randomize_grid_points(X: int, Y: int, Z: int) -> Generator[Tuple, None, None]:
+    """Yield every (x, y, z) index of an X*Y*Z grid exactly once, in random order."""
+    shape = (X, Y, Z)
+    for index in np.random.permutation(X * Y * Z):
+        yield np.unravel_index(index, shape)
+
+
+def get_chunks_not_done(imanager: IngestionManager, layer: int, coords: list) -> list:
+    """Return the coords missing from the redis set `"{layer}c"`, one batched check."""
+    members = ["_".join(str(c) for c in coord) for coord in coords]
+    try:
+        done_flags = imanager.redis.smismember(f"{layer}c", members)
+    except Exception:  # membership could not be checked: treat all as not done
+        return coords
+    return [coord for coord, done in zip(coords, done_flags) if not done]
+
+
+def print_completion_rate(imanager: IngestionManager, layer: int, span: int = 10):
+    """Sample the size of redis set `"{layer}c"` once a second; print the mean rate."""
+    samples = []
+    for _ in range(span + 1):
+        samples.append(imanager.redis.scard(f"{layer}c"))
+        sleep(1)
+    print(f"{np.diff(samples).sum() / span} chunks per second.")
+
+
+def print_ingest_status(imanager: IngestionManager, redis, upgrade: bool = False):
+    """
+    Print per-layer completion and per-queue job/worker counts to the console.
+    `redis` is the connection used for both the pipeline and the rq queues.
+    If `upgrade=True`, status does not include the root layer,
+    since there is no need to update cross edges for root ids.
+    """
+    layer_count = imanager.cg_meta.layer_count
+    # the root layer is left out for upgrades
+    layers = range(2, layer_count if upgrade else layer_count + 1)
+    # the first entry of layer_chunk_counts is paired with layer 2 below
+    layer_counts = imanager.cg_meta.layer_chunk_counts
+
+    # three redis commands are pipelined per layer: completed, queued, failed
+    pipeline = redis.pipeline()
+    worker_busy = []
+    for layer in layers:
+        pipeline.scard(f"{layer}c")
+        queue = Queue(f"l{layer}", connection=redis)
+        pipeline.llen(queue.key)
+        pipeline.zcard(queue.failed_job_registry.key)
+        # busy workers are counted outside the pipeline
+        states = [w.get_state() for w in Worker.all(queue=queue)]
+        worker_busy.append(sum(state == WorkerStatus.BUSY for state in states))
+
+    results = pipeline.execute()
+    # de-interleave the flat result list back into one list per command
+    completed = results[0::3]
+    queued = results[1::3]
+    failed = results[2::3]
+
+    print(f"version: \t{imanager.cg.version}")
+    print(f"graph_id: \t{imanager.cg.graph_id}")
+    print(f"chunk_size: \t{imanager.cg.meta.graph_config.CHUNK_SIZE}")
+    print("\nlayer status:")
+    for layer, done, count in zip(layers, completed, layer_counts):
+        print(f"{layer}\t: {done:<9} / {count}")
+
+    print("\n\nqueue status:")
+    for layer, q, f, wb in zip(layers, queued, failed, worker_busy):
+        print(f"l{layer}\t: queued: {q:<10} failed: {f:<10} busy: {wb}")
+
+
+def queue_layer_helper(parent_layer: int, imanager: IngestionManager, fn):
+    """Queue a `fn` job for every chunk of `parent_layer` that is not done yet."""
+    if parent_layer == imanager.cg_meta.layer_count:
+        chunk_coords = [(0, 0, 0)]
+    else:
+        bounds = imanager.cg_meta.layer_chunk_bounds[parent_layer]
+        chunk_coords = randomize_grid_points(*bounds)
+
+    q = imanager.get_task_queue(f"l{parent_layer}")
+    batch_size = int(environ.get("JOB_BATCH_SIZE", 10000))
+    timeout_scale = int(environ.get("TIMEOUT_SCALE_FACTOR", 1))
+    timeout = f"{timeout_scale * int(parent_layer * parent_layer)}m"
+    for batch in chunked(chunk_coords, batch_size):
+        pending = get_chunks_not_done(imanager, parent_layer, batch)
+        # buffer for optimal use of redis memory
+        if len(q) > int(environ.get("QUEUE_SIZE", 100000)):
+            interval = int(environ.get("QUEUE_INTERVAL", 300))
+            logging.info(f"Queue full; sleeping {interval}s...")
+            sleep(interval)
+
+        job_datas = [
+            Queue.prepare_data(
+                fn,
+                args=(parent_layer, coord),
+                result_ttl=0,
+                job_id=chunk_id_str(parent_layer, coord),
+                timeout=timeout,
+            )
+            for coord in pending
+        ]
+        q.enqueue_many(job_datas)
diff --git a/pychunkedgraph/repair/edits.py b/pychunkedgraph/repair/edits.py
index cb403a380..849b17e08 100644
--- a/pychunkedgraph/repair/edits.py
+++ b/pychunkedgraph/repair/edits.py
@@ -56,8 +56,6 @@ def repair_operation(
             op_ids_to_retry.append(locked_op)
             print(f"{node_id} indefinitely locked by op {locked_op}")
     print(f"total to retry: {len(op_ids_to_retry)}")
-
-    logs = cg.client.read_log_entries(op_ids_to_retry)
-    for op_id, log in logs.items():
+    for op_id in op_ids_to_retry:
         print(f"repairing {op_id}")
-        repair_operation(cg, log, op_id)
+        repair_operation(cg, op_id)
diff --git a/pychunkedgraph/tests/helpers.py b/pychunkedgraph/tests/helpers.py
index de5314422..b9c689ad6 100644
--- a/pychunkedgraph/tests/helpers.py
+++ b/pychunkedgraph/tests/helpers.py
@@ -14,12 +14,12 @@
 from google.cloud import bigtable
 
 from ..ingest.utils import bootstrap
-from ..ingest.create.atomic_layer import add_atomic_edges
+from ..ingest.create.atomic_layer import add_atomic_chunk
 from ..graph.edges import Edges
 from ..graph.edges import EDGE_TYPES
 from ..graph.utils import basetypes
 from ..graph.chunkedgraph import ChunkedGraph
-from ..ingest.create.abstract_layers import add_layer
+from ..ingest.create.parent_layer import add_parent_chunk
 
 
 class CloudVolumeBounds(object):
@@ -120,7 +120,7 @@ def _cgraph(request, n_layers=10, atomic_chunk_bounds: np.ndarray = np.array([])
                 "FANOUT": 2,
                 "SPATIAL_BITS": 10,
                 "ID_PREFIX": "",
-                "ROOT_LOCK_EXPIRY": timedelta(seconds=5)
+                "ROOT_LOCK_EXPIRY": timedelta(seconds=5),
             },
             "backend_client": {
                 "TYPE": "bigtable",
@@ -130,15 +130,14 @@ def _cgraph(request, n_layers=10, atomic_chunk_bounds: np.ndarray = np.array([])
                     "PROJECT": "IGNORE_ENVIRONMENT_PROJECT",
                     "INSTANCE": "emulated_instance",
                     "CREDENTIALS": credentials.AnonymousCredentials(),
-                    "MAX_ROW_KEY_COUNT": 1000
+                    "MAX_ROW_KEY_COUNT": 1000,
                 },
             },
             "ingest_config": {},
         }
 
         meta, _, client_info = bootstrap("test", config=config)
-        graph = ChunkedGraph(graph_id="test", meta=meta,
-                             client_info=client_info)
+        graph = ChunkedGraph(graph_id="test", meta=meta, client_info=client_info)
         graph.mock_edges = Edges([], [])
         graph.meta._ws_cv = CloudVolumeMock()
         graph.meta.layer_count = n_layers
@@ -176,8 +175,7 @@ def gen_graph_simplequerytest(request, gen_graph):
     # Chunk B
     create_chunk(
         graph,
-        vertices=[to_label(graph, 1, 1, 0, 0, 0),
-                  to_label(graph, 1, 1, 0, 0, 1)],
+        vertices=[to_label(graph, 1, 1, 0, 0, 0), to_label(graph, 1, 1, 0, 0, 1)],
         edges=[
             (to_label(graph, 1, 1, 0, 0, 0), to_label(graph, 1, 1, 0, 0, 1), 0.5),
             (to_label(graph, 1, 1, 0, 0, 0), to_label(graph, 1, 2, 0, 0, 0), inf),
@@ -188,13 +186,12 @@ def gen_graph_simplequerytest(request, gen_graph):
     create_chunk(
         graph,
         vertices=[to_label(graph, 1, 2, 0, 0, 0)],
-        edges=[(to_label(graph, 1, 2, 0, 0, 0),
-                to_label(graph, 1, 1, 0, 0, 0), inf)],
+        edges=[(to_label(graph, 1, 2, 0, 0, 0), to_label(graph, 1, 1, 0, 0, 0), inf)],
     )
 
-    add_layer(graph, 3, [0, 0, 0], n_threads=1)
-    add_layer(graph, 3, [1, 0, 0], n_threads=1)
-    add_layer(graph, 4, [0, 0, 0], n_threads=1)
+    add_parent_chunk(graph, 3, [0, 0, 0], n_threads=1)
+    add_parent_chunk(graph, 3, [1, 0, 0], n_threads=1)
+    add_parent_chunk(graph, 4, [0, 0, 0], n_threads=1)
 
     return graph
 
@@ -206,8 +203,7 @@ def create_chunk(cg, vertices=None, edges=None, timestamp=None):
     edges = edges if edges else []
     vertices = vertices if vertices else []
     vertices = np.unique(np.array(vertices, dtype=np.uint64))
-    edges = [(np.uint64(v1), np.uint64(v2), np.float32(aff))
-             for v1, v2, aff in edges]
+    edges = [(np.uint64(v1), np.uint64(v2), np.float32(aff)) for v1, v2, aff in edges]
     isolated_ids = [
         x
         for x in vertices
@@ -230,8 +226,7 @@ def create_chunk(cg, vertices=None, edges=None, timestamp=None):
 
     chunk_id = None
     if len(chunk_edges_active[EDGE_TYPES.in_chunk]):
-        chunk_id = cg.get_chunk_id(
-            chunk_edges_active[EDGE_TYPES.in_chunk].node_ids1[0])
+        chunk_id = cg.get_chunk_id(chunk_edges_active[EDGE_TYPES.in_chunk].node_ids1[0])
     elif len(vertices):
         chunk_id = cg.get_chunk_id(vertices[0])
 
@@ -257,7 +252,7 @@ def create_chunk(cg, vertices=None, edges=None, timestamp=None):
     cg.mock_edges += all_edges
 
     isolated_ids = np.array(isolated_ids, dtype=np.uint64)
-    add_atomic_edges(
+    add_atomic_chunk(
         cg,
         cg.get_chunk_coordinates(chunk_id),
         chunk_edges_active,
@@ -282,21 +277,21 @@ def get_layer_chunk_bounds(
     return layer_bounds_d
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def sv_data():
-    test_data_dir = 'pychunkedgraph/tests/data'
-    edges_file = f'{test_data_dir}/sv_edges.npy'
+    test_data_dir = "pychunkedgraph/tests/data"
+    edges_file = f"{test_data_dir}/sv_edges.npy"
     sv_edges = np.load(edges_file)
 
-    source_file = f'{test_data_dir}/sv_sources.npy'
+    source_file = f"{test_data_dir}/sv_sources.npy"
     sv_sources = np.load(source_file)
 
-    sinks_file = f'{test_data_dir}/sv_sinks.npy'
+    sinks_file = f"{test_data_dir}/sv_sinks.npy"
     sv_sinks = np.load(sinks_file)
 
-    affinity_file = f'{test_data_dir}/sv_affinity.npy'
+    affinity_file = f"{test_data_dir}/sv_affinity.npy"
     sv_affinity = np.load(affinity_file)
 
-    area_file = f'{test_data_dir}/sv_area.npy'
+    area_file = f"{test_data_dir}/sv_area.npy"
     sv_area = np.load(area_file)
     yield (sv_edges, sv_sources, sv_sinks, sv_affinity, sv_area)
diff --git a/pychunkedgraph/tests/test_uncategorized.py b/pychunkedgraph/tests/test_uncategorized.py
index 93c41158d..8b26f5c5e 100644
--- a/pychunkedgraph/tests/test_uncategorized.py
+++ b/pychunkedgraph/tests/test_uncategorized.py
@@ -36,7 +36,7 @@
 from ..graph.lineage import get_future_root_ids
 from ..graph.utils.serializers import serialize_uint64
 from ..graph.utils.serializers import deserialize_uint64
-from ..ingest.create.abstract_layers import add_layer
+from ..ingest.create.parent_layer import add_parent_chunk
 
 
 class TestGraphNodeConversion:
@@ -68,9 +68,9 @@ def test_node_id_adjacency(self, gen_graph):
         ) == cg.get_node_id(np.uint64(1), layer=2, x=3, y=1, z=0)
 
         assert cg.get_node_id(
-            np.uint64(2 ** 53 - 2), layer=10, x=0, y=0, z=0
+            np.uint64(2**53 - 2), layer=10, x=0, y=0, z=0
         ) + np.uint64(1) == cg.get_node_id(
-            np.uint64(2 ** 53 - 1), layer=10, x=0, y=0, z=0
+            np.uint64(2**53 - 1), layer=10, x=0, y=0, z=0
         )
 
     @pytest.mark.timeout(30)
@@ -82,9 +82,9 @@ def test_serialize_node_id(self, gen_graph):
         ) < serialize_uint64(cg.get_node_id(np.uint64(1), layer=2, x=3, y=1, z=0))
 
         assert serialize_uint64(
-            cg.get_node_id(np.uint64(2 ** 53 - 2), layer=10, x=0, y=0, z=0)
+            cg.get_node_id(np.uint64(2**53 - 2), layer=10, x=0, y=0, z=0)
         ) < serialize_uint64(
-            cg.get_node_id(np.uint64(2 ** 53 - 1), layer=10, x=0, y=0, z=0)
+            cg.get_node_id(np.uint64(2**53 - 1), layer=10, x=0, y=0, z=0)
         )
 
     @pytest.mark.timeout(30)
@@ -222,7 +222,7 @@ def test_build_single_across_edge(self, gen_graph):
             edges=[(to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 0), inf)],
         )
 
-        add_layer(cg, 3, [0, 0, 0], n_threads=1)
+        add_parent_chunk(cg, 3, [0, 0, 0], n_threads=1)
         res = cg.client._table.read_rows()
         res.consume_all()
 
@@ -327,7 +327,7 @@ def test_build_single_edge_and_single_across_edge(self, gen_graph):
             edges=[(to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 0), inf)],
         )
 
-        add_layer(cg, 3, np.array([0, 0, 0]), n_threads=1)
+        add_parent_chunk(cg, 3, np.array([0, 0, 0]), n_threads=1)
         res = cg.client._table.read_rows()
         res.consume_all()
 
@@ -424,10 +424,10 @@ def test_build_big_graph(self, gen_graph):
         # Preparation: Build Chunk Z
         create_chunk(cg, vertices=[to_label(cg, 1, 7, 7, 7, 0)], edges=[])
 
-        add_layer(cg, 3, [0, 0, 0], n_threads=1)
-        add_layer(cg, 3, [3, 3, 3], n_threads=1)
-        add_layer(cg, 4, [0, 0, 0], n_threads=1)
-        add_layer(cg, 5, [0, 0, 0], n_threads=1)
+        add_parent_chunk(cg, 3, [0, 0, 0], n_threads=1)
+        add_parent_chunk(cg, 3, [3, 3, 3], n_threads=1)
+        add_parent_chunk(cg, 4, [0, 0, 0], n_threads=1)
+        add_parent_chunk(cg, 5, [0, 0, 0], n_threads=1)
 
         res = cg.client._table.read_rows()
         res.consume_all()
@@ -468,21 +468,21 @@ def test_double_chunk_creation(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             4,
             [0, 0, 0],
@@ -831,7 +831,7 @@ def test_merge_pair_neighboring_chunks(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -887,28 +887,28 @@ def test_merge_pair_disconnected_chunks(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [3, 3, 3],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             4,
             [0, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             5,
             [0, 0, 0],
@@ -1052,7 +1052,7 @@ def test_merge_triple_chain_to_full_circle_neighboring_chunks(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -1111,35 +1111,35 @@ def test_merge_triple_chain_to_full_circle_disconnected_chunks(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [3, 3, 3],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             4,
             [0, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             4,
             [1, 1, 1],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             5,
             [0, 0, 0],
@@ -1239,7 +1239,7 @@ def test_merge_pair_abstract_nodes(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -1314,7 +1314,7 @@ def test_diagonal_connections(self, gen_graph):
             edges=[(to_label(cg, 1, 1, 1, 0, 0), to_label(cg, 1, 0, 1, 0, 0), inf)],
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -1405,28 +1405,28 @@ def test_cross_edges(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [1, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             4,
             [0, 0, 0],
             time_stamp=fake_timestamp,
             n_threads=1,
         )
-        add_layer(
+        add_parent_chunk(
             cg,
             5,
             [0, 0, 0],
@@ -1591,7 +1591,7 @@ def test_cut_regular_link(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -1662,7 +1662,7 @@ def test_cut_no_link(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -1723,7 +1723,7 @@ def test_cut_old_link(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -1791,7 +1791,7 @@ def test_cut_indivisible_link(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -1922,7 +1922,7 @@ def test_cut_merge_history(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -2063,7 +2063,7 @@ def test_lock_unlock(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -2129,7 +2129,7 @@ def test_lock_expiration(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -2197,7 +2197,7 @@ def test_lock_renew(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -2249,7 +2249,7 @@ def test_lock_merge_lock_old_id(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -2315,7 +2315,7 @@ def test_indefinite_lock(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
@@ -2388,7 +2388,7 @@ def test_indefinite_lock_with_normal_lock_expiration(self, gen_graph):
             timestamp=fake_timestamp,
         )
 
-        add_layer(
+        add_parent_chunk(
             cg,
             3,
             [0, 0, 0],
diff --git a/pychunkedgraph/utils/general.py b/pychunkedgraph/utils/general.py
index 719473c6f..ac4929660 100644
--- a/pychunkedgraph/utils/general.py
+++ b/pychunkedgraph/utils/general.py
@@ -1,7 +1,9 @@
 """
 generic helper funtions
 """
+
 from typing import Sequence
+from itertools import islice
 
 
 import numpy as np
@@ -24,11 +26,15 @@ def reverse_dictionary(dictionary):
 
 
 def chunked(l: Sequence, n: int):
-    """Yield successive n-sized chunks from l."""
+    """
+    Yield successive n-sized chunks from l.
+    NOTE: Use itertools.batched from python 3.12
+    """
     if n < 1:
         n = len(l)
-    for i in range(0, len(l), n):
-        yield l[i : i + n]
+    it = iter(l)
+    while batch := tuple(islice(it, n)):
+        yield batch
 
 
 def in2d(arr1: np.ndarray, arr2: np.ndarray) -> np.ndarray:
diff --git a/requirements.in b/requirements.in
index 63e0b3472..1ec536a5c 100644
--- a/requirements.in
+++ b/requirements.in
@@ -15,6 +15,7 @@ rq<2
 pyyaml
 cachetools
 werkzeug
+tensorstore
 
 # PyPI only:
 cloud-files>=4.21.1
diff --git a/requirements.txt b/requirements.txt
index 5a2f18adc..059b8fd91 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -192,6 +192,8 @@ messagingclient==0.1.3
     # via -r requirements.in
 middle-auth-client==3.16.1
     # via -r requirements.in
+ml-dtypes==0.3.2
+    # via tensorstore
 multiprocess==0.70.15
     # via pathos
 multiwrapper==0.1.1
@@ -210,11 +212,13 @@ numpy==1.26.0
     #   fastremap
     #   fpzip
     #   messagingclient
+    #   ml-dtypes
     #   multiwrapper
     #   pandas
     #   pyspng-seunglab
     #   simplejpeg
     #   task-queue
+    #   tensorstore
     #   zfpc
     #   zmesh
 orderedmultidict==1.0.1
@@ -337,6 +341,8 @@ tenacity==8.2.3
     #   cloud-files
     #   cloud-volume
     #   task-queue
+tensorstore==0.1.53
+    # via -r requirements.in
 tqdm==4.66.1
     # via
     #   cloud-files

From 9e49fd798152650c600af52a6240a80efba319a6 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sun, 12 May 2024 16:10:12 +0000
Subject: [PATCH 082/105] reset version v3

---
 .bumpversion.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 1e048a94a..5583246c5 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.18.3
+current_version = 3.0.0
 commit = True
 tag = True
 

From b42a59ccb9e615bf84570fb3d91c46e9c5da65cc Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sun, 12 May 2024 18:18:05 +0000
Subject: [PATCH 083/105] breakup long fn

---
 pychunkedgraph/ingest/create/parent_layer.py | 96 ++++++++++----------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/pychunkedgraph/ingest/create/parent_layer.py b/pychunkedgraph/ingest/create/parent_layer.py
index 09be61407..a777d9efc 100644
--- a/pychunkedgraph/ingest/create/parent_layer.py
+++ b/pychunkedgraph/ingest/create/parent_layer.py
@@ -154,13 +154,50 @@ def _write_components_helper(args):
     _write(cg, layer, pcoords, ccs, node_layer_d, time_stamp)
 
 
+def _children_rows(
+    cg: ChunkedGraph, parent_id, children: Sequence, cx_edges_d: dict, time_stamp
+):
+    """
+    Update children rows to point to the parent_id, collect cached children
+    cross chunk edges to lift and update parent cross chunk edges.
+    Returns list of mutations to children and list of children cross edges.
+    """
+    rows = []
+    children_cx_edges = []
+    for child in children:
+        node_layer = cg.get_chunk_layer(child)
+        row_id = serializers.serialize_uint64(child)
+        val_dict = {attributes.Hierarchy.Parent: parent_id}
+        node_cx_edges_d = cx_edges_d.get(child, {})
+        if not node_cx_edges_d:
+            rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
+            continue
+        for layer in range(node_layer, cg.meta.layer_count):
+            if not layer in node_cx_edges_d:
+                continue
+            layer_edges = node_cx_edges_d[layer]
+            nodes = np.unique(layer_edges)
+            parents = cg.get_roots(nodes, stop_layer=node_layer, ceil=False)
+            edge_parents_d = dict(zip(nodes, parents))
+            layer_edges = fastremap.remap(
+                layer_edges, edge_parents_d, preserve_missing_labels=True
+            )
+            layer_edges = np.unique(layer_edges, axis=0)
+            col = attributes.Connectivity.CrossChunkEdge[layer]
+            val_dict[col] = layer_edges
+            node_cx_edges_d[layer] = layer_edges
+        children_cx_edges.append(node_cx_edges_d)
+        rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
+    return rows, children_cx_edges
+
+
 def _write(
     cg: ChunkedGraph,
     layer_id,
     parent_coords,
     components,
     node_layer_d,
-    time_stamp,
+    ts,
     use_threads=True,
 ):
     parent_layers = range(layer_id, cg.meta.layer_count + 1)
@@ -175,71 +212,34 @@ def _write(
     x, y, z = parent_coords
     parent_chunk_id = cg.get_chunk_id(layer=layer_id, x=x, y=y, z=z)
     parent_chunk_id_dict = cg.get_parent_chunk_id_dict(parent_chunk_id)
-
     for parent_layer in parent_layers:
         if len(cc_connections[parent_layer]) == 0:
             continue
-
         parent_chunk_id = parent_chunk_id_dict[parent_layer]
         reserved_parent_ids = cg.id_client.create_node_ids(
             parent_chunk_id,
             size=len(cc_connections[parent_layer]),
             root_chunk=parent_layer == cg.meta.layer_count and use_threads,
         )
-
-        for i_cc, node_ids in enumerate(cc_connections[parent_layer]):
-            parent_id = reserved_parent_ids[i_cc]
-
+        for i_cc, children in enumerate(cc_connections[parent_layer]):
+            parent = reserved_parent_ids[i_cc]
             if layer_id == 3:
                 # when layer 3 is being processed, children chunks are at layer 2
                 # layer 2 chunks at this time will only have atomic cross edges
-                cx_edges_d = cg.get_atomic_cross_edges(node_ids)
+                cx_edges_d = cg.get_atomic_cross_edges(children)
             else:
-                # children are from abstract chunks
-                cx_edges_d = cg.get_cross_chunk_edges(node_ids, raw_only=True)
-
-            children_cx_edges = []
-            for node in node_ids:
-                node_layer = cg.get_chunk_layer(node)
-                row_id = serializers.serialize_uint64(node)
-                val_dict = {attributes.Hierarchy.Parent: parent_id}
-
-                node_cx_edges_d = cx_edges_d.get(node, {})
-                if not node_cx_edges_d:
-                    rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
-                    continue
-
-                for layer in range(node_layer, cg.meta.layer_count):
-                    if not layer in node_cx_edges_d:
-                        continue
-                    layer_edges = node_cx_edges_d[layer]
-                    nodes = np.unique(layer_edges)
-                    parents = cg.get_roots(nodes, stop_layer=node_layer, ceil=False)
-
-                    edge_parents_d = dict(zip(nodes, parents))
-                    layer_edges = fastremap.remap(
-                        layer_edges, edge_parents_d, preserve_missing_labels=True
-                    )
-                    layer_edges = np.unique(layer_edges, axis=0)
-
-                    col = attributes.Connectivity.CrossChunkEdge[layer]
-                    val_dict[col] = layer_edges
-                    node_cx_edges_d[layer] = layer_edges
-                children_cx_edges.append(node_cx_edges_d)
-                rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
-
-            row_id = serializers.serialize_uint64(parent_id)
-            val_dict = {attributes.Hierarchy.Child: node_ids}
-            parent_cx_edges_d = concatenate_cross_edge_dicts(
-                children_cx_edges, unique=True
-            )
+                cx_edges_d = cg.get_cross_chunk_edges(children, raw_only=True)
+            _rows, cx_edges = _children_rows(cg, parent, children, cx_edges_d, ts)
+            rows.extend(_rows)
+            row_id = serializers.serialize_uint64(parent)
+            val_dict = {attributes.Hierarchy.Child: children}
+            parent_cx_edges_d = concatenate_cross_edge_dicts(cx_edges, unique=True)
             for layer in range(parent_layer, cg.meta.layer_count):
                 if not layer in parent_cx_edges_d:
                     continue
                 col = attributes.Connectivity.CrossChunkEdge[layer]
                 val_dict[col] = parent_cx_edges_d[layer]
-
-            rows.append(cg.client.mutate_row(row_id, val_dict, time_stamp))
+            rows.append(cg.client.mutate_row(row_id, val_dict, ts))
             if len(rows) > 100000:
                 cg.client.write(rows)
                 rows = []

From fb0e5d3eac68805277f427bef6a90d370a310258 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Wed, 15 May 2024 00:11:42 +0000
Subject: [PATCH 084/105] gh actions for pcgv3

---
 .github/workflows/main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 899f0431f..fd20bf4b7 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -4,9 +4,11 @@ on:
   push:
     branches:
       - "main"
+      - "pcgv3"
   pull_request:
     branches:
       - "main"
+      - "pcgv3"
 
 jobs:
   unit-tests:

From 19a1a674eccd5a93a3baca63bd169068bbfc1f88 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Fri, 24 May 2024 21:31:39 -0500
Subject: [PATCH 085/105] update split tests (#497)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(ingest): use temporarily cached cross chunk edges

* fix: switch to using partners vector instead of 2d edges array

* fix(edits): l2 - use and store cx edges that become relevant only at l2

* chore: rename counterpart to partner

* fix: update partner cx edges

* feat(edits): use layer relevant partners

* fix tests

* persist cross chunk layers with each node

* fix: update cross chunk layers in edits

* fix: update cross layer from old ids in l2

* update deprecated utcnow

* fix split tests

* Bump version: 3.0.0 → 3.0.1

* fix: missed timestamp arg

* update docs, remove unnecessary methods

* revert structural changes

* fix new tests; revert bumpversion.cfg
---
 pychunkedgraph/graph/edits.py                |   22 +-
 pychunkedgraph/graph/misc.py                 |   58 +-
 pychunkedgraph/graph/utils/basetypes.py      |   22 +-
 pychunkedgraph/ingest/create/parent_layer.py |    3 +-
 pychunkedgraph/tests/helpers.py              |    1 +
 pychunkedgraph/tests/test_uncategorized.py   | 2141 ++++++++----------
 6 files changed, 1036 insertions(+), 1211 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index ee7e643c3..807fff257 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -251,7 +251,7 @@ def add_edges(
     return new_roots, new_l2_ids, create_parents.new_entries
 
 
-def _process_l2_agglomeration(
+def _split_l2_agglomeration(
     cg,
     operation_id: int,
     agg: types.Agglomeration,
@@ -272,16 +272,16 @@ def _process_l2_agglomeration(
     # if there aren't any, there must be no parents. XOR these 2 conditions.
     err = f"got cross edges from more than one l2 node; op {operation_id}"
     assert (np.unique(parents).size == 1) != (cross_edges.size == 0), err
-    root = cg.get_root(parents[0], time_stamp=parent_ts, raw_only=True)
-
-    # inactive edges must be filtered out
-    neighbor_roots = cg.get_roots(
-        cross_edges[:, 1], raw_only=True, time_stamp=parent_ts
-    )
-    active_mask = neighbor_roots == root
-    cross_edges = cross_edges[active_mask]
-    cross_edges = cross_edges[~in2d(cross_edges, removed_edges)]
 
+    if cross_edges.size:
+        # inactive edges must be filtered out
+        root = cg.get_root(parents[0], time_stamp=parent_ts, raw_only=True)
+        neighbor_roots = cg.get_roots(
+            cross_edges[:, 1], raw_only=True, time_stamp=parent_ts
+        )
+        active_mask = neighbor_roots == root
+        cross_edges = cross_edges[active_mask]
+        cross_edges = cross_edges[~in2d(cross_edges, removed_edges)]
     isolated_ids = agg.supervoxels[~np.in1d(agg.supervoxels, chunk_edges)]
     isolated_edges = np.column_stack((isolated_ids, isolated_ids))
     graph, _, _, graph_ids = flatgraph.build_gt_graph(
@@ -332,7 +332,7 @@ def remove_edges(
     new_l2_ids = []
     for id_ in l2ids:
         agg = l2id_agglomeration_d[id_]
-        ccs, graph_ids, cross_edges = _process_l2_agglomeration(
+        ccs, graph_ids, cross_edges = _split_l2_agglomeration(
             cg, operation_id, agg, removed_edges, parent_ts
         )
         new_parents = cg.id_client.create_node_ids(chunk_id_map[agg.node_id], len(ccs))
diff --git a/pychunkedgraph/graph/misc.py b/pychunkedgraph/graph/misc.py
index 873422db1..0f53c71c3 100644
--- a/pychunkedgraph/graph/misc.py
+++ b/pychunkedgraph/graph/misc.py
@@ -8,7 +8,6 @@
 
 import fastremap
 import numpy as np
-from multiwrapper import multiprocessing_utils as mu
 
 from . import ChunkedGraph
 from . import attributes
@@ -51,22 +50,6 @@ def _read_delta_root_rows(
     return new_root_ids, expired_root_ids
 
 
-def _read_root_rows_thread(args) -> list:
-    start_seg_id, end_seg_id, serialized_cg_info, time_stamp = args
-    cg = ChunkedGraph(**serialized_cg_info)
-    start_id = cg.get_node_id(segment_id=start_seg_id, chunk_id=cg.root_chunk_id)
-    end_id = cg.get_node_id(segment_id=end_seg_id, chunk_id=cg.root_chunk_id)
-    rows = cg.client.read_nodes(
-        start_id=start_id,
-        end_id=end_id,
-        end_id_inclusive=False,
-        end_time=time_stamp,
-        end_time_inclusive=True,
-    )
-    root_ids = [k for (k, v) in rows.items() if attributes.Hierarchy.NewParent not in v]
-    return root_ids
-
-
 def get_proofread_root_ids(
     cg: ChunkedGraph,
     start_time: Optional[datetime.datetime] = None,
@@ -94,43 +77,12 @@ def get_proofread_root_ids(
 
 
 def get_latest_roots(
-    cg, time_stamp: Optional[datetime.datetime] = None, n_threads: int = 1
+    cg: ChunkedGraph, time_stamp: Optional[datetime.datetime] = None, n_threads: int = 1
 ) -> Sequence[np.uint64]:
-    # Create filters: time and id range
-    max_seg_id = cg.get_max_seg_id(cg.root_chunk_id) + 1
-    n_blocks = 1 if n_threads == 1 else int(np.min([n_threads * 3 + 1, max_seg_id]))
-    seg_id_blocks = np.linspace(1, max_seg_id, n_blocks + 1, dtype=np.uint64)
-    cg_serialized_info = cg.get_serialized_info()
-    if n_threads > 1:
-        del cg_serialized_info["credentials"]
-
-    multi_args = []
-    for i_id_block in range(0, len(seg_id_blocks) - 1):
-        multi_args.append(
-            [
-                seg_id_blocks[i_id_block],
-                seg_id_blocks[i_id_block + 1],
-                cg_serialized_info,
-                time_stamp,
-            ]
-        )
-
-    if n_threads == 1:
-        results = mu.multiprocess_func(
-            _read_root_rows_thread,
-            multi_args,
-            n_threads=n_threads,
-            verbose=False,
-            debug=n_threads == 1,
-        )
-    else:
-        results = mu.multisubprocess_func(
-            _read_root_rows_thread, multi_args, n_threads=n_threads
-        )
-    root_ids = []
-    for result in results:
-        root_ids.extend(result)
-    return np.array(root_ids, dtype=np.uint64)
+    root_chunk = cg.get_chunk_id(layer=cg.meta.layer_count, x=0, y=0, z=0)
+    rr = cg.range_read_chunk(root_chunk, time_stamp=time_stamp)
+    roots = [k for k, v in rr.items() if attributes.Hierarchy.NewParent not in v]
+    return np.array(roots, dtype=np.uint64)
 
 
 def get_delta_roots(
diff --git a/pychunkedgraph/graph/utils/basetypes.py b/pychunkedgraph/graph/utils/basetypes.py
index e55324e6a..c6b0b1974 100644
--- a/pychunkedgraph/graph/utils/basetypes.py
+++ b/pychunkedgraph/graph/utils/basetypes.py
@@ -1,16 +1,16 @@
 import numpy as np
 
 
-CHUNK_ID = SEGMENT_ID = NODE_ID = OPERATION_ID = np.dtype('uint64').newbyteorder('L')
-EDGE_AFFINITY = np.dtype('float32').newbyteorder('L')
-EDGE_AREA = np.dtype('uint64').newbyteorder('L')
+CHUNK_ID = SEGMENT_ID = NODE_ID = OPERATION_ID = np.dtype("uint64").newbyteorder("L")
+EDGE_AFFINITY = np.dtype("float32").newbyteorder("L")
+EDGE_AREA = np.dtype("uint64").newbyteorder("L")
 
-COUNTER = np.dtype('int64').newbyteorder('B')
+COUNTER = np.dtype("int64").newbyteorder("B")
 
-COORDINATES = np.dtype('int64').newbyteorder('L')
-CHUNKSIZE = np.dtype('uint64').newbyteorder('L')
-FANOUT = np.dtype('uint64').newbyteorder('L')
-LAYERCOUNT = np.dtype('uint64').newbyteorder('L')
-SPATIALBITS = np.dtype('uint64').newbyteorder('L')
-ROOTCOUNTERBITS = np.dtype('uint64').newbyteorder('L')
-SKIPCONNECTIONS = np.dtype('uint64').newbyteorder('L')
\ No newline at end of file
+COORDINATES = np.dtype("int64").newbyteorder("L")
+CHUNKSIZE = np.dtype("uint64").newbyteorder("L")
+FANOUT = np.dtype("uint64").newbyteorder("L")
+LAYERCOUNT = np.dtype("uint64").newbyteorder("L")
+SPATIALBITS = np.dtype("uint64").newbyteorder("L")
+ROOTCOUNTERBITS = np.dtype("uint64").newbyteorder("L")
+SKIPCONNECTIONS = np.dtype("uint64").newbyteorder("L")
diff --git a/pychunkedgraph/ingest/create/parent_layer.py b/pychunkedgraph/ingest/create/parent_layer.py
index a777d9efc..90b24d26a 100644
--- a/pychunkedgraph/ingest/create/parent_layer.py
+++ b/pychunkedgraph/ingest/create/parent_layer.py
@@ -164,7 +164,8 @@ def _children_rows(
     """
     rows = []
     children_cx_edges = []
-    for child in children:
+    children_layers = cg.get_chunk_layers(children)
+    for child, node_layer in zip(children, children_layers):
         node_layer = cg.get_chunk_layer(child)
         row_id = serializers.serialize_uint64(child)
         val_dict = {attributes.Hierarchy.Parent: parent_id}
diff --git a/pychunkedgraph/tests/helpers.py b/pychunkedgraph/tests/helpers.py
index b9c689ad6..551c596bf 100644
--- a/pychunkedgraph/tests/helpers.py
+++ b/pychunkedgraph/tests/helpers.py
@@ -257,6 +257,7 @@ def create_chunk(cg, vertices=None, edges=None, timestamp=None):
         cg.get_chunk_coordinates(chunk_id),
         chunk_edges_active,
         isolated=isolated_ids,
+        time_stamp=timestamp,
     )
 
 
diff --git a/pychunkedgraph/tests/test_uncategorized.py b/pychunkedgraph/tests/test_uncategorized.py
index 8b26f5c5e..5c2de29d4 100644
--- a/pychunkedgraph/tests/test_uncategorized.py
+++ b/pychunkedgraph/tests/test_uncategorized.py
@@ -1,20 +1,10 @@
-import collections
-import os
-import subprocess
-import sys
 from time import sleep
-from datetime import datetime, timedelta
-from functools import partial
+from datetime import datetime, timedelta, UTC
 from math import inf
-from signal import SIGTERM
-from unittest import mock
 from warnings import warn
 
 import numpy as np
 import pytest
-from google.auth import credentials
-from google.cloud import bigtable
-from grpc._channel import _Rendezvous
 
 from .helpers import (
     bigtable_emulator,
@@ -24,13 +14,14 @@
     to_label,
     sv_data,
 )
+from ..graph import ChunkedGraph
 from ..graph import types
 from ..graph import attributes
 from ..graph import exceptions
-from ..graph import chunkedgraph
 from ..graph.edges import Edges
 from ..graph.utils import basetypes
-from ..graph.misc import get_delta_roots
+from ..graph.lineage import lineage_graph
+from ..graph.misc import get_delta_roots, get_latest_roots
 from ..graph.cutting import run_multicut
 from ..graph.lineage import get_root_id_history
 from ..graph.lineage import get_future_root_ids
@@ -452,7 +443,7 @@ def test_double_chunk_creation(self, gen_graph):
         cg = gen_graph(n_layers=4, atomic_chunk_bounds=atomic_chunk_bounds)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2)],
@@ -775,7 +766,7 @@ def test_merge_pair_same_chunk(self, gen_graph):
         cg = gen_graph(n_layers=2, atomic_chunk_bounds=atomic_chunk_bounds)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
@@ -815,7 +806,7 @@ def test_merge_pair_neighboring_chunks(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
@@ -871,7 +862,7 @@ def test_merge_pair_disconnected_chunks(self, gen_graph):
         cg = gen_graph(n_layers=5)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
@@ -955,7 +946,7 @@ def test_merge_pair_already_connected(self, gen_graph):
         cg = gen_graph(n_layers=2)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
@@ -996,7 +987,7 @@ def test_merge_triple_chain_to_full_circle_same_chunk(self, gen_graph):
         cg = gen_graph(n_layers=2)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[
@@ -1033,7 +1024,7 @@ def test_merge_triple_chain_to_full_circle_neighboring_chunks(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
@@ -1082,7 +1073,7 @@ def test_merge_triple_chain_to_full_circle_disconnected_chunks(self, gen_graph):
         cg = gen_graph(n_layers=5)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
@@ -1181,7 +1172,7 @@ def test_merge_same_node(self, gen_graph):
         cg = gen_graph(n_layers=2)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
@@ -1223,7 +1214,7 @@ def test_merge_pair_abstract_nodes(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
@@ -1352,7 +1343,7 @@ def test_cross_edges(self, gen_graph):
         cg = gen_graph(n_layers=5)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[
@@ -1466,81 +1457,72 @@ def test_multiple_cuts_and_splits(self, gen_graph_simplequerytest):
         child_ids = np.concatenate(child_ids)
 
         for i in range(10):
-
-            print(f"\n\nITERATION {i}/10")
-            print("\n\nMERGE 1 & 3\n\n")
+            print(f"\n\nITERATION {i}/10 - MERGE 1 & 3")
             new_roots = cg.add_edges(
                 "Jane Doe",
                 [to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 1)],
                 affinities=0.9,
             ).new_root_ids
-            assert len(new_roots) == 1
+            assert len(new_roots) == 1, new_roots
             assert len(cg.get_subgraph([new_roots[0]], leaves_only=True)) == 4
 
-            root_ids = []
-            for child_id in child_ids:
-                root_ids.append(cg.get_root(child_id))
-
+            root_ids = cg.get_roots(child_ids, assert_roots=True)
+            print(child_ids)
+            print(list(root_ids))
             u_root_ids = np.unique(root_ids)
-            assert len(u_root_ids) == 1
+            assert len(u_root_ids) == 1, u_root_ids
 
             # ------------------------------------------------------------------
+            print(f"\n\nITERATION {i}/10 - SPLIT 2 & 3")
             new_roots = cg.remove_edges(
                 "John Doe",
                 source_ids=to_label(cg, 1, 1, 0, 0, 0),
                 sink_ids=to_label(cg, 1, 1, 0, 0, 1),
                 mincut=False,
             ).new_root_ids
+            assert len(new_roots) == 2, new_roots
 
-            assert len(np.unique(new_roots)) == 2
-
-            root_ids = []
-            for child_id in child_ids:
-                root_ids.append(cg.get_root(child_id))
-
+            root_ids = cg.get_roots(child_ids, assert_roots=True)
+            print(child_ids)
+            print(list(root_ids))
             u_root_ids = np.unique(root_ids)
             these_child_ids = []
             for root_id in u_root_ids:
                 these_child_ids.extend(cg.get_subgraph([root_id], leaves_only=True))
 
             assert len(these_child_ids) == 4
-            assert len(u_root_ids) == 2
+            assert len(u_root_ids) == 2, u_root_ids
 
             # ------------------------------------------------------------------
-
+            print(f"\n\nITERATION {i}/10 - SPLIT 1 & 3")
             new_roots = cg.remove_edges(
                 "Jane Doe",
                 source_ids=to_label(cg, 1, 0, 0, 0, 0),
                 sink_ids=to_label(cg, 1, 1, 0, 0, 1),
                 mincut=False,
             ).new_root_ids
-            assert len(new_roots) == 2
-
-            root_ids = []
-            for child_id in child_ids:
-                root_ids.append(cg.get_root(child_id))
+            assert len(new_roots) == 2, new_roots
 
+            root_ids = cg.get_roots(child_ids, assert_roots=True)
+            print(child_ids)
+            print(list(root_ids))
             u_root_ids = np.unique(root_ids)
-            assert len(u_root_ids) == 3
+            assert len(u_root_ids) == 3, u_root_ids
 
             # ------------------------------------------------------------------
-
-            print(f"\n\nITERATION {i}/10")
-            print("\n\nMERGE 2 & 3\n\n")
-
+            print(f"\n\nITERATION {i}/10 - MERGE 2 & 3")
             new_roots = cg.add_edges(
                 "Jane Doe",
                 [to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 1)],
                 affinities=0.9,
             ).new_root_ids
-            assert len(new_roots) == 1
-
-            root_ids = []
-            for child_id in child_ids:
-                root_ids.append(cg.get_root(child_id))
+            assert len(new_roots) == 1, new_roots
 
+            root_ids = cg.get_roots(child_ids, assert_roots=True)
+            print(child_ids)
+            print(list(root_ids))
             u_root_ids = np.unique(root_ids)
-            assert len(u_root_ids) == 2
+            assert len(u_root_ids) == 2, u_root_ids
 
             # for root_id in root_ids:
             #     cross_edge_dict_layers = graph_tests.root_cross_edge_test(
@@ -1575,7 +1557,7 @@ def test_cut_regular_link(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
@@ -1614,7 +1596,7 @@ def test_cut_regular_link(self, gen_graph):
             disallow_isolating_cut=True,
         ).new_root_ids
 
-        # Check New State
+        # verify new state
         assert len(new_root_ids) == 2
         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) != cg.get_root(
             to_label(cg, 1, 1, 0, 0, 0)
@@ -1646,7 +1628,7 @@ def test_cut_no_link(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
@@ -1707,7 +1689,7 @@ def test_cut_old_link(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
@@ -1775,7 +1757,7 @@ def test_cut_indivisible_link(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
@@ -1837,7 +1819,7 @@ def test_mincut_disrespects_sources_or_sinks(self, gen_graph):
         """
         cg = gen_graph(n_layers=2)
 
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[
@@ -1877,13 +1859,11 @@ def test_path_augmented_multicut(self, sv_data):
         edges = Edges(
             sv_edges[:, 0], sv_edges[:, 1], affinities=sv_affinity, areas=sv_area
         )
-
         cut_edges_aug = run_multicut(edges, sv_sources, sv_sinks, path_augment=True)
         assert cut_edges_aug.shape[0] == 350
 
         with pytest.raises(exceptions.PreconditionError):
             run_multicut(edges, sv_sources, sv_sinks, path_augment=False)
-        pass
 
 
 class TestGraphHistory:
@@ -1901,20 +1881,14 @@ def test_cut_merge_history(self, gen_graph):
         (1) Split 1 and 2
         (2) Merge 1 and 2
         """
-        from ..graph.lineage import lineage_graph
-
-        cg = gen_graph(n_layers=3)
-
-        # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        cg: ChunkedGraph = gen_graph(n_layers=3)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
             edges=[(to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), 0.5)],
             timestamp=fake_timestamp,
         )
-
-        # Preparation: Build Chunk B
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 1, 0, 0, 0)],
@@ -1932,7 +1906,7 @@ def test_cut_merge_history(self, gen_graph):
 
         first_root = cg.get_root(to_label(cg, 1, 0, 0, 0, 0))
         assert first_root == cg.get_root(to_label(cg, 1, 1, 0, 0, 0))
-        timestamp_before_split = datetime.utcnow()
+        timestamp_before_split = datetime.now(UTC)
         split_roots = cg.remove_edges(
             "Jane Doe",
             source_ids=to_label(cg, 1, 0, 0, 0, 0),
@@ -1945,7 +1919,7 @@ def test_cut_merge_history(self, gen_graph):
         g = lineage_graph(cg, split_roots)
         assert g.size() == 2
 
-        timestamp_after_split = datetime.utcnow()
+        timestamp_after_split = datetime.now(UTC)
         merge_roots = cg.add_edges(
             "Jane Doe",
             [to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0)],
@@ -1953,7 +1927,7 @@ def test_cut_merge_history(self, gen_graph):
         ).new_root_ids
         assert len(merge_roots) == 1
         merge_root = merge_roots[0]
-        timestamp_after_merge = datetime.utcnow()
+        timestamp_after_merge = datetime.now(UTC)
 
         g = lineage_graph(cg, merge_roots)
         assert g.size() == 4
@@ -2047,7 +2021,7 @@ def test_lock_unlock(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2)],
@@ -2113,7 +2087,7 @@ def test_lock_expiration(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2)],
@@ -2181,7 +2155,7 @@ def test_lock_renew(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2)],
@@ -2233,7 +2207,7 @@ def test_lock_merge_lock_old_id(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2)],
@@ -2299,7 +2273,7 @@ def test_indefinite_lock(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2)],
@@ -2372,7 +2346,7 @@ def test_indefinite_lock_with_normal_lock_expiration(self, gen_graph):
         cg = gen_graph(n_layers=3)
 
         # Preparation: Build Chunk A
-        fake_timestamp = datetime.utcnow() - timedelta(days=10)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
         create_chunk(
             cg,
             vertices=[to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2)],
@@ -2451,7 +2425,7 @@ def test_indefinite_lock_with_normal_lock_expiration(self, gen_graph):
     #     cg = gen_graph(n_layers=3)
 
     #     # Preparation: Build Chunk A
-    #     fake_timestamp = datetime.utcnow() - timedelta(days=10)
+    #     fake_timestamp = datetime.now(UTC) - timedelta(days=10)
     #     create_chunk(
     #         cg,
     #         vertices=[to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2)],
@@ -2467,7 +2441,7 @@ def test_indefinite_lock_with_normal_lock_expiration(self, gen_graph):
     #         timestamp=fake_timestamp,
     #     )
 
-    #     add_layer(
+    #     add_parent_chunk(
     #         cg, 3, [0, 0, 0], time_stamp=fake_timestamp, n_threads=1,
     #     )
 
@@ -2491,1054 +2465,951 @@ def test_indefinite_lock_with_normal_lock_expiration(self, gen_graph):
     #     )[0]
 
 
-# class MockChunkedGraph:
-#     """
-#     Dummy class to mock partial functionality of the ChunkedGraph for use in unit tests.
-#     Feel free to add more functions as need be. Can pass in alternative member functions into constructor.
-#     """
-
-#     def __init__(
-#         self, get_chunk_coordinates=None, get_chunk_layer=None, get_chunk_id=None
-#     ):
-#         if get_chunk_coordinates is not None:
-#             self.get_chunk_coordinates = get_chunk_coordinates
-#         if get_chunk_layer is not None:
-#             self.get_chunk_layer = get_chunk_layer
-#         if get_chunk_id is not None:
-#             self.get_chunk_id = get_chunk_id
-
-#     def get_chunk_coordinates(self, chunk_id):  # pylint: disable=method-hidden
-#         return np.array([0, 0, 0])
-
-#     def get_chunk_layer(self, chunk_id):  # pylint: disable=method-hidden
-#         return 2
-
-#     def get_chunk_id(self, *args):  # pylint: disable=method-hidden
-#         return 0
-
-
-# class TestGraphSplit:
-#     @pytest.mark.timeout(30)
-#     def test_split_pair_same_chunk(self, gen_graph):
-#         """
-#         Remove edge between existing RG supervoxels 1 and 2 (same chunk)
-#         Expected: Different (new) parents for RG 1 and 2 on Layer two
-#         ┌─────┐      ┌─────┐
-#         │  A¹ │      │  A¹ │
-#         │ 1━2 │  =>  │ 1 2 │
-#         │     │      │     │
-#         └─────┘      └─────┘
-#         """
-
-#         cg = gen_graph(n_layers=2)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
-#             edges=[(to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5)],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Split
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 0, 0, 0, 1),
-#             sink_ids=to_label(cg, 1, 0, 0, 0, 0),
-#             mincut=False,
-#         ).new_root_ids
-
-#         # Check New State
-#         assert len(new_root_ids) == 2
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) != cg.get_root(
-#             to_label(cg, 1, 0, 0, 0, 1)
-#         )
-#         leaves = np.unique(
-#             cg.get_subgraph([cg.get_root(to_label(cg, 1, 0, 0, 0, 0))], leaves_only=True)
-#         )
-#         assert len(leaves) == 1 and to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         leaves = np.unique(
-#             cg.get_subgraph([cg.get_root(to_label(cg, 1, 0, 0, 0, 1))], leaves_only=True)
-#         )
-#         assert len(leaves) == 1 and to_label(cg, 1, 0, 0, 0, 1) in leaves
-
-#         # Check Old State still accessible
-#         assert cg.get_root(
-#             to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
-#         ) == cg.get_root(to_label(cg, 1, 0, 0, 0, 1), time_stamp=fake_timestamp)
-#         leaves = np.unique(
-#             cg.get_subgraph(
-#                 [cg.get_root(to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp)],
-#                 leaves_only=True,
-#             )
-#         )
-#         assert len(leaves) == 2
-#         assert to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         assert to_label(cg, 1, 0, 0, 0, 1) in leaves
-
-#         # assert len(cg.get_latest_roots()) == 2
-#         # assert len(cg.get_latest_roots(fake_timestamp)) == 1
-
-#     def test_split_nonexisting_edge(self, gen_graph):
-#         """
-#         Remove edge between existing RG supervoxels 1 and 2 (same chunk)
-#         Expected: Different (new) parents for RG 1 and 2 on Layer two
-#         ┌─────┐      ┌─────┐
-#         │  A¹ │      │  A¹ │
-#         │ 1━2 │  =>  │ 1━2 │
-#         │   | │      │   | │
-#         │   3 │      │   3 │
-#         └─────┘      └─────┘
-#         """
-
-#         cg = gen_graph(n_layers=2)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
-#             edges=[
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),
-#                 (to_label(cg, 1, 0, 0, 0, 2), to_label(cg, 1, 0, 0, 0, 1), 0.5),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Split
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 0, 0, 0, 0),
-#             sink_ids=to_label(cg, 1, 0, 0, 0, 2),
-#             mincut=False,
-#         ).new_root_ids
-
-#         assert len(new_root_ids) == 1
-
-#     @pytest.mark.timeout(30)
-#     def test_split_pair_neighboring_chunks(self, gen_graph):
-#         """
-#         Remove edge between existing RG supervoxels 1 and 2 (neighboring chunks)
-#         ┌─────┬─────┐      ┌─────┬─────┐
-#         │  A¹ │  B¹ │      │  A¹ │  B¹ │
-#         │  1━━┿━━2  │  =>  │  1  │  2  │
-#         │     │     │      │     │     │
-#         └─────┴─────┘      └─────┴─────┘
-#         """
-
-#         cg = gen_graph(n_layers=3)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
-#             edges=[(to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), 1.0)],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Preparation: Build Chunk B
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 1, 0, 0, 0)],
-#             edges=[(to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 0), 1.0)],
-#             timestamp=fake_timestamp,
-#         )
-
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-
-#         # Split
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 1, 0, 0, 0),
-#             sink_ids=to_label(cg, 1, 0, 0, 0, 0),
-#             mincut=False,
-#         ).new_root_ids
-
-#         # Check New State
-#         assert len(new_root_ids) == 2
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) != cg.get_root(
-#             to_label(cg, 1, 1, 0, 0, 0)
-#         )
-#         leaves = np.unique(
-#             cg.get_subgraph([cg.get_root(to_label(cg, 1, 0, 0, 0, 0))], leaves_only=True)
-#         )
-#         assert len(leaves) == 1 and to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         leaves = np.unique(
-#             cg.get_subgraph([cg.get_root(to_label(cg, 1, 1, 0, 0, 0))], leaves_only=True)
-#         )
-#         assert len(leaves) == 1 and to_label(cg, 1, 1, 0, 0, 0) in leaves
-
-#         # Check Old State still accessible
-#         assert cg.get_root(
-#             to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
-#         ) == cg.get_root(to_label(cg, 1, 1, 0, 0, 0), time_stamp=fake_timestamp)
-#         leaves = np.unique(
-#             cg.get_subgraph(
-#                 [cg.get_root(to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp)],
-#                 leaves_only=True,
-#             )
-#         )
-#         assert len(leaves) == 2
-#         assert to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         assert to_label(cg, 1, 1, 0, 0, 0) in leaves
-
-#         assert len(cg.get_latest_roots()) == 2
-#         assert len(cg.get_latest_roots(fake_timestamp)) == 1
-
-#     @pytest.mark.timeout(30)
-#     def test_split_verify_cross_chunk_edges(self, gen_graph):
-#         """
-#         Remove edge between existing RG supervoxels 1 and 2 (neighboring chunks)
-#         ┌─────┬─────┬─────┐      ┌─────┬─────┬─────┐
-#         |     │  A¹ │  B¹ │      |     │  A¹ │  B¹ │
-#         |     │  1━━┿━━3  │  =>  |     │  1━━┿━━3  │
-#         |     │  |  │     │      |     │     │     │
-#         |     │  2  │     │      |     │  2  │     │
-#         └─────┴─────┴─────┘      └─────┴─────┴─────┘
-#         """
-
-#         cg = gen_graph(n_layers=4)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 1)],
-#             edges=[
-#                 (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 2, 0, 0, 0), inf),
-#                 (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 1), 0.5),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Preparation: Build Chunk B
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 2, 0, 0, 0)],
-#             edges=[(to_label(cg, 1, 2, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), inf)],
-#             timestamp=fake_timestamp,
-#         )
-
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             4,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-
-#         assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
-#             to_label(cg, 1, 1, 0, 0, 1)
-#         )
-#         assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
-#             to_label(cg, 1, 2, 0, 0, 0)
-#         )
-
-#         # Split
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 1, 0, 0, 0),
-#             sink_ids=to_label(cg, 1, 1, 0, 0, 1),
-#             mincut=False,
-#         ).new_root_ids
-
-#         assert len(new_root_ids) == 2
-
-#         svs2 = cg.get_subgraph([new_root_ids[0]], leaves_only=True)
-#         svs1 = cg.get_subgraph([new_root_ids[1]], leaves_only=True)
-#         len_set = {1, 2}
-#         assert len(svs1) in len_set
-#         len_set.remove(len(svs1))
-#         assert len(svs2) in len_set
-
-#         # Check New State
-#         assert len(new_root_ids) == 2
-#         assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) != cg.get_root(
-#             to_label(cg, 1, 1, 0, 0, 1)
-#         )
-#         assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
-#             to_label(cg, 1, 2, 0, 0, 0)
-#         )
-
-#         cc_dict = cg.get_atomic_cross_edges(
-#             cg.get_parent(to_label(cg, 1, 1, 0, 0, 0))
-#         )
-#         assert len(cc_dict[3]) == 1
-#         assert cc_dict[3][0][0] == to_label(cg, 1, 1, 0, 0, 0)
-#         assert cc_dict[3][0][1] == to_label(cg, 1, 2, 0, 0, 0)
-
-#         assert len(cg.get_latest_roots()) == 2
-#         assert len(cg.get_latest_roots(fake_timestamp)) == 1
-
-#     @pytest.mark.timeout(30)
-#     def test_split_verify_loop(self, gen_graph):
-#         """
-#         Remove edge between existing RG supervoxels 1 and 2 (neighboring chunks)
-#         ┌─────┬────────┬─────┐      ┌─────┬────────┬─────┐
-#         |     │     A¹ │  B¹ │      |     │     A¹ │  B¹ │
-#         |     │  4━━1━━┿━━5  │  =>  |     │  4  1━━┿━━5  │
-#         |     │   /    │  |  │      |     │        │  |  │
-#         |     │  3  2━━┿━━6  │      |     │  3  2━━┿━━6  │
-#         └─────┴────────┴─────┘      └─────┴────────┴─────┘
-#         """
-
-#         cg = gen_graph(n_layers=4)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[
-#                 to_label(cg, 1, 1, 0, 0, 0),
-#                 to_label(cg, 1, 1, 0, 0, 1),
-#                 to_label(cg, 1, 1, 0, 0, 2),
-#                 to_label(cg, 1, 1, 0, 0, 3),
-#             ],
-#             edges=[
-#                 (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 2, 0, 0, 0), inf),
-#                 (to_label(cg, 1, 1, 0, 0, 1), to_label(cg, 1, 2, 0, 0, 1), inf),
-#                 (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 2), 0.5),
-#                 (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 3), 0.5),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Preparation: Build Chunk B
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 2, 0, 0, 0), to_label(cg, 1, 2, 0, 0, 1)],
-#             edges=[
-#                 (to_label(cg, 1, 2, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), inf),
-#                 (to_label(cg, 1, 2, 0, 0, 1), to_label(cg, 1, 1, 0, 0, 1), inf),
-#                 (to_label(cg, 1, 2, 0, 0, 1), to_label(cg, 1, 2, 0, 0, 0), 0.5),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             4,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-
-#         assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
-#             to_label(cg, 1, 1, 0, 0, 1)
-#         )
-#         assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
-#             to_label(cg, 1, 2, 0, 0, 0)
-#         )
-
-#         # Split
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 1, 0, 0, 0),
-#             sink_ids=to_label(cg, 1, 1, 0, 0, 2),
-#             mincut=False,
-#         ).new_root_ids
-
-#         assert len(new_root_ids) == 2
-
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 1, 0, 0, 0),
-#             sink_ids=to_label(cg, 1, 1, 0, 0, 3),
-#             mincut=False,
-#         ).new_root_ids
-
-#         assert len(new_root_ids) == 2
-
-#         cc_dict = cg.get_atomic_cross_edges(
-#             cg.get_parent(to_label(cg, 1, 1, 0, 0, 0))
-#         )
-#         assert len(cc_dict[3]) == 1
-#         cc_dict = cg.get_atomic_cross_edges(
-#             cg.get_parent(to_label(cg, 1, 1, 0, 0, 0))
-#         )
-#         assert len(cc_dict[3]) == 1
-
-#         assert len(cg.get_latest_roots()) == 3
-#         assert len(cg.get_latest_roots(fake_timestamp)) == 1
-
-#     @pytest.mark.timeout(30)
-#     def test_split_pair_disconnected_chunks(self, gen_graph):
-#         """
-#         Remove edge between existing RG supervoxels 1 and 2 (disconnected chunks)
-#         ┌─────┐     ┌─────┐      ┌─────┐     ┌─────┐
-#         │  A¹ │ ... │  Z¹ │      │  A¹ │ ... │  Z¹ │
-#         │  1━━┿━━━━━┿━━2  │  =>  │  1  │     │  2  │
-#         │     │     │     │      │     │     │     │
-#         └─────┘     └─────┘      └─────┘     └─────┘
-#         """
-
-#         cg = gen_graph(n_layers=9)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
-#             edges=[(to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 7, 7, 7, 0), 1.0,)],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Preparation: Build Chunk Z
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 7, 7, 7, 0)],
-#             edges=[(to_label(cg, 1, 7, 7, 7, 0), to_label(cg, 1, 0, 0, 0, 0), 1.0,)],
-#             timestamp=fake_timestamp,
-#         )
-
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             4,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             4,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             5,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             5,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             6,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             6,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             7,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             7,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             8,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             8,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-#         add_layer(
-#             cg,
-#             9,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-
-#         # Split
-#         new_roots = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 7, 7, 7, 0),
-#             sink_ids=to_label(cg, 1, 0, 0, 0, 0),
-#             mincut=False,
-#         ).new_root_ids
-
-#         # Check New State
-#         assert len(new_roots) == 2
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) != cg.get_root(
-#             to_label(cg, 1, 7, 7, 7, 0)
-#         )
-#         leaves = np.unique(
-#             cg.get_subgraph([cg.get_root(to_label(cg, 1, 0, 0, 0, 0))], leaves_only=True)
-#         )
-#         assert len(leaves) == 1 and to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         leaves = np.unique(
-#             cg.get_subgraph([cg.get_root(to_label(cg, 1, 7, 7, 7, 0))], leaves_only=True)
-#         )
-#         assert len(leaves) == 1 and to_label(cg, 1, 7, 7, 7, 0) in leaves
-
-#         # Check Old State still accessible
-#         assert cg.get_root(
-#             to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
-#         ) == cg.get_root(to_label(cg, 1, 7, 7, 7, 0), time_stamp=fake_timestamp)
-#         leaves = np.unique(
-#             cg.get_subgraph(
-#                 [cg.get_root(to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp)],
-#                 leaves_only=True,
-#             )
-#         )
-#         assert len(leaves) == 2
-#         assert to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         assert to_label(cg, 1, 7, 7, 7, 0) in leaves
-
-#     @pytest.mark.timeout(30)
-#     def test_split_pair_already_disconnected(self, gen_graph):
-#         """
-#         Try to remove edge between already disconnected RG supervoxels 1 and 2 (same chunk).
-#         Expected: No change, no error
-#         ┌─────┐      ┌─────┐
-#         │  A¹ │      │  A¹ │
-#         │ 1 2 │  =>  │ 1 2 │
-#         │     │      │     │
-#         └─────┘      └─────┘
-#         """
-
-#         cg = gen_graph(n_layers=2)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
-#             edges=[],
-#             timestamp=fake_timestamp,
-#         )
-
-#         res_old = cg.client._table.read_rows()
-#         res_old.consume_all()
-
-#         # Split
-#         with pytest.raises(exceptions.PreconditionError):
-#             cg.remove_edges(
-#                 "Jane Doe",
-#                 source_ids=to_label(cg, 1, 0, 0, 0, 1),
-#                 sink_ids=to_label(cg, 1, 0, 0, 0, 0),
-#                 mincut=False,
-#             )
-
-#         res_new = cg.client._table.read_rows()
-#         res_new.consume_all()
-
-#         # Check
-#         if res_old.rows != res_new.rows:
-#             warn(
-#                 "Rows were modified when splitting a pair of already disconnected supervoxels. "
-#                 "While probably not an error, it is an unnecessary operation."
-#             )
-
-#     @pytest.mark.timeout(30)
-#     def test_split_full_circle_to_triple_chain_same_chunk(self, gen_graph):
-#         """
-#         Remove direct edge between RG supervoxels 1 and 2, but leave indirect connection (same chunk)
-#         ┌─────┐      ┌─────┐
-#         │  A¹ │      │  A¹ │
-#         │ 1━2 │  =>  │ 1 2 │
-#         │ ┗3┛ │      │ ┗3┛ │
-#         └─────┘      └─────┘
-#         """
-
-#         cg = gen_graph(n_layers=2)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[
-#                 to_label(cg, 1, 0, 0, 0, 0),
-#                 to_label(cg, 1, 0, 0, 0, 1),
-#                 to_label(cg, 1, 0, 0, 0, 2),
-#             ],
-#             edges=[
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 2), 0.5),
-#                 (to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2), 0.5),
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.3),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Split
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 0, 0, 0, 1),
-#             sink_ids=to_label(cg, 1, 0, 0, 0, 0),
-#             mincut=False,
-#         ).new_root_ids
-
-#         # Check New State
-#         assert len(new_root_ids) == 1
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) == new_root_ids[0]
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 1)) == new_root_ids[0]
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 2)) == new_root_ids[0]
-#         leaves = np.unique(cg.get_subgraph([new_root_ids[0]], leaves_only=True))
-#         assert len(leaves) == 3
-#         assert to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         assert to_label(cg, 1, 0, 0, 0, 1) in leaves
-#         assert to_label(cg, 1, 0, 0, 0, 2) in leaves
-
-#         # Check Old State still accessible
-#         old_root_id = cg.get_root(
-#             to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
-#         )
-#         assert new_root_ids[0] != old_root_id
-
-#         # assert len(cg.get_latest_roots()) == 1
-#         # assert len(cg.get_latest_roots(fake_timestamp)) == 1
-
-#     @pytest.mark.timeout(30)
-#     def test_split_full_circle_to_triple_chain_neighboring_chunks(self, gen_graph):
-#         """
-#         Remove direct edge between RG supervoxels 1 and 2, but leave indirect connection (neighboring chunks)
-#         ┌─────┬─────┐      ┌─────┬─────┐
-#         │  A¹ │  B¹ │      │  A¹ │  B¹ │
-#         │  1━━┿━━2  │  =>  │  1  │  2  │
-#         │  ┗3━┿━━┛  │      │  ┗3━┿━━┛  │
-#         └─────┴─────┘      └─────┴─────┘
-#         """
-
-#         cg = gen_graph(n_layers=3)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
-#             edges=[
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),
-#                 (to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 1, 0, 0, 0), 0.5),
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), 0.3),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Preparation: Build Chunk B
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 1, 0, 0, 0)],
-#             edges=[
-#                 (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),
-#                 (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 0), 0.3),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-
-#         # Split
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 1, 0, 0, 0),
-#             sink_ids=to_label(cg, 1, 0, 0, 0, 0),
-#             mincut=False,
-#         ).new_root_ids
-
-#         # Check New State
-#         assert len(new_root_ids) == 1
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) == new_root_ids[0]
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 1)) == new_root_ids[0]
-#         assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == new_root_ids[0]
-#         leaves = np.unique(cg.get_subgraph([new_root_ids[0]], leaves_only=True))
-#         assert len(leaves) == 3
-#         assert to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         assert to_label(cg, 1, 0, 0, 0, 1) in leaves
-#         assert to_label(cg, 1, 1, 0, 0, 0) in leaves
-
-#         # Check Old State still accessible
-#         old_root_id = cg.get_root(
-#             to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
-#         )
-#         assert new_root_ids[0] != old_root_id
-
-#         assert len(cg.get_latest_roots()) == 1
-#         assert len(cg.get_latest_roots(fake_timestamp)) == 1
-
-#     @pytest.mark.timeout(30)
-#     def test_split_full_circle_to_triple_chain_disconnected_chunks(self, gen_graph):
-#         """
-#         Remove direct edge between RG supervoxels 1 and 2, but leave indirect connection (disconnected chunks)
-#         ┌─────┐     ┌─────┐      ┌─────┐     ┌─────┐
-#         │  A¹ │ ... │  Z¹ │      │  A¹ │ ... │  Z¹ │
-#         │  1━━┿━━━━━┿━━2  │  =>  │  1  │     │  2  │
-#         │  ┗3━┿━━━━━┿━━┛  │      │  ┗3━┿━━━━━┿━━┛  │
-#         └─────┘     └─────┘      └─────┘     └─────┘
-#         """
-
-#         cg = gen_graph(n_layers=9)
-
-#         loc = 2
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
-#             edges=[
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),
-#                 (to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, loc, loc, loc, 0), 0.5,),
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, loc, loc, loc, 0), 0.3,),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Preparation: Build Chunk Z
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, loc, loc, loc, 0)],
-#             edges=[
-#                 (to_label(cg, 1, loc, loc, loc, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5,),
-#                 (to_label(cg, 1, loc, loc, loc, 0), to_label(cg, 1, 0, 0, 0, 0), 0.3,),
-#             ],
-#             timestamp=fake_timestamp,
-#         )
-
-#         for i_layer in range(3, 10):
-#             if loc // 2 ** (i_layer - 3) == 1:
-#                 add_layer(
-#                     cg,
-#                     i_layer,
-#                     [0, 0, 0],
-#
-#                     time_stamp=fake_timestamp,
-#                     n_threads=1,
-#                 )
-#             elif loc // 2 ** (i_layer - 3) == 0:
-#                 add_layer(
-#                     cg,
-#                     i_layer,
-#                     [0, 0, 0],
-#
-#                     time_stamp=fake_timestamp,
-#                     n_threads=1,
-#                 )
-#             else:
-#                 add_layer(
-#                     cg,
-#                     i_layer,
-#                     [0, 0, 0],
-#
-#                     time_stamp=fake_timestamp,
-#                     n_threads=1,
-#                 )
-#                 add_layer(
-#                     cg,
-#                     i_layer,
-#                     [0, 0, 0],
-#
-#                     time_stamp=fake_timestamp,
-#                     n_threads=1,
-#                 )
-
-#         assert (
-#             cg.get_root(to_label(cg, 1, loc, loc, loc, 0))
-#             == cg.get_root(to_label(cg, 1, 0, 0, 0, 0))
-#             == cg.get_root(to_label(cg, 1, 0, 0, 0, 1))
-#         )
-
-#         # Split
-#         new_root_ids = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, loc, loc, loc, 0),
-#             sink_ids=to_label(cg, 1, 0, 0, 0, 0),
-#             mincut=False,
-#         ).new_root_ids
-
-#         # Check New State
-#         assert len(new_root_ids) == 1
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) == new_root_ids[0]
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 1)) == new_root_ids[0]
-#         assert cg.get_root(to_label(cg, 1, loc, loc, loc, 0)) == new_root_ids[0]
-#         leaves = np.unique(cg.get_subgraph([new_root_ids[0]], leaves_only=True))
-#         assert len(leaves) == 3
-#         assert to_label(cg, 1, 0, 0, 0, 0) in leaves
-#         assert to_label(cg, 1, 0, 0, 0, 1) in leaves
-#         assert to_label(cg, 1, loc, loc, loc, 0) in leaves
-
-#         # Check Old State still accessible
-#         old_root_id = cg.get_root(
-#             to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
-#         )
-#         assert new_root_ids[0] != old_root_id
-
-#         assert len(cg.get_latest_roots()) == 1
-#         assert len(cg.get_latest_roots(fake_timestamp)) == 1
-
-#     @pytest.mark.timeout(30)
-#     def test_split_same_node(self, gen_graph):
-#         """
-#         Try to remove (non-existing) edge between RG supervoxel 1 and itself
-#         ┌─────┐
-#         │  A¹ │
-#         │  1  │  =>  Reject
-#         │     │
-#         └─────┘
-#         """
-
-#         cg = gen_graph(n_layers=2)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
-#             edges=[],
-#             timestamp=fake_timestamp,
-#         )
-
-#         res_old = cg.client._table.read_rows()
-#         res_old.consume_all()
-
-#         # Split
-#         with pytest.raises(exceptions.PreconditionError):
-#             cg.remove_edges(
-#                 "Jane Doe",
-#                 source_ids=to_label(cg, 1, 0, 0, 0, 0),
-#                 sink_ids=to_label(cg, 1, 0, 0, 0, 0),
-#                 mincut=False,
-#             )
-
-#         res_new = cg.client._table.read_rows()
-#         res_new.consume_all()
-
-#         assert res_new.rows == res_old.rows
-
-#     @pytest.mark.timeout(30)
-#     def test_split_pair_abstract_nodes(self, gen_graph):
-#         """
-#         Try to remove (non-existing) edge between RG supervoxel 1 and abstract node "2"
-#                     ┌─────┐
-#                     │  B² │
-#                     │ "2" │
-#                     │     │
-#                     └─────┘
-#         ┌─────┐              =>  Reject
-#         │  A¹ │
-#         │  1  │
-#         │     │
-#         └─────┘
-#         """
-
-#         cg = gen_graph(n_layers=3)
-
-#         # Preparation: Build Chunk A
-#         fake_timestamp = datetime.utcnow() - timedelta(days=10)
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0)],
-#             edges=[],
-#             timestamp=fake_timestamp,
-#         )
-
-#         # Preparation: Build Chunk B
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 1, 0, 0, 0)],
-#             edges=[],
-#             timestamp=fake_timestamp,
-#         )
-
-#         add_layer(
-#             cg,
-#             3,
-#             [0, 0, 0],
-#
-#             time_stamp=fake_timestamp,
-#             n_threads=1,
-#         )
-
-#         res_old = cg.client._table.read_rows()
-#         res_old.consume_all()
-
-#         # Split
-#         with pytest.raises(exceptions.PreconditionError):
-#             cg.remove_edges(
-#                 "Jane Doe",
-#                 source_ids=to_label(cg, 1, 0, 0, 0, 0),
-#                 sink_ids=to_label(cg, 2, 1, 0, 0, 1),
-#                 mincut=False,
-#             )
-
-#         res_new = cg.client._table.read_rows()
-#         res_new.consume_all()
-
-#         assert res_new.rows == res_old.rows
-
-#     @pytest.mark.timeout(30)
-#     def test_diagonal_connections(self, gen_graph):
-#         """
-#         Create graph with edge between RG supervoxels 1 and 2 (same chunk)
-#         and edge between RG supervoxels 1 and 3 (neighboring chunks)
-#         ┌─────┬─────┐
-#         │  A¹ │  B¹ │
-#         │ 2━1━┿━━3  │
-#         │  /  │     │
-#         ┌─────┬─────┐
-#         │  |  │     │
-#         │  4━━┿━━5  │
-#         │  C¹ │  D¹ │
-#         └─────┴─────┘
-#         """
-
-#         cg = gen_graph(n_layers=3)
-
-#         # Chunk A
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
-#             edges=[
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), inf),
-#                 (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 1, 0, 0), inf),
-#             ],
-#         )
-
-#         # Chunk B
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 1, 0, 0, 0)],
-#             edges=[(to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 0), inf)],
-#         )
-
-#         # Chunk C
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 0, 1, 0, 0)],
-#             edges=[
-#                 (to_label(cg, 1, 0, 1, 0, 0), to_label(cg, 1, 1, 1, 0, 0), inf),
-#                 (to_label(cg, 1, 0, 1, 0, 0), to_label(cg, 1, 0, 0, 0, 0), inf),
-#             ],
-#         )
-
-#         # Chunk D
-#         create_chunk(
-#             cg,
-#             vertices=[to_label(cg, 1, 1, 1, 0, 0)],
-#             edges=[(to_label(cg, 1, 1, 1, 0, 0), to_label(cg, 1, 0, 1, 0, 0), inf)],
-#         )
-
-#         add_layer(
-#             cg, 3, [0, 0, 0],  n_threads=1,
-#         )
-
-#         rr = cg.range_read_chunk(chunk_id=cg.get_chunk_id(layer=3, x=0, y=0, z=0))
-#         root_ids_t0 = list(rr.keys())
-
-#         assert len(root_ids_t0) == 1
-
-#         child_ids = []
-#         for root_id in root_ids_t0:
-#             child_ids.extend([cg.get_subgraph([root_id])], leaves_only=True)
-
-#         new_roots = cg.remove_edges(
-#             "Jane Doe",
-#             source_ids=to_label(cg, 1, 0, 0, 0, 0),
-#             sink_ids=to_label(cg, 1, 0, 0, 0, 1),
-#             mincut=False,
-#         ).new_root_ids
-
-#         assert len(new_roots) == 2
-#         assert cg.get_root(to_label(cg, 1, 1, 1, 0, 0)) == cg.get_root(
-#             to_label(cg, 1, 0, 1, 0, 0)
-#         )
-#         assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) == cg.get_root(
-#             to_label(cg, 1, 0, 0, 0, 0)
-#         )
+class TestGraphSplit:
+    @pytest.mark.timeout(30)
+    def test_split_pair_same_chunk(self, gen_graph):
+        """
+        Remove edge between existing RG supervoxels 1 and 2 (same chunk)
+        Expected: Different (new) parents for RG 1 and 2 on Layer two
+        ┌─────┐      ┌─────┐
+        │  A¹ │      │  A¹ │
+        │ 1━2 │  =>  │ 1 2 │
+        │     │      │     │
+        └─────┘      └─────┘
+        """
+
+        cg: ChunkedGraph = gen_graph(n_layers=2)
+
+        # Preparation: Build Chunk A with both supervoxels joined by one edge, backdated 10 days
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
+            edges=[(to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5)],
+            timestamp=fake_timestamp,
+        )
+
+        # Split: remove the only edge explicitly (mincut disabled)
+        new_root_ids = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 0, 0, 0, 1),
+            sink_ids=to_label(cg, 1, 0, 0, 0, 0),
+            mincut=False,
+        ).new_root_ids
+
+        # verify new state: two new roots, each owning exactly one supervoxel
+        assert len(new_root_ids) == 2
+        assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) != cg.get_root(
+            to_label(cg, 1, 0, 0, 0, 1)
+        )
+        leaves = np.unique(
+            cg.get_subgraph(
+                [cg.get_root(to_label(cg, 1, 0, 0, 0, 0))], leaves_only=True
+            )
+        )
+        assert len(leaves) == 1 and to_label(cg, 1, 0, 0, 0, 0) in leaves
+        leaves = np.unique(
+            cg.get_subgraph(
+                [cg.get_root(to_label(cg, 1, 0, 0, 0, 1))], leaves_only=True
+            )
+        )
+        assert len(leaves) == 1 and to_label(cg, 1, 0, 0, 0, 1) in leaves
+
+        # verify old state: at fake_timestamp both supervoxels share one root with two leaves
+        cg.cache = None  # NOTE(review): presumably drops cached lookups before the timestamped reads - confirm
+        assert cg.get_root(
+            to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
+        ) == cg.get_root(to_label(cg, 1, 0, 0, 0, 1), time_stamp=fake_timestamp)
+        leaves = np.unique(
+            cg.get_subgraph(
+                [cg.get_root(to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp)],
+                leaves_only=True,
+            )
+        )
+        assert len(leaves) == 2
+        assert to_label(cg, 1, 0, 0, 0, 0) in leaves
+        assert to_label(cg, 1, 0, 0, 0, 1) in leaves
+
+        assert len(get_latest_roots(cg)) == 2
+        assert len(get_latest_roots(cg, fake_timestamp)) == 1
+
+    def test_split_nonexisting_edge(self, gen_graph):
+        """
+        Try to remove the non-existing edge between RG supervoxels 1 and 3 (same chunk)
+        Expected: exactly one new root ID is returned (graph unchanged in the diagram)
+        ┌─────┐      ┌─────┐
+        │  A¹ │      │  A¹ │
+        │ 1━2 │  =>  │ 1━2 │
+        │   | │      │   | │
+        │   3 │      │   3 │
+        └─────┘      └─────┘
+        """
+        cg = gen_graph(n_layers=2)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
+            edges=[
+                (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),
+                (to_label(cg, 1, 0, 0, 0, 2), to_label(cg, 1, 0, 0, 0, 1), 0.5),
+            ],
+            timestamp=fake_timestamp,
+        )
+        new_root_ids = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 0, 0, 0, 0),
+            sink_ids=to_label(cg, 1, 0, 0, 0, 2),
+            mincut=False,
+        ).new_root_ids
+        assert len(new_root_ids) == 1
+
+    @pytest.mark.timeout(30)
+    def test_split_pair_neighboring_chunks(self, gen_graph):
+        """
+        Remove edge between existing RG supervoxels 1 and 2 (neighboring chunks)
+        ┌─────┬─────┐      ┌─────┬─────┐
+        │  A¹ │  B¹ │      │  A¹ │  B¹ │
+        │  1━━┿━━2  │  =>  │  1  │  2  │
+        │     │     │      │     │     │
+        └─────┴─────┘      └─────┴─────┘
+        """
+        cg: ChunkedGraph = gen_graph(n_layers=3)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 0, 0, 0)],
+            edges=[(to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), 1.0)],
+            timestamp=fake_timestamp,
+        )
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 1, 0, 0, 0)],
+            edges=[(to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 0), 1.0)],
+            timestamp=fake_timestamp,
+        )
+        add_parent_chunk(
+            cg,
+            3,
+            [0, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+        new_root_ids = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 1, 0, 0, 0),
+            sink_ids=to_label(cg, 1, 0, 0, 0, 0),
+            mincut=False,
+        ).new_root_ids
+
+        # verify new state: two new roots, each owning exactly one supervoxel
+        assert len(new_root_ids) == 2
+        assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) != cg.get_root(
+            to_label(cg, 1, 1, 0, 0, 0)
+        )
+        leaves = np.unique(
+            cg.get_subgraph(
+                [cg.get_root(to_label(cg, 1, 0, 0, 0, 0))], leaves_only=True
+            )
+        )
+        assert len(leaves) == 1 and to_label(cg, 1, 0, 0, 0, 0) in leaves
+        leaves = np.unique(
+            cg.get_subgraph(
+                [cg.get_root(to_label(cg, 1, 1, 0, 0, 0))], leaves_only=True
+            )
+        )
+        assert len(leaves) == 1 and to_label(cg, 1, 1, 0, 0, 0) in leaves
+
+        # verify old state: at fake_timestamp both supervoxels share one root with two leaves
+        assert cg.get_root(
+            to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
+        ) == cg.get_root(to_label(cg, 1, 1, 0, 0, 0), time_stamp=fake_timestamp)
+        leaves = np.unique(
+            cg.get_subgraph(
+                [cg.get_root(to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp)],
+                leaves_only=True,
+            )
+        )
+        assert len(leaves) == 2
+        assert to_label(cg, 1, 0, 0, 0, 0) in leaves
+        assert to_label(cg, 1, 1, 0, 0, 0) in leaves
+        assert len(get_latest_roots(cg)) == 2
+        assert len(get_latest_roots(cg, fake_timestamp)) == 1
+
+    @pytest.mark.timeout(30)
+    def test_split_verify_cross_chunk_edges(self, gen_graph):
+        """
+        Remove the in-chunk edge between RG supervoxels 1 and 2; cross chunk edge 1━3 must survive
+        ┌─────┬─────┬─────┐      ┌─────┬─────┬─────┐
+        |     │  A¹ │  B¹ │      |     │  A¹ │  B¹ │
+        |     │  1━━┿━━3  │  =>  |     │  1━━┿━━3  │
+        |     │  |  │     │      |     │     │     │
+        |     │  2  │     │      |     │  2  │     │
+        └─────┴─────┴─────┘      └─────┴─────┴─────┘
+        """
+        cg: ChunkedGraph = gen_graph(n_layers=4)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 1)],
+            edges=[
+                (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 2, 0, 0, 0), inf),
+                (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 1), 0.5),
+            ],
+            timestamp=fake_timestamp,
+        )
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 2, 0, 0, 0)],
+            edges=[(to_label(cg, 1, 2, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), inf)],
+            timestamp=fake_timestamp,
+        )
+
+        add_parent_chunk(
+            cg,
+            3,
+            [0, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+        add_parent_chunk(
+            cg,
+            3,
+            [1, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+        add_parent_chunk(
+            cg,
+            4,
+            [0, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+
+        assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
+            to_label(cg, 1, 1, 0, 0, 1)
+        )
+        assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
+            to_label(cg, 1, 2, 0, 0, 0)
+        )
+
+        new_root_ids = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 1, 0, 0, 0),
+            sink_ids=to_label(cg, 1, 1, 0, 0, 1),
+            mincut=False,
+        ).new_root_ids
+
+        assert len(new_root_ids) == 2
+
+        svs2 = cg.get_subgraph([new_root_ids[0]], leaves_only=True)
+        svs1 = cg.get_subgraph([new_root_ids[1]], leaves_only=True)
+        len_set = {1, 2}  # one new root must own 1 supervoxel and the other 2, in either order
+        assert len(svs1) in len_set
+        len_set.remove(len(svs1))
+        assert len(svs2) in len_set
+
+        # verify new state: 1 and 2 are split, while 1 and 3 still share a root
+        assert len(new_root_ids) == 2
+        assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) != cg.get_root(
+            to_label(cg, 1, 1, 0, 0, 1)
+        )
+        assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
+            to_label(cg, 1, 2, 0, 0, 0)
+        )
+
+        # l2id = cg.get_parent(to_label(cg, 1, 1, 0, 0, 0))
+        # cce = cg.get_atomic_cross_edges([l2id])[l2id]
+        # assert len(cce[3]) == 1
+        # assert cce[3][0][0] == to_label(cg, 1, 1, 0, 0, 0)
+        # assert cce[3][0][1] == to_label(cg, 1, 2, 0, 0, 0)
+
+        assert len(get_latest_roots(cg)) == 2
+        assert len(get_latest_roots(cg, fake_timestamp)) == 1
+
+    @pytest.mark.timeout(30)
+    def test_split_verify_loop(self, gen_graph):
+        """
+        Remove edges 1━3 and 1━4 (same chunk); the cross chunk loop 1━5━6━2 remains per the diagram
+        ┌─────┬────────┬─────┐      ┌─────┬────────┬─────┐
+        |     │     A¹ │  B¹ │      |     │     A¹ │  B¹ │
+        |     │  4━━1━━┿━━5  │  =>  |     │  4  1━━┿━━5  │
+        |     │   /    │  |  │      |     │        │  |  │
+        |     │  3  2━━┿━━6  │      |     │  3  2━━┿━━6  │
+        └─────┴────────┴─────┘      └─────┴────────┴─────┘
+        """
+        cg: ChunkedGraph = gen_graph(n_layers=4)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[
+                to_label(cg, 1, 1, 0, 0, 0),
+                to_label(cg, 1, 1, 0, 0, 1),
+                to_label(cg, 1, 1, 0, 0, 2),
+                to_label(cg, 1, 1, 0, 0, 3),
+            ],
+            edges=[
+                (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 2, 0, 0, 0), inf),
+                (to_label(cg, 1, 1, 0, 0, 1), to_label(cg, 1, 2, 0, 0, 1), inf),
+                (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 2), 0.5),
+                (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 3), 0.5),
+            ],
+            timestamp=fake_timestamp,
+        )
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 2, 0, 0, 0), to_label(cg, 1, 2, 0, 0, 1)],
+            edges=[
+                (to_label(cg, 1, 2, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), inf),
+                (to_label(cg, 1, 2, 0, 0, 1), to_label(cg, 1, 1, 0, 0, 1), inf),
+                (to_label(cg, 1, 2, 0, 0, 1), to_label(cg, 1, 2, 0, 0, 0), 0.5),
+            ],
+            timestamp=fake_timestamp,
+        )
+
+        add_parent_chunk(
+            cg,
+            3,
+            [0, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+        add_parent_chunk(
+            cg,
+            3,
+            [1, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+        add_parent_chunk(
+            cg,
+            4,
+            [0, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+
+        assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
+            to_label(cg, 1, 1, 0, 0, 1)
+        )
+        assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == cg.get_root(
+            to_label(cg, 1, 2, 0, 0, 0)
+        )
+
+        new_root_ids = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 1, 0, 0, 0),
+            sink_ids=to_label(cg, 1, 1, 0, 0, 2),
+            mincut=False,
+        ).new_root_ids
+        assert len(new_root_ids) == 2
+
+        new_root_ids = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 1, 0, 0, 0),
+            sink_ids=to_label(cg, 1, 1, 0, 0, 3),
+            mincut=False,
+        ).new_root_ids
+        assert len(new_root_ids) == 2
+
+        # l2id = cg.get_parent(to_label(cg, 1, 1, 0, 0, 0))
+        # cce = cg.get_atomic_cross_edges([l2id])
+        # assert len(cce[3]) == 1
+
+        assert len(get_latest_roots(cg)) == 3  # per the diagram: {3}, {4} and the loop {1, 2, 5, 6}
+        assert len(get_latest_roots(cg, fake_timestamp)) == 1
+
+    # @pytest.mark.timeout(30)
+    # def test_split_pair_disconnected_chunks(self, gen_graph):
+    #     """
+    #     Remove edge between existing RG supervoxels 1 and 2 (disconnected chunks)
+    #     ┌─────┐     ┌─────┐      ┌─────┐     ┌─────┐
+    #     │  A¹ │ ... │  Z¹ │      │  A¹ │ ... │  Z¹ │
+    #     │  1━━┿━━━━━┿━━2  │  =>  │  1  │     │  2  │
+    #     │     │     │     │      │     │     │     │
+    #     └─────┘     └─────┘      └─────┘     └─────┘
+    #     """
+    #     cg: ChunkedGraph = gen_graph(n_layers=9)
+    #     fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+    #     create_chunk(
+    #         cg,
+    #         vertices=[to_label(cg, 1, 0, 0, 0, 0)],
+    #         edges=[
+    #             (
+    #                 to_label(cg, 1, 0, 0, 0, 0),
+    #                 to_label(cg, 1, 7, 7, 7, 0),
+    #                 1.0,
+    #             )
+    #         ],
+    #         timestamp=fake_timestamp,
+    #     )
+    #     create_chunk(
+    #         cg,
+    #         vertices=[to_label(cg, 1, 7, 7, 7, 0)],
+    #         edges=[
+    #             (
+    #                 to_label(cg, 1, 7, 7, 7, 0),
+    #                 to_label(cg, 1, 0, 0, 0, 0),
+    #                 1.0,
+    #             )
+    #         ],
+    #         timestamp=fake_timestamp,
+    #     )
+
+    #     add_parent_chunk(
+    #         cg,
+    #         3,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         3,
+    #         [1, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         4,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         4,
+    #         [1, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         5,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         5,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         6,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         6,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         7,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         7,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         8,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         8,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+    #     add_parent_chunk(
+    #         cg,
+    #         9,
+    #         [0, 0, 0],
+    #         time_stamp=fake_timestamp,
+    #         n_threads=1,
+    #     )
+
+    #     new_roots = cg.remove_edges(
+    #         "Jane Doe",
+    #         source_ids=to_label(cg, 1, 7, 7, 7, 0),
+    #         sink_ids=to_label(cg, 1, 0, 0, 0, 0),
+    #         mincut=False,
+    #     ).new_root_ids
+
+    #     # verify new state
+    #     assert len(new_roots) == 2
+    #     assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) != cg.get_root(
+    #         to_label(cg, 1, 7, 7, 7, 0)
+    #     )
+    #     leaves = np.unique(
+    #         cg.get_subgraph(
+    #             [cg.get_root(to_label(cg, 1, 0, 0, 0, 0))], leaves_only=True
+    #         )
+    #     )
+    #     assert len(leaves) == 1 and to_label(cg, 1, 0, 0, 0, 0) in leaves
+    #     leaves = np.unique(
+    #         cg.get_subgraph(
+    #             [cg.get_root(to_label(cg, 1, 7, 7, 7, 0))], leaves_only=True
+    #         )
+    #     )
+    #     assert len(leaves) == 1 and to_label(cg, 1, 7, 7, 7, 0) in leaves
+
+    #     # verify old state
+    #     assert cg.get_root(
+    #         to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
+    #     ) == cg.get_root(to_label(cg, 1, 7, 7, 7, 0), time_stamp=fake_timestamp)
+    #     leaves = np.unique(
+    #         cg.get_subgraph(
+    #             [cg.get_root(to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp)],
+    #             leaves_only=True,
+    #         )
+    #     )
+    #     assert len(leaves) == 2
+    #     assert to_label(cg, 1, 0, 0, 0, 0) in leaves
+    #     assert to_label(cg, 1, 7, 7, 7, 0) in leaves
+
+    @pytest.mark.timeout(30)
+    def test_split_pair_already_disconnected(self, gen_graph):
+        """
+        Try to remove edge between already disconnected RG supervoxels 1 and 2 (same chunk).
+        Expected: PreconditionError is raised; changed table rows only trigger a warning
+        ┌─────┐      ┌─────┐
+        │  A¹ │      │  A¹ │
+        │ 1 2 │  =>  │ 1 2 │
+        │     │      │     │
+        └─────┘      └─────┘
+        """
+        cg: ChunkedGraph = gen_graph(n_layers=2)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
+            edges=[],  # no edge between the two supervoxels
+            timestamp=fake_timestamp,
+        )
+        res_old = cg.client._table.read_rows()  # table snapshot before the edit attempt
+        res_old.consume_all()
+
+        with pytest.raises(exceptions.PreconditionError):
+            cg.remove_edges(
+                "Jane Doe",
+                source_ids=to_label(cg, 1, 0, 0, 0, 1),
+                sink_ids=to_label(cg, 1, 0, 0, 0, 0),
+                mincut=False,
+            )
+
+        res_new = cg.client._table.read_rows()  # table snapshot after the edit attempt
+        res_new.consume_all()
+
+        if res_old.rows != res_new.rows:  # deliberately a warning, not a test failure
+            warn(
+                "Rows were modified when splitting a pair of already disconnected supervoxels. "
+                "While probably not an error, it is an unnecessary operation."
+            )
+
+    @pytest.mark.timeout(30)
+    def test_split_full_circle_to_triple_chain_same_chunk(self, gen_graph):
+        """
+        Remove direct edge between RG supervoxels 1 and 2, but leave indirect connection (same chunk)
+        ┌─────┐      ┌─────┐
+        │  A¹ │      │  A¹ │
+        │ 1━2 │  =>  │ 1 2 │
+        │ ┗3┛ │      │ ┗3┛ │
+        └─────┘      └─────┘
+        """
+        cg: ChunkedGraph = gen_graph(n_layers=2)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[
+                to_label(cg, 1, 0, 0, 0, 0),
+                to_label(cg, 1, 0, 0, 0, 1),
+                to_label(cg, 1, 0, 0, 0, 2),
+            ],
+            edges=[
+                (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 2), 0.5),  # 1-3
+                (to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 0, 0, 0, 2), 0.5),  # 2-3
+                (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.3),  # 1-2, removed below
+            ],
+            timestamp=fake_timestamp,
+        )
+        new_root_ids = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 0, 0, 0, 1),
+            sink_ids=to_label(cg, 1, 0, 0, 0, 0),
+            mincut=False,
+        ).new_root_ids
+
+        # verify new state: 1 and 2 are still connected through 3, so only one root results
+        assert len(new_root_ids) == 1
+        assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) == new_root_ids[0]
+        assert cg.get_root(to_label(cg, 1, 0, 0, 0, 1)) == new_root_ids[0]
+        assert cg.get_root(to_label(cg, 1, 0, 0, 0, 2)) == new_root_ids[0]
+        leaves = np.unique(cg.get_subgraph([new_root_ids[0]], leaves_only=True))
+        assert len(leaves) == 3
+        assert to_label(cg, 1, 0, 0, 0, 0) in leaves
+        assert to_label(cg, 1, 0, 0, 0, 1) in leaves
+        assert to_label(cg, 1, 0, 0, 0, 2) in leaves
+
+        # verify old state: the root looked up at fake_timestamp differs from the new root
+        old_root_id = cg.get_root(
+            to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
+        )
+        assert new_root_ids[0] != old_root_id
+        assert len(get_latest_roots(cg)) == 1
+        assert len(get_latest_roots(cg, fake_timestamp)) == 1
+
+    @pytest.mark.timeout(30)
+    def test_split_full_circle_to_triple_chain_neighboring_chunks(self, gen_graph):
+        """
+        Remove direct edge between RG supervoxels 1 and 2, but leave indirect connection (neighboring chunks)
+        ┌─────┬─────┐      ┌─────┬─────┐
+        │  A¹ │  B¹ │      │  A¹ │  B¹ │
+        │  1━━┿━━2  │  =>  │  1  │  2  │
+        │  ┗3━┿━━┛  │      │  ┗3━┿━━┛  │
+        └─────┴─────┘      └─────┴─────┘
+        """
+        cg: ChunkedGraph = gen_graph(n_layers=3)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
+            edges=[
+                (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),  # 1-3
+                (to_label(cg, 1, 0, 0, 0, 1), to_label(cg, 1, 1, 0, 0, 0), 0.5),  # 3-2
+                (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), 0.3),  # 1-2, removed below
+            ],
+            timestamp=fake_timestamp,
+        )
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 1, 0, 0, 0)],
+            edges=[
+                (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),  # 2-3
+                (to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 0), 0.3),  # 2-1
+            ],
+            timestamp=fake_timestamp,
+        )
+        add_parent_chunk(
+            cg,
+            3,
+            [0, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+
+        new_root_ids = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 1, 0, 0, 0),
+            sink_ids=to_label(cg, 1, 0, 0, 0, 0),
+            mincut=False,
+        ).new_root_ids
+
+        # verify new state: 1 and 2 are still connected through 3, so only one root results
+        assert len(new_root_ids) == 1
+        assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) == new_root_ids[0]
+        assert cg.get_root(to_label(cg, 1, 0, 0, 0, 1)) == new_root_ids[0]
+        assert cg.get_root(to_label(cg, 1, 1, 0, 0, 0)) == new_root_ids[0]
+        leaves = np.unique(cg.get_subgraph([new_root_ids[0]], leaves_only=True))
+        assert len(leaves) == 3
+        assert to_label(cg, 1, 0, 0, 0, 0) in leaves
+        assert to_label(cg, 1, 0, 0, 0, 1) in leaves
+        assert to_label(cg, 1, 1, 0, 0, 0) in leaves
+
+        # verify old state: the root looked up at fake_timestamp differs from the new root
+        old_root_id = cg.get_root(
+            to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
+        )
+        assert new_root_ids[0] != old_root_id
+        assert len(get_latest_roots(cg)) == 1
+        assert len(get_latest_roots(cg, fake_timestamp)) == 1
+
+    # @pytest.mark.timeout(30)
+    # def test_split_full_circle_to_triple_chain_disconnected_chunks(self, gen_graph):
+    #     """
+    #     Remove direct edge between RG supervoxels 1 and 2, but leave indirect connection (disconnected chunks)
+    #     ┌─────┐     ┌─────┐      ┌─────┐     ┌─────┐
+    #     │  A¹ │ ... │  Z¹ │      │  A¹ │ ... │  Z¹ │
+    #     │  1━━┿━━━━━┿━━2  │  =>  │  1  │     │  2  │
+    #     │  ┗3━┿━━━━━┿━━┛  │      │  ┗3━┿━━━━━┿━━┛  │
+    #     └─────┘     └─────┘      └─────┘     └─────┘
+    #     """
+    #     cg: ChunkedGraph = gen_graph(n_layers=9)
+    #     loc = 2
+    #     fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+    #     create_chunk(
+    #         cg,
+    #         vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
+    #         edges=[
+    #             (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),
+    #             (
+    #                 to_label(cg, 1, 0, 0, 0, 1),
+    #                 to_label(cg, 1, loc, loc, loc, 0),
+    #                 0.5,
+    #             ),
+    #             (
+    #                 to_label(cg, 1, 0, 0, 0, 0),
+    #                 to_label(cg, 1, loc, loc, loc, 0),
+    #                 0.3,
+    #             ),
+    #         ],
+    #         timestamp=fake_timestamp,
+    #     )
+    #     create_chunk(
+    #         cg,
+    #         vertices=[to_label(cg, 1, loc, loc, loc, 0)],
+    #         edges=[
+    #             (
+    #                 to_label(cg, 1, loc, loc, loc, 0),
+    #                 to_label(cg, 1, 0, 0, 0, 1),
+    #                 0.5,
+    #             ),
+    #             (
+    #                 to_label(cg, 1, loc, loc, loc, 0),
+    #                 to_label(cg, 1, 0, 0, 0, 0),
+    #                 0.3,
+    #             ),
+    #         ],
+    #         timestamp=fake_timestamp,
+    #     )
+    #     for i_layer in range(3, 10):
+    #         if loc // 2 ** (i_layer - 3) == 1:
+    #             add_parent_chunk(
+    #                 cg,
+    #                 i_layer,
+    #                 [0, 0, 0],
+    #                 time_stamp=fake_timestamp,
+    #                 n_threads=1,
+    #             )
+    #         elif loc // 2 ** (i_layer - 3) == 0:
+    #             add_parent_chunk(
+    #                 cg,
+    #                 i_layer,
+    #                 [0, 0, 0],
+    #                 time_stamp=fake_timestamp,
+    #                 n_threads=1,
+    #             )
+    #         else:
+    #             add_parent_chunk(
+    #                 cg,
+    #                 i_layer,
+    #                 [0, 0, 0],
+    #                 time_stamp=fake_timestamp,
+    #                 n_threads=1,
+    #             )
+    #             add_parent_chunk(
+    #                 cg,
+    #                 i_layer,
+    #                 [0, 0, 0],
+    #                 time_stamp=fake_timestamp,
+    #                 n_threads=1,
+    #             )
+
+    #     assert (
+    #         cg.get_root(to_label(cg, 1, loc, loc, loc, 0))
+    #         == cg.get_root(to_label(cg, 1, 0, 0, 0, 0))
+    #         == cg.get_root(to_label(cg, 1, 0, 0, 0, 1))
+    #     )
+    #     new_root_ids = cg.remove_edges(
+    #         "Jane Doe",
+    #         source_ids=to_label(cg, 1, loc, loc, loc, 0),
+    #         sink_ids=to_label(cg, 1, 0, 0, 0, 0),
+    #         mincut=False,
+    #     ).new_root_ids
+
+    #     # verify new state
+    #     assert len(new_root_ids) == 1
+    #     assert cg.get_root(to_label(cg, 1, 0, 0, 0, 0)) == new_root_ids[0]
+    #     assert cg.get_root(to_label(cg, 1, 0, 0, 0, 1)) == new_root_ids[0]
+    #     assert cg.get_root(to_label(cg, 1, loc, loc, loc, 0)) == new_root_ids[0]
+    #     leaves = np.unique(cg.get_subgraph([new_root_ids[0]], leaves_only=True))
+    #     assert len(leaves) == 3
+    #     assert to_label(cg, 1, 0, 0, 0, 0) in leaves
+    #     assert to_label(cg, 1, 0, 0, 0, 1) in leaves
+    #     assert to_label(cg, 1, loc, loc, loc, 0) in leaves
+
+    #     # verify old state
+    #     old_root_id = cg.get_root(
+    #         to_label(cg, 1, 0, 0, 0, 0), time_stamp=fake_timestamp
+    #     )
+    #     assert new_root_ids[0] != old_root_id
+
+    #     assert len(get_latest_roots(cg)) == 1
+    #     assert len(get_latest_roots(cg, fake_timestamp)) == 1
+
+    @pytest.mark.timeout(30)
+    def test_split_same_node(self, gen_graph):
+        """
+        Try to remove (non-existing) edge between RG supervoxel 1 and itself
+        ┌─────┐
+        │  A¹ │
+        │  1  │  =>  Reject
+        │     │
+        └─────┘
+        """
+        cg: ChunkedGraph = gen_graph(n_layers=2)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 0, 0, 0)],
+            edges=[],
+            timestamp=fake_timestamp,
+        )
+
+        res_old = cg.client._table.read_rows()  # table snapshot before the rejected edit
+        res_old.consume_all()
+        with pytest.raises(exceptions.PreconditionError):
+            cg.remove_edges(
+                "Jane Doe",
+                source_ids=to_label(cg, 1, 0, 0, 0, 0),
+                sink_ids=to_label(cg, 1, 0, 0, 0, 0),  # same ID as source_ids
+                mincut=False,
+            )
+
+        res_new = cg.client._table.read_rows()  # table snapshot after the rejected edit
+        res_new.consume_all()
+        assert res_new.rows == res_old.rows  # the rejected edit must leave the rows unchanged
+
+    @pytest.mark.timeout(30)
+    def test_split_pair_abstract_nodes(self, gen_graph):
+        """
+        Try to remove (non-existing) edge between RG supervoxel 1 and abstract node "2"
+                    ┌─────┐
+                    │  B² │
+                    │ "2" │
+                    │     │
+                    └─────┘
+        ┌─────┐              =>  Reject
+        │  A¹ │
+        │  1  │
+        │     │
+        └─────┘
+        """
+
+        cg: ChunkedGraph = gen_graph(n_layers=3)
+        fake_timestamp = datetime.now(UTC) - timedelta(days=10)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 0, 0, 0)],
+            edges=[],
+            timestamp=fake_timestamp,
+        )
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 1, 0, 0, 0)],
+            edges=[],
+            timestamp=fake_timestamp,
+        )
+
+        add_parent_chunk(
+            cg,
+            3,
+            [0, 0, 0],
+            time_stamp=fake_timestamp,
+            n_threads=1,
+        )
+        res_old = cg.client._table.read_rows()  # table snapshot before the rejected edit
+        res_old.consume_all()
+        with pytest.raises((exceptions.PreconditionError, AssertionError)):  # either type is accepted
+            cg.remove_edges(
+                "Jane Doe",
+                source_ids=to_label(cg, 1, 0, 0, 0, 0),
+                sink_ids=to_label(cg, 2, 1, 0, 0, 1),  # presumably a layer-2 ID (the abstract node "2")
+                mincut=False,
+            )
+
+        res_new = cg.client._table.read_rows()  # table snapshot after the rejected edit
+        res_new.consume_all()
+        assert res_new.rows == res_old.rows  # the rejected edit must leave the rows unchanged
+
+    @pytest.mark.timeout(30)
+    def test_diagonal_connections(self, gen_graph):
+        """
+        Split 1 from 2 (same chunk) while 1 stays linked to 3 and 4 (neighboring chunks)
+        and 4 to 5; supervoxel 5 in the diagonal chunk D must still share a root with 1.
+        ┌─────┬─────┐
+        │  A¹ │  B¹ │
+        │ 2━1━┿━━3  │
+        │  /  │     │
+        ├─────┼─────┤
+        │  |  │     │
+        │  4━━┿━━5  │
+        │  C¹ │  D¹ │
+        └─────┴─────┘
+        """
+        cg: ChunkedGraph = gen_graph(n_layers=3)
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1)],
+            edges=[
+                (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 1), 0.5),  # 1-2, removed below
+                (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 1, 0, 0, 0), inf),  # 1-3
+                (to_label(cg, 1, 0, 0, 0, 0), to_label(cg, 1, 0, 1, 0, 0), inf),  # 1-4
+            ],
+        )
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 1, 0, 0, 0)],
+            edges=[(to_label(cg, 1, 1, 0, 0, 0), to_label(cg, 1, 0, 0, 0, 0), inf)],
+        )
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 0, 1, 0, 0)],
+            edges=[
+                (to_label(cg, 1, 0, 1, 0, 0), to_label(cg, 1, 1, 1, 0, 0), inf),  # 4-5
+                (to_label(cg, 1, 0, 1, 0, 0), to_label(cg, 1, 0, 0, 0, 0), inf),  # 4-1
+            ],
+        )
+        create_chunk(
+            cg,
+            vertices=[to_label(cg, 1, 1, 1, 0, 0)],
+            edges=[(to_label(cg, 1, 1, 1, 0, 0), to_label(cg, 1, 0, 1, 0, 0), inf)],
+        )
+        add_parent_chunk(
+            cg,
+            3,
+            [0, 0, 0],
+            n_threads=1,
+        )
+
+        rr = cg.range_read_chunk(chunk_id=cg.get_chunk_id(layer=3, x=0, y=0, z=0))
+        root_ids_t0 = list(rr.keys())
+        assert len(root_ids_t0) == 1  # all five supervoxels start under a single root
+
+        child_ids = []  # NOTE(review): collected but never asserted on
+        for root_id in root_ids_t0:
+            child_ids.extend([cg.get_subgraph([root_id], leaves_only=True)])
+
+        new_roots = cg.remove_edges(
+            "Jane Doe",
+            source_ids=to_label(cg, 1, 0, 0, 0, 0),
+            sink_ids=to_label(cg, 1, 0, 0, 0, 1),
+            mincut=False,
+        ).new_root_ids
+
+        assert len(new_roots) == 2  # {2} alone, and {1, 3, 4, 5}
+        assert cg.get_root(to_label(cg, 1, 1, 1, 0, 0)) == cg.get_root(
+            to_label(cg, 1, 0, 1, 0, 0)
+        )
+        assert cg.get_root(to_label(cg, 1, 1, 1, 0, 0)) == cg.get_root(
+            to_label(cg, 1, 0, 0, 0, 0)
+        )

From b171f2ef44964ca3fa54602aeb69c708ae8ddd04 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Mon, 10 Jun 2024 15:13:15 +0000
Subject: [PATCH 086/105] segregate update nodes logic

---
 .../graph/client/bigtable/client.py           |  8 +++++-
 pychunkedgraph/ingest/upgrade/atomic_layer.py | 26 +++++++++++--------
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/pychunkedgraph/graph/client/bigtable/client.py b/pychunkedgraph/graph/client/bigtable/client.py
index 52ec9a856..9195fb397 100644
--- a/pychunkedgraph/graph/client/bigtable/client.py
+++ b/pychunkedgraph/graph/client/bigtable/client.py
@@ -151,6 +151,7 @@ def read_nodes(
         end_time=None,
         end_time_inclusive: bool = False,
         fake_edges: bool = False,
+        attr_keys: bool = True,
     ):
         """
         Read nodes and their properties.
@@ -186,8 +187,13 @@ def read_nodes(
             end_time_inclusive=end_time_inclusive,
             user_id=user_id,
         )
+        if attr_keys:
+            return {
+                deserialize_uint64(row_key, fake_edges=fake_edges): data
+                for (row_key, data) in rows.items()
+            }
         return {
-            deserialize_uint64(row_key, fake_edges=fake_edges): data
+            deserialize_uint64(row_key, fake_edges=fake_edges): {k.key:v for k,v in data.items()}
             for (row_key, data) in rows.items()
         }
 
diff --git a/pychunkedgraph/ingest/upgrade/atomic_layer.py b/pychunkedgraph/ingest/upgrade/atomic_layer.py
index 96f7f71bd..6c4244968 100644
--- a/pychunkedgraph/ingest/upgrade/atomic_layer.py
+++ b/pychunkedgraph/ingest/upgrade/atomic_layer.py
@@ -79,17 +79,7 @@ def update_cross_edges(cg: ChunkedGraph, node, cx_edges_d, node_ts, end_ts) -> l
     return rows
 
 
-def update_chunk(cg: ChunkedGraph, chunk_coords: list[int], layer: int = 2):
-    """
-    Iterate over all L2 IDs in a chunk and update their cross chunk edges,
-    within the periods they were valid/active.
-    """
-    x, y, z = chunk_coords
-    chunk_id = cg.get_chunk_id(layer=layer, x=x, y=y, z=z)
-    cg.copy_fake_edges(chunk_id)
-    rr = cg.range_read_chunk(chunk_id)
-    nodes = list(rr.keys())
-
+def update_nodes(cg: ChunkedGraph, nodes) -> list:
     # get start_ts when node becomes valid
     nodes_ts = cg.get_node_timestamps(nodes, return_numpy=False, normalize=True)
     cx_edges_d = cg.get_atomic_cross_edges(nodes)
@@ -116,4 +106,18 @@ def update_chunk(cg: ChunkedGraph, chunk_coords: list[int], layer: int = 2):
         # for each timestamp until end_ts, update cross chunk edges of node
         _rows = update_cross_edges(cg, node, node_cx_edges_d, start_ts, end_ts)
         rows.extend(_rows)
+    return rows
+
+
+def update_chunk(cg: ChunkedGraph, chunk_coords: list[int], layer: int = 2):
+    """
+    Update cross chunk edges of every node ID read from the chunk at `chunk_coords`
+    (layer 2 by default), within the periods they were valid/active, and write the rows.
+    """
+    x, y, z = chunk_coords
+    chunk_id = cg.get_chunk_id(layer=layer, x=x, y=y, z=z)
+    cg.copy_fake_edges(chunk_id)  # done before the range read below
+    rr = cg.range_read_chunk(chunk_id)
+    nodes = list(rr.keys())  # node IDs are the keys of the range read result
+    rows = update_nodes(cg, nodes)  # rows collected for all nodes of the chunk
     cg.client.write(rows)

From a72d0ff7200ff7eb5b1f299ff2da50912137a1b6 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Fri, 28 Jun 2024 16:43:45 +0000
Subject: [PATCH 087/105] fix(edits): overwrite children partners when
 superseded by parents

---
 pychunkedgraph/debug/utils.py     | 15 +++++++--
 pychunkedgraph/graph/edits.py     | 56 ++++---------------------------
 pychunkedgraph/graph/operation.py |  3 ++
 3 files changed, 23 insertions(+), 51 deletions(-)

diff --git a/pychunkedgraph/debug/utils.py b/pychunkedgraph/debug/utils.py
index b1bdbc2be..130d85500 100644
--- a/pychunkedgraph/debug/utils.py
+++ b/pychunkedgraph/debug/utils.py
@@ -2,6 +2,8 @@
 
 import numpy as np
 
+from pychunkedgraph.graph.meta import ChunkedGraphMeta, GraphConfig
+
 
 def print_attrs(d):
     for k, v in d.items():
@@ -41,14 +43,14 @@ def sanity_check(cg, new_roots, operation_id):
     """
     Check for duplicates in hierarchy, useful for debugging.
     """
-    print(f"{len(new_roots)} new ids from {operation_id}")
+    # print(f"{len(new_roots)} new ids from {operation_id}")
     l2c_d = {}
     for new_root in new_roots:
         l2c_d[new_root] = get_l2children(cg, new_root)
     success = True
     for k, v in l2c_d.items():
         success = success and (len(v) == np.unique(v).size)
-        print(f"{k}: {np.unique(v).size}, {len(v)}")
+        # print(f"{k}: {np.unique(v).size}, {len(v)}")
     if not success:
         raise RuntimeError("Some ids are not valid.")
 
@@ -58,3 +60,12 @@ def sanity_check_single(cg, node, operation_id):
     msg = f"invalid node {node}:"
     msg += f" found {len(v)} l2 ids, must be {np.unique(v).size}"
     assert np.unique(v).size == len(v), f"{msg}, from {operation_id}."
+    return v
+
+
+def update_graph_id(cg, new_graph_id: str):
+    """Rebuild the graph meta with config `ID` set to `new_graph_id`, then overwrite it on `cg`."""
+    config_fields = {**cg.meta.graph_config._asdict(), "ID": new_graph_id}
+    graph_config = GraphConfig(**config_fields)
+    meta = ChunkedGraphMeta(graph_config, cg.meta.data_source, cg.meta.custom_data)
+    cg.update_meta(meta, overwrite=True)
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 807fff257..0778a1f82 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -420,9 +420,15 @@ def _update_neighbor_cross_edges_single(
                 continue
             assert np.all(edges[:, 0] == counterpart)
             edges = fastremap.remap(edges, node_map, preserve_missing_labels=True)
-            if layer == counterpart_layer:
+            if layer == counterpart_layer and layer >= node_layer:
                 reverse_edge = np.array([counterpart, new_id], dtype=basetypes.NODE_ID)
                 edges = np.concatenate([edges, [reverse_edge]])
+                children = cg.get_children(new_id)
+                mask = np.isin(edges[:, 1], children)
+                if np.any(mask):
+                    masked_edges = edges[mask]
+                    masked_edges[:, 1] = new_id
+                    edges[mask] = masked_edges
                 edges = np.unique(edges, axis=0)
             edges_d[layer] = edges
             val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
@@ -578,49 +584,6 @@ def _update_cross_edge_cache(self, parent, children):
             assert np.all(edges[:, 0] == parent), f"{parent}, {np.unique(edges[:, 0])}"
         self.cg.cache.cross_chunk_edges_cache[parent] = new_cx_edges_d
 
-    def _update_neighbor_parents(self, neighbor, ceil_layer: int, updated: set) -> list:
-        """helper for `_update_skipped_neighbors`"""
-        parents = []
-        while True:
-            parent = self.cg.get_parent(neighbor, time_stamp=self._last_successful_ts)
-            parent_layer = self.cg.get_chunk_layer(parent)
-            if parent_layer >= ceil_layer or parent in updated:
-                break
-            children = self.cg.get_children(parent)
-            self._update_cross_edge_cache(parent, children)
-            parents.append(parent)
-            neighbor = parent
-        return parents
-
-    def _update_skipped_neighbors(self, node, layer, parent_layer):
-        """
-        Updates cross edges of neighbors of a skip connection node.
-        Neighbors of such nodes can have parents at contiguous layers.
-
-        This method updates cross edges of all such parents
-        from `layer` through `parent_layer`.
-        """
-        updated_parents = set()
-        cx_edges_d = self.cg.cache.cross_chunk_edges_cache[node]
-        for _layer in range(layer, parent_layer + 1):
-            layer_edges = cx_edges_d.get(_layer, types.empty_2d)
-            neighbors = layer_edges[:, 1]
-            for n in neighbors:
-                if n in self._new_old_id_d:
-                    # ignore new ids
-                    continue
-                res = self._update_neighbor_parents(n, parent_layer, updated_parents)
-                updated_parents.update(res)
-        updated_entries = []
-        for parent in updated_parents:
-            val_dict = {}
-            for _layer, edges in self.cg.cache.cross_chunk_edges_cache[parent].items():
-                val_dict[attributes.Connectivity.CrossChunkEdge[_layer]] = edges
-            rkey = serialize_uint64(parent)
-            row = self.cg.client.mutate_row(rkey, val_dict, time_stamp=self._time_stamp)
-            updated_entries.append(row)
-        return updated_entries
-
     def _create_new_parents(self, layer: int):
         """
         keep track of old IDs
@@ -635,7 +598,6 @@ def _create_new_parents(self, layer: int):
         layer_node_ids = self._get_layer_node_ids(new_ids, layer)
         components, graph_ids = self._get_connected_components(layer_node_ids, layer)
         for cc_indices in components:
-            update_skipped_neighbors = False
             parent_layer = layer + 1  # must be reset for each connected component
             cc_ids = graph_ids[cc_indices]
             if len(cc_ids) == 1:
@@ -648,7 +610,6 @@ def _create_new_parents(self, layer: int):
                     if len(cx_edges_d[cc_ids[0]].get(l, types.empty_2d)) > 0:
                         parent_layer = l
                         break
-                update_skipped_neighbors = cc_ids[0] in self._new_old_id_d
             parent = self.cg.id_client.create_node_id(
                 self.cg.get_parent_chunk_id(cc_ids[0], parent_layer),
                 root_chunk=parent_layer == self.cg.meta.layer_count,
@@ -658,9 +619,6 @@ def _create_new_parents(self, layer: int):
             self.cg.cache.children_cache[parent] = cc_ids
             cache_utils.update(self.cg.cache.parents_cache, cc_ids, parent)
             sanity_check_single(self.cg, parent, self._operation_id)
-            if update_skipped_neighbors:
-                res = self._update_skipped_neighbors(cc_ids[0], layer, parent_layer)
-                self.new_entries.extend(res)
 
     def run(self) -> Iterable:
         """
diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py
index 39668565f..6d2f3c0bb 100644
--- a/pychunkedgraph/graph/operation.py
+++ b/pychunkedgraph/graph/operation.py
@@ -457,6 +457,9 @@ def execute(
             except PostconditionError as err:
                 self.cg.cache = None
                 raise PostconditionError(err) from err
+            except (AssertionError, RuntimeError) as err:
+                self.cg.cache = None
+                raise RuntimeError(err) from err
             except Exception as err:
                 # unknown exception, update log record with error
                 self.cg.cache = None

From ea65ca69f5331e65a84c185751c247d9183eda1a Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Thu, 4 Jul 2024 15:54:48 +0000
Subject: [PATCH 088/105] fix: unique edges always, preceding edit ts, allow
 same segment merge

---
 pychunkedgraph/graph/edits.py     |  6 +++---
 pychunkedgraph/graph/operation.py | 18 +++++++++++-------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 0778a1f82..735ae65f8 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -414,13 +414,13 @@ def _update_neighbor_cross_edges_single(
     for counterpart, edges_d in cp_cx_edges_d.items():
         val_dict = {}
         counterpart_layer = counterpart_layers[counterpart]
-        for layer in range(2, cg.meta.layer_count):
+        for layer in range(node_layer, cg.meta.layer_count):
             edges = edges_d.get(layer, types.empty_2d)
             if edges.size == 0:
                 continue
             assert np.all(edges[:, 0] == counterpart)
             edges = fastremap.remap(edges, node_map, preserve_missing_labels=True)
-            if layer == counterpart_layer and layer >= node_layer:
+            if layer == counterpart_layer:
                 reverse_edge = np.array([counterpart, new_id], dtype=basetypes.NODE_ID)
                 edges = np.concatenate([edges, [reverse_edge]])
                 children = cg.get_children(new_id)
@@ -429,7 +429,7 @@ def _update_neighbor_cross_edges_single(
                     masked_edges = edges[mask]
                     masked_edges[:, 1] = new_id
                     edges[mask] = masked_edges
-                edges = np.unique(edges, axis=0)
+            edges = np.unique(edges, axis=0)
             edges_d[layer] = edges
             val_dict[attributes.Connectivity.CrossChunkEdge[layer]] = edges
         if not val_dict:
diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py
index 6d2f3c0bb..1a221d236 100644
--- a/pychunkedgraph/graph/operation.py
+++ b/pychunkedgraph/graph/operation.py
@@ -615,13 +615,16 @@ def _apply(
                 edges_only=True,
             )
 
-        with TimeIt("preprocess", self.cg.graph_id, operation_id):
-            inactive_edges = edits.merge_preprocess(
-                self.cg,
-                subgraph_edges=edges,
-                supervoxels=self.added_edges.ravel(),
-                parent_ts=self.parent_ts,
-            )
+        if self.allow_same_segment_merge:
+            inactive_edges = types.empty_2d
+        else:
+            with TimeIt("preprocess", self.cg.graph_id, operation_id):
+                inactive_edges = edits.merge_preprocess(
+                    self.cg,
+                    subgraph_edges=edges,
+                    supervoxels=self.added_edges.ravel(),
+                    parent_ts=self.parent_ts,
+                )
 
         atomic_edges, fake_edge_rows = edits.check_fake_edges(
             self.cg,
@@ -637,6 +640,7 @@ def _apply(
                 operation_id=operation_id,
                 time_stamp=timestamp,
                 parent_ts=self.parent_ts,
+                allow_same_segment_merge=self.allow_same_segment_merge
             )
         return new_roots, new_l2_ids, fake_edge_rows + new_entries
 

From 7432d8ae78f4fa1768b9d9ea9a0037a52ecff3f9 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Thu, 4 Jul 2024 16:22:25 +0000
Subject: [PATCH 089/105] =?UTF-8?q?Bump=20version:=203.0.0=20=E2=86=92=203?=
 =?UTF-8?q?.0.1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg           | 2 +-
 pychunkedgraph/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 5583246c5..6526fbc66 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 3.0.0
+current_version = 3.0.1
 commit = True
 tag = True
 
diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 528787cfc..055276878 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.0"
+__version__ = "3.0.1"

From bd4dd27d29dbbddae396b8d168a66f6585624cd6 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sat, 6 Jul 2024 17:38:44 +0000
Subject: [PATCH 090/105] fix(edits): mask all descendants when updating cx
 edges

---
 pychunkedgraph/graph/edits.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 735ae65f8..add0c9d0c 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -391,6 +391,25 @@ def _get_flipped_ids(id_map, node_ids):
     return np.concatenate(ids)
 
 
+def _get_descendants(cg, new_id):
+    """get all descendants at layers >= 2"""
+    result = []
+    children = cg.get_children(new_id)
+    while True:
+        mask = cg.get_chunk_layers(children) >= 2
+        children = children[mask]
+        result.extend(children)
+
+        mask = cg.get_chunk_layers(children) > 2
+        children = children[mask]
+        if children.size == 0:
+            break
+
+        children = cg.get_children(children, flatten=True)
+    return result
+
+
+
 def _update_neighbor_cross_edges_single(
     cg, new_id: int, cx_edges_d: dict, node_map: dict, *, parent_ts
 ) -> dict:
@@ -423,8 +442,8 @@ def _update_neighbor_cross_edges_single(
             if layer == counterpart_layer:
                 reverse_edge = np.array([counterpart, new_id], dtype=basetypes.NODE_ID)
                 edges = np.concatenate([edges, [reverse_edge]])
-                children = cg.get_children(new_id)
-                mask = np.isin(edges[:, 1], children)
+                descendants = _get_descendants(cg, new_id)
+                mask = np.isin(edges[:, 1], descendants)
                 if np.any(mask):
                     masked_edges = edges[mask]
                     masked_edges[:, 1] = new_id

From 02c727d430270fff43a3ad8dc4f293333b3ad5d0 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sat, 6 Jul 2024 17:39:16 +0000
Subject: [PATCH 091/105] =?UTF-8?q?Bump=20version:=203.0.1=20=E2=86=92=203?=
 =?UTF-8?q?.0.2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg           | 2 +-
 pychunkedgraph/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 6526fbc66..62209053d 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 3.0.1
+current_version = 3.0.2
 commit = True
 tag = True
 
diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 055276878..131942e76 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.1"
+__version__ = "3.0.2"

From 9b0694e25c9d2cdf56df86d4dbab802b34f265fc Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sun, 7 Jul 2024 01:25:34 +0000
Subject: [PATCH 092/105] fix(edits): use supervoxels to get the correct cross
 edge parents

---
 pychunkedgraph/graph/edits.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index add0c9d0c..4efead0c9 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -496,6 +496,25 @@ def _update_neighbor_cross_edges(
     return updated_entries
 
 
+def _get_supervoxels(cg, node_ids):
+    """Returns the first supervoxel found for each node_id."""
+    result  = {}
+    node_ids_copy = np.copy(node_ids)
+    children = np.copy(node_ids)
+    children_d = cg.get_children(node_ids)
+    while True:
+        children = [children_d[k][0] for k in children]
+        children = np.array(children, dtype=basetypes.NODE_ID)
+        mask = cg.get_chunk_layers(children) == 1
+        result.update([(node, sv) for node, sv in zip(node_ids[mask], children[mask])])
+        node_ids = node_ids[~mask]
+        children = children[~mask]
+        if children.size == 0:
+            break
+        children_d = cg.get_children(children)
+    return np.array([result[k] for k in node_ids_copy], dtype=basetypes.NODE_ID)
+
+
 class CreateParentNodes:
     def __init__(
         self,
@@ -586,8 +605,9 @@ def _update_cross_edge_cache(self, parent, children):
         )
         cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values())
         edge_nodes = np.unique(np.concatenate([*cx_edges_d.values(), types.empty_2d]))
+        edge_supervoxels = _get_supervoxels(self.cg, edge_nodes)
         edge_parents = self.cg.get_roots(
-            edge_nodes,
+            edge_supervoxels,
             stop_layer=parent_layer,
             ceil=False,
             time_stamp=self._last_successful_ts,

From 257ad9e99dc204e66fb043c07ecefc5b3d3a2ec4 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sun, 7 Jul 2024 01:25:52 +0000
Subject: [PATCH 093/105] =?UTF-8?q?Bump=20version:=203.0.2=20=E2=86=92=203?=
 =?UTF-8?q?.0.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg           | 2 +-
 pychunkedgraph/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 62209053d..f98e5ee64 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 3.0.2
+current_version = 3.0.3
 commit = True
 tag = True
 
diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 131942e76..8d1c8625f 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.2"
+__version__ = "3.0.3"

From d1dbdaed65cf4b542879b8beaed2f4b22f42e867 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Tue, 16 Jul 2024 19:24:15 +0000
Subject: [PATCH 094/105] fix(edits/split): filter out inactive cross edges

---
 pychunkedgraph/graph/chunkedgraph.py | 15 ++++++++++++++-
 pychunkedgraph/graph/edits.py        |  7 ++++---
 pychunkedgraph/graph/operation.py    | 10 +---------
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 7d1a24cc3..1836094f0 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -657,7 +657,11 @@ def copy_fake_edges(self, chunk_id: np.uint64) -> None:
         self.client.write(mutations)
 
     def get_l2_agglomerations(
-        self, level2_ids: np.ndarray, edges_only: bool = False
+        self,
+        level2_ids: np.ndarray,
+        edges_only: bool = False,
+        active: bool = False,
+        time_stamp: typing.Optional[datetime.datetime] = None,
     ) -> typing.Tuple[typing.Dict[int, types.Agglomeration], typing.Tuple[Edges]]:
         """
         Children of Level 2 Node IDs and edges.
@@ -703,6 +707,15 @@ def get_l2_agglomerations(
                     raise ValueError("Found conflicting parents.")
             sv_parent_d.update(dict(zip(svs.tolist(), [l2id] * len(svs))))
 
+        if active:
+            n1, n2 = all_chunk_edges.node_ids1, all_chunk_edges.node_ids2
+            layers = self.get_cross_chunk_edges_layer(all_chunk_edges.get_pairs())
+            max_layer = np.max(layers) + 1
+            parents1 = self.get_roots(n1, stop_layer=max_layer, time_stamp=time_stamp)
+            parents2 = self.get_roots(n2, stop_layer=max_layer, time_stamp=time_stamp)
+            mask = parents1 == parents2
+            all_chunk_edges = all_chunk_edges[mask]
+
         in_edges, out_edges, cross_edges = edge_utils.categorize_edges_v2(
             self.meta, all_chunk_edges, sv_parent_d
         )
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 4efead0c9..30e86951a 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -313,7 +313,6 @@ def remove_edges(
     cg,
     *,
     atomic_edges: Iterable[np.ndarray],
-    l2id_agglomeration_d: Dict,
     operation_id: basetypes.OPERATION_ID = None,
     time_stamp: datetime.datetime = None,
     parent_ts: datetime.datetime = None,
@@ -323,6 +322,9 @@ def remove_edges(
     roots = cg.get_roots(l2ids, assert_roots=True, time_stamp=parent_ts)
     assert np.unique(roots).size == 1, "L2 IDs must belong to same root."
 
+    l2id_agglomeration_d, _ = cg.get_l2_agglomerations(
+        l2ids, active=True, time_stamp=parent_ts
+    )
     new_old_id_d = defaultdict(set)
     old_new_id_d = defaultdict(set)
     old_hierarchy_d = _init_old_hierarchy(cg, l2ids, parent_ts=parent_ts)
@@ -409,7 +411,6 @@ def _get_descendants(cg, new_id):
     return result
 
 
-
 def _update_neighbor_cross_edges_single(
     cg, new_id: int, cx_edges_d: dict, node_map: dict, *, parent_ts
 ) -> dict:
@@ -498,7 +499,7 @@ def _update_neighbor_cross_edges(
 
 def _get_supervoxels(cg, node_ids):
     """Returns the first supervoxel found for each node_id."""
-    result  = {}
+    result = {}
     node_ids_copy = np.copy(node_ids)
     children = np.copy(node_ids)
     children_d = cg.get_children(node_ids)
diff --git a/pychunkedgraph/graph/operation.py b/pychunkedgraph/graph/operation.py
index 1a221d236..8c5d4484e 100644
--- a/pychunkedgraph/graph/operation.py
+++ b/pychunkedgraph/graph/operation.py
@@ -640,7 +640,7 @@ def _apply(
                 operation_id=operation_id,
                 time_stamp=timestamp,
                 parent_ts=self.parent_ts,
-                allow_same_segment_merge=self.allow_same_segment_merge
+                allow_same_segment_merge=self.allow_same_segment_merge,
             )
         return new_roots, new_l2_ids, fake_edge_rows + new_entries
 
@@ -751,18 +751,11 @@ def _apply(
         ):
             raise PreconditionError("Supervoxels must belong to the same object.")
 
-        with TimeIt("subgraph", self.cg.graph_id, operation_id):
-            l2id_agglomeration_d, _ = self.cg.get_l2_agglomerations(
-                self.cg.get_parents(
-                    self.removed_edges.ravel(), time_stamp=self.parent_ts
-                ),
-            )
         with TimeIt("remove_edges", self.cg.graph_id, operation_id):
             return edits.remove_edges(
                 self.cg,
                 operation_id=operation_id,
                 atomic_edges=self.removed_edges,
-                l2id_agglomeration_d=l2id_agglomeration_d,
                 time_stamp=timestamp,
                 parent_ts=self.parent_ts,
             )
@@ -929,7 +922,6 @@ def _apply(
                 self.cg,
                 operation_id=operation_id,
                 atomic_edges=self.removed_edges,
-                l2id_agglomeration_d=l2id_agglomeration_d,
                 time_stamp=timestamp,
                 parent_ts=self.parent_ts,
             )

From c6002b0703610b4cbd039ddcf492e8f8c83c87b5 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Wed, 17 Jul 2024 16:16:21 +0000
Subject: [PATCH 095/105] fix(edits/split): filter out inactive cross edges AT
 EACH LAYER

---
 pychunkedgraph/__init__.py           |  2 +-
 pychunkedgraph/graph/chunkedgraph.py | 14 +++++---------
 pychunkedgraph/graph/edges/utils.py  | 22 +++++++++++++++++++++-
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 8d1c8625f..528787cfc 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.3"
+__version__ = "3.0.0"
diff --git a/pychunkedgraph/graph/chunkedgraph.py b/pychunkedgraph/graph/chunkedgraph.py
index 1836094f0..7823695db 100644
--- a/pychunkedgraph/graph/chunkedgraph.py
+++ b/pychunkedgraph/graph/chunkedgraph.py
@@ -3,6 +3,8 @@
 import time
 import typing
 import datetime
+from itertools import chain
+from functools import reduce
 
 import numpy as np
 from pychunkedgraph import __version__
@@ -667,8 +669,6 @@ def get_l2_agglomerations(
         Children of Level 2 Node IDs and edges.
         Edges are read from cloud storage.
         """
-        from itertools import chain
-        from functools import reduce
         from .misc import get_agglomerations
 
         chunk_ids = np.unique(self.get_chunk_ids_from_node_ids(level2_ids))
@@ -708,13 +708,9 @@ def get_l2_agglomerations(
             sv_parent_d.update(dict(zip(svs.tolist(), [l2id] * len(svs))))
 
         if active:
-            n1, n2 = all_chunk_edges.node_ids1, all_chunk_edges.node_ids2
-            layers = self.get_cross_chunk_edges_layer(all_chunk_edges.get_pairs())
-            max_layer = np.max(layers) + 1
-            parents1 = self.get_roots(n1, stop_layer=max_layer, time_stamp=time_stamp)
-            parents2 = self.get_roots(n2, stop_layer=max_layer, time_stamp=time_stamp)
-            mask = parents1 == parents2
-            all_chunk_edges = all_chunk_edges[mask]
+            all_chunk_edges = edge_utils.filter_inactive_cross_edges(
+                self, all_chunk_edges, time_stamp=time_stamp
+            )
 
         in_edges, out_edges, cross_edges = edge_utils.categorize_edges_v2(
             self.meta, all_chunk_edges, sv_parent_d
diff --git a/pychunkedgraph/graph/edges/utils.py b/pychunkedgraph/graph/edges/utils.py
index cd0e85fe8..76f8ea1d8 100644
--- a/pychunkedgraph/graph/edges/utils.py
+++ b/pychunkedgraph/graph/edges/utils.py
@@ -9,6 +9,7 @@
 from typing import Iterable
 from typing import Optional
 from collections import defaultdict
+from functools import reduce
 
 import fastremap
 import numpy as np
@@ -46,7 +47,9 @@ def concatenate_chunk_edges(chunk_edge_dicts: Iterable) -> Dict:
     return edges_dict
 
 
-def concatenate_cross_edge_dicts(edges_ds: Iterable[Dict], unique: bool = False) -> Dict:
+def concatenate_cross_edge_dicts(
+    edges_ds: Iterable[Dict], unique: bool = False
+) -> Dict:
     """Combines cross chunk edge dicts of form {layer id : edge list}."""
     result_d = defaultdict(list)
     for edges_d in edges_ds:
@@ -182,3 +185,20 @@ def get_edges_status(cg, edges: Iterable, time_stamp: Optional[float] = None):
         active_status.extend(mask)
     active_status = np.array(active_status, dtype=bool)
     return existence_status, active_status
+
+
+def filter_inactive_cross_edges(
+    cg, all_chunk_edges: Edges, time_stamp: Optional[float] = None
+):
+    result = []
+    layers = cg.get_cross_chunk_edges_layer(all_chunk_edges.get_pairs())
+    for layer in np.unique(layers):
+        layer_mask = layers == layer
+        parent_layer = layer + 1
+        layer_edges = all_chunk_edges[layer_mask]
+        n1, n2 = layer_edges.node_ids1, layer_edges.node_ids2
+        parents1 = cg.get_roots(n1, stop_layer=parent_layer, time_stamp=time_stamp)
+        parents2 = cg.get_roots(n2, stop_layer=parent_layer, time_stamp=time_stamp)
+        mask = parents1 == parents2
+        result.append(layer_edges[mask])
+    return reduce(lambda x, y: x + y, result, Edges([], []))

From 16096247324a074b04537257ef8450b43adb450f Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Fri, 30 Aug 2024 15:38:53 +0000
Subject: [PATCH 096/105] migration debug code

---
 .bumpversion.cfg              |  2 +-
 pychunkedgraph/__init__.py    |  2 +-
 pychunkedgraph/graph/edits.py | 22 ++++++++++++++++++----
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index f98e5ee64..6526fbc66 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 3.0.3
+current_version = 3.0.1
 commit = True
 tag = True
 
diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 528787cfc..055276878 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.0"
+__version__ = "3.0.1"
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 30e86951a..340cefadd 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -313,7 +313,7 @@ def remove_edges(
     cg,
     *,
     atomic_edges: Iterable[np.ndarray],
-    operation_id: basetypes.OPERATION_ID = None,
+    operation_id: basetypes.OPERATION_ID = None,  # type: ignore
     time_stamp: datetime.datetime = None,
     parent_ts: datetime.datetime = None,
 ):
@@ -522,7 +522,7 @@ def __init__(
         cg,
         *,
         new_l2_ids: Iterable,
-        operation_id: basetypes.OPERATION_ID,
+        operation_id: basetypes.OPERATION_ID,  # type: ignore
         time_stamp: datetime.datetime,
         new_old_id_d: Dict[np.uint64, Set[np.uint64]] = None,
         old_new_id_d: Dict[np.uint64, Set[np.uint64]] = None,
@@ -542,7 +542,7 @@ def __init__(
 
     def _update_id_lineage(
         self,
-        parent: basetypes.NODE_ID,
+        parent: basetypes.NODE_ID,  # type: ignore
         children: np.ndarray,
         layer: int,
         parent_layer: int,
@@ -658,7 +658,21 @@ def _create_new_parents(self, layer: int):
             self._update_id_lineage(parent, cc_ids, layer, parent_layer)
             self.cg.cache.children_cache[parent] = cc_ids
             cache_utils.update(self.cg.cache.parents_cache, cc_ids, parent)
-            sanity_check_single(self.cg, parent, self._operation_id)
+
+            try:
+                sanity_check_single(self.cg, parent, self._operation_id)
+            except AssertionError:
+                from pychunkedgraph.debug.utils import get_l2children
+
+                pairs = [
+                    (a, b) for idx, a in enumerate(cc_ids) for b in cc_ids[idx + 1 :]
+                ]
+                for c1, c2 in pairs:
+                    l2c1 = get_l2children(self.cg, c1)
+                    l2c2 = get_l2children(self.cg, c2)
+                    if np.intersect1d(l2c1, l2c2).size:
+                        msg = f"{self._operation_id}:{c1} {c2} have common children."
+                        raise ValueError(msg)
 
     def run(self) -> Iterable:
         """

From 82683982821e8294894eafa4db9b45fd729fb9b9 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sun, 22 Sep 2024 15:30:45 +0000
Subject: [PATCH 097/105] use parent timestamps to lift cx edges

---
 pychunkedgraph/ingest/upgrade/atomic_layer.py | 72 ++++---------------
 pychunkedgraph/ingest/upgrade/parent_layer.py | 20 +++---
 pychunkedgraph/ingest/upgrade/utils.py        | 50 +++++++++++++
 3 files changed, 74 insertions(+), 68 deletions(-)

diff --git a/pychunkedgraph/ingest/upgrade/atomic_layer.py b/pychunkedgraph/ingest/upgrade/atomic_layer.py
index 6c4244968..a975146de 100644
--- a/pychunkedgraph/ingest/upgrade/atomic_layer.py
+++ b/pychunkedgraph/ingest/upgrade/atomic_layer.py
@@ -1,50 +1,19 @@
 # pylint: disable=invalid-name, missing-docstring, c-extension-no-member
+
 from datetime import timedelta
 
 import fastremap
 import numpy as np
 from pychunkedgraph.graph import ChunkedGraph
 from pychunkedgraph.graph.attributes import Connectivity
-from pychunkedgraph.graph.attributes import Hierarchy
 from pychunkedgraph.graph.utils import serializers
 
-from .utils import exists_as_parent
-
-
-def get_parent_timestamps(cg, supervoxels, start_time=None, end_time=None) -> set:
-    """
-    Timestamps of when the given supervoxels were edited, in the given time range.
-    """
-    response = cg.client.read_nodes(
-        node_ids=supervoxels,
-        start_time=start_time,
-        end_time=end_time,
-        end_time_inclusive=False,
-    )
-    result = set()
-    for v in response.values():
-        for cell in v[Hierarchy.Parent]:
-            valid = cell.timestamp >= start_time or cell.timestamp < end_time
-            assert valid, f"{cell.timestamp}, {start_time}"
-            result.add(cell.timestamp)
-    return result
+from .utils import exists_as_parent, get_parent_timestamps
 
 
-def get_edit_timestamps(cg: ChunkedGraph, edges_d, start_ts, end_ts) -> list:
-    """
-    Timestamps of when post-side supervoxels were involved in an edit.
-    Post-side - supervoxels in the neighbor chunk.
-    This is required because we need to update edges from both sides.
-    """
-    atomic_cx_edges = np.concatenate(list(edges_d.values()))
-    timestamps = get_parent_timestamps(
-        cg, atomic_cx_edges[:, 1], start_time=start_ts, end_time=end_ts
-    )
-    timestamps.add(start_ts)
-    return sorted(timestamps)
-
-
-def update_cross_edges(cg: ChunkedGraph, node, cx_edges_d, node_ts, end_ts) -> list:
+def update_cross_edges(
+    cg: ChunkedGraph, node, cx_edges_d, node_ts, timestamps, earliest_ts
+) -> list:
     """
     Helper function to update a single L2 ID.
     Returns a list of mutations with given timestamps.
@@ -58,10 +27,9 @@ def update_cross_edges(cg: ChunkedGraph, node, cx_edges_d, node_ts, end_ts) -> l
         assert not exists_as_parent(cg, node, edges[:, 0])
         return rows
 
-    timestamps = [node_ts]
-    if node_ts != end_ts:
-        timestamps = get_edit_timestamps(cg, cx_edges_d, node_ts, end_ts)
     for ts in timestamps:
+        if ts < earliest_ts:
+            ts = earliest_ts
         val_dict = {}
         svs = edges[:, 1]
         parents = cg.get_parents(svs, time_stamp=ts)
@@ -80,31 +48,21 @@ def update_cross_edges(cg: ChunkedGraph, node, cx_edges_d, node_ts, end_ts) -> l
 
 
 def update_nodes(cg: ChunkedGraph, nodes) -> list:
-    # get start_ts when node becomes valid
     nodes_ts = cg.get_node_timestamps(nodes, return_numpy=False, normalize=True)
+    earliest_ts = cg.get_earliest_timestamp()
+    timestamps_d = get_parent_timestamps(cg, nodes)
     cx_edges_d = cg.get_atomic_cross_edges(nodes)
-    children_d = cg.get_children(nodes)
-
     rows = []
-    for node, start_ts in zip(nodes, nodes_ts):
+    for node, node_ts in zip(nodes, nodes_ts):
         if cg.get_parent(node) is None:
             # invalid id caused by failed ingest task
             continue
-        node_cx_edges_d = cx_edges_d.get(node, {})
-        if not node_cx_edges_d:
+        _cx_edges_d = cx_edges_d.get(node, {})
+        if not _cx_edges_d:
             continue
-
-        # get end_ts when node becomes invalid (bigtable resolution is in ms)
-        start = start_ts + timedelta(milliseconds=1)
-        _timestamps = get_parent_timestamps(cg, children_d[node], start_time=start)
-        try:
-            end_ts = sorted(_timestamps)[0]
-        except IndexError:
-            # start_ts == end_ts means there has been no edit involving this node
-            # meaning only one timestamp to update cross edges, start_ts
-            end_ts = start_ts
-        # for each timestamp until end_ts, update cross chunk edges of node
-        _rows = update_cross_edges(cg, node, node_cx_edges_d, start_ts, end_ts)
+        _rows = update_cross_edges(
+            cg, node, _cx_edges_d, node_ts, timestamps_d[node], earliest_ts
+        )
         rows.extend(_rows)
     return rows
 
diff --git a/pychunkedgraph/ingest/upgrade/parent_layer.py b/pychunkedgraph/ingest/upgrade/parent_layer.py
index 8674e45b7..0606ff674 100644
--- a/pychunkedgraph/ingest/upgrade/parent_layer.py
+++ b/pychunkedgraph/ingest/upgrade/parent_layer.py
@@ -14,7 +14,7 @@
 from pychunkedgraph.graph.types import empty_2d
 from pychunkedgraph.utils.general import chunked
 
-from .utils import exists_as_parent
+from .utils import exists_as_parent, get_parent_timestamps
 
 
 CHILDREN = {}
@@ -50,7 +50,7 @@ def _get_cx_edges_at_timestamp(node, response, ts):
 
 
 def _populate_cx_edges_with_timestamps(
-    cg: ChunkedGraph, layer: int, nodes: list, nodes_ts: list
+    cg: ChunkedGraph, layer: int, nodes: list, earliest_ts
 ):
     """
     Collect timestamps of edits from children, since we use the same timestamp
@@ -61,15 +61,13 @@ def _populate_cx_edges_with_timestamps(
     attrs = [Connectivity.CrossChunkEdge[l] for l in range(layer, cg.meta.layer_count)]
     all_children = np.concatenate(list(CHILDREN.values()))
     response = cg.client.read_nodes(node_ids=all_children, properties=attrs)
-    for node, node_ts in zip(nodes, nodes_ts):
-        timestamps = set([node_ts])
-        for child in CHILDREN[node]:
-            if child not in response:
-                continue
-            for cells in response[child].values():
-                timestamps.update([c.timestamp for c in cells if c.timestamp > node_ts])
+    timestamps_d = get_parent_timestamps(cg, nodes)
+    for node in nodes:
         CX_EDGES[node] = {}
+        timestamps = timestamps_d[node]
         for ts in sorted(timestamps):
+            if ts < earliest_ts:
+                ts = earliest_ts
             CX_EDGES[node][ts] = _get_cx_edges_at_timestamp(node, response, ts)
 
 
@@ -142,19 +140,19 @@ def update_chunk(
     start = time.time()
     x, y, z = chunk_coords
     chunk_id = cg.get_chunk_id(layer=layer, x=x, y=y, z=z)
+    earliest_ts = cg.get_earliest_timestamp()
     _populate_nodes_and_children(cg, chunk_id, nodes=nodes)
     if not CHILDREN:
         return
     nodes = list(CHILDREN.keys())
     random.shuffle(nodes)
     nodes_ts = cg.get_node_timestamps(nodes, return_numpy=False, normalize=True)
-    _populate_cx_edges_with_timestamps(cg, layer, nodes, nodes_ts)
+    _populate_cx_edges_with_timestamps(cg, layer, nodes, earliest_ts)
 
     task_size = int(math.ceil(len(nodes) / mp.cpu_count() / 2))
     chunked_nodes = chunked(nodes, task_size)
     chunked_nodes_ts = chunked(nodes_ts, task_size)
     cg_info = cg.get_serialized_info()
-    earliest_ts = cg.get_earliest_timestamp()
 
     multi_args = []
     for chunk, ts_chunk in zip(chunked_nodes, chunked_nodes_ts):
diff --git a/pychunkedgraph/ingest/upgrade/utils.py b/pychunkedgraph/ingest/upgrade/utils.py
index 43c9a3034..cc43b561a 100644
--- a/pychunkedgraph/ingest/upgrade/utils.py
+++ b/pychunkedgraph/ingest/upgrade/utils.py
@@ -1,3 +1,9 @@
+# pylint: disable=invalid-name, missing-docstring
+
+from collections import defaultdict
+from datetime import timedelta
+
+import numpy as np
 from pychunkedgraph.graph import ChunkedGraph
 from pychunkedgraph.graph.attributes import Hierarchy
 
@@ -11,3 +17,47 @@ def exists_as_parent(cg: ChunkedGraph, parent, nodes) -> bool:
     for cells in response.values():
         parents.update([cell.value for cell in cells])
     return parent in parents
+
+
+def get_edit_timestamps(cg: ChunkedGraph, edges_d, start_ts, end_ts) -> list:
+    """
+    Timestamps of when post-side nodes were involved in an edit.
+    Post-side - nodes in the neighbor chunk.
+    This is required because we need to update edges from both sides.
+    """
+    cx_edges = np.concatenate(list(edges_d.values()))
+    timestamps = get_parent_timestamps(
+        cg, cx_edges[:, 1], start_time=start_ts, end_time=end_ts
+    )
+    timestamps.add(start_ts)
+    return sorted(timestamps)
+
+
+def get_end_ts(cg: ChunkedGraph, children, start_ts):
+    # get end_ts when node becomes invalid (bigtable resolution is in ms)
+    start = start_ts + timedelta(milliseconds=1)
+    _timestamps = get_parent_timestamps(cg, children, start_time=start)
+    try:
+        end_ts = sorted(_timestamps)[0]
+    except IndexError:
+        # start_ts == end_ts means there has been no edit involving this node
+        # meaning only one timestamp to update cross edges, start_ts
+        end_ts = start_ts
+    return end_ts
+
+
+def get_parent_timestamps(cg: ChunkedGraph, nodes) -> dict[int, set]:
+    """
+    Timestamps of when the given nodes were edited.
+    """
+    response = cg.client.read_nodes(
+        node_ids=nodes,
+        properties=[Hierarchy.Parent],
+        end_time_inclusive=False,
+    )
+
+    result = defaultdict(set)
+    for k, v in response.items():
+        for cell in v[Hierarchy.Parent]:
+            result[k].add(cell.timestamp)
+    return result

From c93efe98f57d4a75a5bbb68c645afe2e71c5808c Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Mon, 23 Sep 2024 14:48:14 +0000
Subject: [PATCH 098/105] make dynamic mesh dir graph specific

---
 pychunkedgraph/__init__.py | 2 +-
 workers/mesh_worker.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 055276878..8e10cb462 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.1"
+__version__ = "3.0.4"
diff --git a/workers/mesh_worker.py b/workers/mesh_worker.py
index 238bad7a9..2dd3dd34d 100644
--- a/workers/mesh_worker.py
+++ b/workers/mesh_worker.py
@@ -39,7 +39,7 @@ def callback(payload):
     try:
         mesh_dir = cg.meta.dataset_info["mesh"]
         mesh_meta = cg.meta.dataset_info["mesh_metadata"]
-        cv_unsharded_mesh_dir = mesh_meta.get("unsharded_mesh_dir", "dynamic")
+        cv_unsharded_mesh_dir = mesh_meta.get("unsharded_mesh_dir", f"dynamic_{cg.graph_id}")
     except KeyError:
         logging.warning(f"No metadata found for {cg.graph_id}; ignoring...")
         return

From d5fa9fe538d3d7ca9febc3a1c416887e10c44bed Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Thu, 26 Sep 2024 14:45:14 +0000
Subject: [PATCH 099/105] fix(upgrade): use hierarchy from supervoxels

---
 pychunkedgraph/__init__.py                    | 2 +-
 pychunkedgraph/graph/edits.py                 | 7 ++++---
 pychunkedgraph/ingest/upgrade/parent_layer.py | 4 +++-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 8e10cb462..e94f36fe8 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.4"
+__version__ = "3.0.5"
diff --git a/pychunkedgraph/graph/edits.py b/pychunkedgraph/graph/edits.py
index 340cefadd..afe1b3abf 100644
--- a/pychunkedgraph/graph/edits.py
+++ b/pychunkedgraph/graph/edits.py
@@ -497,7 +497,7 @@ def _update_neighbor_cross_edges(
     return updated_entries
 
 
-def _get_supervoxels(cg, node_ids):
+def get_supervoxels(cg, node_ids):
     """Returns the first supervoxel found for each node_id."""
     result = {}
     node_ids_copy = np.copy(node_ids)
@@ -606,7 +606,7 @@ def _update_cross_edge_cache(self, parent, children):
         )
         cx_edges_d = concatenate_cross_edge_dicts(cx_edges_d.values())
         edge_nodes = np.unique(np.concatenate([*cx_edges_d.values(), types.empty_2d]))
-        edge_supervoxels = _get_supervoxels(self.cg, edge_nodes)
+        edge_supervoxels = get_supervoxels(self.cg, edge_nodes)
         edge_parents = self.cg.get_roots(
             edge_supervoxels,
             stop_layer=parent_layer,
@@ -671,7 +671,8 @@ def _create_new_parents(self, layer: int):
                     l2c1 = get_l2children(self.cg, c1)
                     l2c2 = get_l2children(self.cg, c2)
                     if np.intersect1d(l2c1, l2c2).size:
-                        msg = f"{self._operation_id}:{c1} {c2} have common children."
+                        c = np.intersect1d(l2c1, l2c2)
+                        msg = f"{self._operation_id}: {layer} {c1} {c2} have common children {c}"
                         raise ValueError(msg)
 
     def run(self) -> Iterable:
diff --git a/pychunkedgraph/ingest/upgrade/parent_layer.py b/pychunkedgraph/ingest/upgrade/parent_layer.py
index 0606ff674..2869fcf85 100644
--- a/pychunkedgraph/ingest/upgrade/parent_layer.py
+++ b/pychunkedgraph/ingest/upgrade/parent_layer.py
@@ -10,6 +10,7 @@
 
 from pychunkedgraph.graph import ChunkedGraph
 from pychunkedgraph.graph.attributes import Connectivity, Hierarchy
+from pychunkedgraph.graph.edits import get_supervoxels
 from pychunkedgraph.graph.utils import serializers
 from pychunkedgraph.graph.types import empty_2d
 from pychunkedgraph.utils.general import chunked
@@ -101,7 +102,8 @@ def update_cross_edges(cg: ChunkedGraph, layer, node, node_ts, earliest_ts) -> l
         if edges.size == 0:
             continue
         nodes = np.unique(edges[:, 1])
-        parents = cg.get_roots(nodes, time_stamp=ts, stop_layer=layer, ceil=False)
+        svs = get_supervoxels(cg, nodes)
+        parents = cg.get_roots(svs, time_stamp=ts, stop_layer=layer, ceil=False)
         edge_parents_d = dict(zip(nodes, parents))
         val_dict = {}
         for _layer, layer_edges in cx_edges_d.items():

From 10373410b5147c4690f45ccdb41dcd86bb0c7585 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Thu, 26 Sep 2024 16:59:39 +0000
Subject: [PATCH 100/105] fix(upgrade): include cx edges at node_ts explicitly

---
 pychunkedgraph/ingest/upgrade/parent_layer.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pychunkedgraph/ingest/upgrade/parent_layer.py b/pychunkedgraph/ingest/upgrade/parent_layer.py
index 2869fcf85..a7e79b8f0 100644
--- a/pychunkedgraph/ingest/upgrade/parent_layer.py
+++ b/pychunkedgraph/ingest/upgrade/parent_layer.py
@@ -51,7 +51,7 @@ def _get_cx_edges_at_timestamp(node, response, ts):
 
 
 def _populate_cx_edges_with_timestamps(
-    cg: ChunkedGraph, layer: int, nodes: list, earliest_ts
+    cg: ChunkedGraph, layer: int, nodes: list, nodes_ts:list, earliest_ts
 ):
     """
     Collect timestamps of edits from children, since we use the same timestamp
@@ -63,9 +63,10 @@ def _populate_cx_edges_with_timestamps(
     all_children = np.concatenate(list(CHILDREN.values()))
     response = cg.client.read_nodes(node_ids=all_children, properties=attrs)
     timestamps_d = get_parent_timestamps(cg, nodes)
-    for node in nodes:
+    for node, node_ts in zip(nodes, nodes_ts):
         CX_EDGES[node] = {}
         timestamps = timestamps_d[node]
+        timestamps.add(node_ts)
         for ts in sorted(timestamps):
             if ts < earliest_ts:
                 ts = earliest_ts
@@ -82,6 +83,7 @@ def update_cross_edges(cg: ChunkedGraph, layer, node, node_ts, earliest_ts) -> l
         try:
             cx_edges_d = CX_EDGES[node][node_ts]
         except KeyError:
+            print(CX_EDGES)
             raise KeyError(f"{node}:{node_ts}")
         edges = np.concatenate([empty_2d] + list(cx_edges_d.values()))
         if edges.size:
@@ -149,7 +151,7 @@ def update_chunk(
     nodes = list(CHILDREN.keys())
     random.shuffle(nodes)
     nodes_ts = cg.get_node_timestamps(nodes, return_numpy=False, normalize=True)
-    _populate_cx_edges_with_timestamps(cg, layer, nodes, earliest_ts)
+    _populate_cx_edges_with_timestamps(cg, layer, nodes, nodes_ts, earliest_ts)
 
     task_size = int(math.ceil(len(nodes) / mp.cpu_count() / 2))
     chunked_nodes = chunked(nodes, task_size)

From f1100adf6599fdb352c134d33e6aae6b84e2728e Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sun, 29 Sep 2024 19:42:14 +0000
Subject: [PATCH 101/105] adds job type guard, flush_redis prompts, improved
 status output

---
 pychunkedgraph/__init__.py                    |  2 +-
 pychunkedgraph/ingest/cli.py                  | 27 ++++++++--
 pychunkedgraph/ingest/cli_upgrade.py          | 29 ++++++++---
 pychunkedgraph/ingest/upgrade/parent_layer.py | 22 ++++-----
 pychunkedgraph/ingest/utils.py                | 49 ++++++++++++++++---
 pychunkedgraph/utils/redis.py                 |  4 +-
 6 files changed, 97 insertions(+), 36 deletions(-)

diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index e94f36fe8..6ed01825f 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.5"
+__version__ = "3.0.6"
diff --git a/pychunkedgraph/ingest/cli.py b/pychunkedgraph/ingest/cli.py
index 928e1852f..c50525ec6 100644
--- a/pychunkedgraph/ingest/cli.py
+++ b/pychunkedgraph/ingest/cli.py
@@ -16,15 +16,17 @@
     bootstrap,
     chunk_id_str,
     print_completion_rate,
-    print_ingest_status,
+    print_status,
     queue_layer_helper,
+    job_type_guard,
 )
 from .simple_tests import run_all
 from .create.parent_layer import add_parent_chunk
 from ..graph.chunkedgraph import ChunkedGraph
 from ..utils.redis import get_redis_connection, keys as r_keys
 
-ingest_cli = AppGroup("ingest")
+group_name = "ingest"
+ingest_cli = AppGroup(group_name)
 
 
 def init_ingest_cmds(app):
@@ -32,6 +34,8 @@ def init_ingest_cmds(app):
 
 
 @ingest_cli.command("flush_redis")
+@click.confirmation_option(prompt="Are you sure you want to flush redis?")
+@job_type_guard(group_name)
 def flush_redis():
     """FLush redis db."""
     redis = get_redis_connection()
@@ -44,6 +48,7 @@ def flush_redis():
 @click.option("--raw", is_flag=True, help="Read edges from agglomeration output.")
 @click.option("--test", is_flag=True, help="Test 8 chunks at the center of dataset.")
 @click.option("--retry", is_flag=True, help="Rerun without creating a new table.")
+@job_type_guard(group_name)
 def ingest_graph(
     graph_id: str, dataset: click.Path, raw: bool, test: bool, retry: bool
 ):
@@ -51,6 +56,8 @@ def ingest_graph(
     Main ingest command.
     Takes ingest config from a yaml file and queues atomic tasks.
     """
+    redis = get_redis_connection()
+    redis.set(r_keys.JOB_TYPE, group_name)
     with open(dataset, "r") as stream:
         config = yaml.safe_load(stream)
 
@@ -70,6 +77,7 @@ def ingest_graph(
 @click.argument("graph_id", type=str)
 @click.argument("dataset", type=click.Path(exists=True))
 @click.option("--raw", is_flag=True)
+@job_type_guard(group_name)
 def pickle_imanager(graph_id: str, dataset: click.Path, raw: bool):
     """
     Load ingest config into redis server.
@@ -83,11 +91,12 @@ def pickle_imanager(graph_id: str, dataset: click.Path, raw: bool):
 
     meta, ingest_config, _ = bootstrap(graph_id, config=config, raw=raw)
     imanager = IngestionManager(ingest_config, meta)
-    imanager.redis  # pylint: disable=pointless-statement
+    imanager.redis.set(r_keys.JOB_TYPE, group_name)
 
 
 @ingest_cli.command("layer")
 @click.argument("parent_layer", type=int)
+@job_type_guard(group_name)
 def queue_layer(parent_layer):
     """
     Queue all chunk tasks at a given layer.
@@ -100,16 +109,21 @@ def queue_layer(parent_layer):
 
 
 @ingest_cli.command("status")
+@job_type_guard(group_name)
 def ingest_status():
     """Print ingest status to console by layer."""
     redis = get_redis_connection()
-    imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-    print_ingest_status(imanager, redis)
+    try:
+        imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
+        print_status(imanager, redis)
+    except TypeError as err:
+        print(f"\nNo current `{group_name}` job found in redis: {err}")
 
 
 @ingest_cli.command("chunk")
 @click.argument("queue", type=str)
 @click.argument("chunk_info", nargs=4, type=int)
+@job_type_guard(group_name)
 def ingest_chunk(queue: str, chunk_info):
     """Manually queue chunk when a job is stuck for whatever reason."""
     redis = get_redis_connection()
@@ -135,6 +149,7 @@ def ingest_chunk(queue: str, chunk_info):
 @click.argument("graph_id", type=str)
 @click.argument("chunk_info", nargs=4, type=int)
 @click.option("--n_threads", type=int, default=1)
+@job_type_guard(group_name)
 def ingest_chunk_local(graph_id: str, chunk_info, n_threads: int):
     """Manually ingest a chunk on a local machine."""
     layer, coords = chunk_info[0], chunk_info[1:]
@@ -150,6 +165,7 @@ def ingest_chunk_local(graph_id: str, chunk_info, n_threads: int):
 @ingest_cli.command("rate")
 @click.argument("layer", type=int)
 @click.option("--span", default=10, help="Time span to calculate rate.")
+@job_type_guard(group_name)
 def rate(layer: int, span: int):
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
@@ -158,5 +174,6 @@ def rate(layer: int, span: int):
 
 @ingest_cli.command("run_tests")
 @click.argument("graph_id", type=str)
+@job_type_guard(group_name)
 def run_tests(graph_id):
     run_all(ChunkedGraph(graph_id=graph_id))
diff --git a/pychunkedgraph/ingest/cli_upgrade.py b/pychunkedgraph/ingest/cli_upgrade.py
index c77c0be64..84939544b 100644
--- a/pychunkedgraph/ingest/cli_upgrade.py
+++ b/pychunkedgraph/ingest/cli_upgrade.py
@@ -24,15 +24,17 @@
 from .utils import (
     chunk_id_str,
     print_completion_rate,
-    print_ingest_status,
+    print_status,
     queue_layer_helper,
     start_ocdbt_server,
+    job_type_guard,
 )
 from ..graph.chunkedgraph import ChunkedGraph, ChunkedGraphMeta
 from ..utils.redis import get_redis_connection
 from ..utils.redis import keys as r_keys
 
-upgrade_cli = AppGroup("upgrade")
+group_name = "upgrade"
+upgrade_cli = AppGroup(group_name)
 
 
 def init_upgrade_cmds(app):
@@ -40,6 +42,8 @@ def init_upgrade_cmds(app):
 
 
 @upgrade_cli.command("flush_redis")
+@click.confirmation_option(prompt="Are you sure you want to flush redis?")
+@job_type_guard(group_name)
 def flush_redis():
     """FLush redis db."""
     redis = get_redis_connection()
@@ -50,11 +54,13 @@ def flush_redis():
 @click.argument("graph_id", type=str)
 @click.option("--test", is_flag=True, help="Test 8 chunks at the center of dataset.")
 @click.option("--ocdbt", is_flag=True, help="Store edges using ts ocdbt kv store.")
+@job_type_guard(group_name)
 def upgrade_graph(graph_id: str, test: bool, ocdbt: bool):
     """
-    Main upgrade command.
-    Takes upgrade config from a yaml file and queues atomic tasks.
+    Main upgrade command. Queues atomic tasks.
     """
+    redis = get_redis_connection()
+    redis.set(r_keys.JOB_TYPE, group_name)
     ingest_config = IngestConfig(TEST_RUN=test)
     cg = ChunkedGraph(graph_id=graph_id)
     cg.client.add_graph_version(__version__, overwrite=True)
@@ -91,6 +97,7 @@ def upgrade_graph(graph_id: str, test: bool, ocdbt: bool):
 
 @upgrade_cli.command("layer")
 @click.argument("parent_layer", type=int)
+@job_type_guard(group_name)
 def queue_layer(parent_layer):
     """
     Queue all chunk tasks at a given layer.
@@ -103,17 +110,22 @@ def queue_layer(parent_layer):
 
 
 @upgrade_cli.command("status")
-def ingest_status():
+@job_type_guard(group_name)
+def upgrade_status():
     """Print upgrade status to console."""
     redis = get_redis_connection()
-    imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
-    print_ingest_status(imanager, redis, upgrade=True)
+    try:
+        imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
+        print_status(imanager, redis, upgrade=True)
+    except TypeError as err:
+        print(f"\nNo current `{group_name}` job found in redis: {err}")
 
 
 @upgrade_cli.command("chunk")
 @click.argument("queue", type=str)
 @click.argument("chunk_info", nargs=4, type=int)
-def ingest_chunk(queue: str, chunk_info):
+@job_type_guard(group_name)
+def upgrade_chunk(queue: str, chunk_info):
     """Manually queue chunk when a job is stuck for whatever reason."""
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
@@ -137,6 +149,7 @@ def ingest_chunk(queue: str, chunk_info):
 @upgrade_cli.command("rate")
 @click.argument("layer", type=int)
 @click.option("--span", default=10, help="Time span to calculate rate.")
+@job_type_guard(group_name)
 def rate(layer: int, span: int):
     redis = get_redis_connection()
     imanager = IngestionManager.from_pickle(redis.get(r_keys.INGESTION_MANAGER))
diff --git a/pychunkedgraph/ingest/upgrade/parent_layer.py b/pychunkedgraph/ingest/upgrade/parent_layer.py
index a7e79b8f0..7c95cc1b6 100644
--- a/pychunkedgraph/ingest/upgrade/parent_layer.py
+++ b/pychunkedgraph/ingest/upgrade/parent_layer.py
@@ -6,7 +6,7 @@
 
 import fastremap
 import numpy as np
-from multiwrapper import multiprocessing_utils as mu
+from tqdm import tqdm
 
 from pychunkedgraph.graph import ChunkedGraph
 from pychunkedgraph.graph.attributes import Connectivity, Hierarchy
@@ -51,7 +51,7 @@ def _get_cx_edges_at_timestamp(node, response, ts):
 
 
 def _populate_cx_edges_with_timestamps(
-    cg: ChunkedGraph, layer: int, nodes: list, nodes_ts:list, earliest_ts
+    cg: ChunkedGraph, layer: int, nodes: list, nodes_ts: list, earliest_ts
 ):
     """
     Collect timestamps of edits from children, since we use the same timestamp
@@ -83,7 +83,6 @@ def update_cross_edges(cg: ChunkedGraph, layer, node, node_ts, earliest_ts) -> l
         try:
             cx_edges_d = CX_EDGES[node][node_ts]
         except KeyError:
-            print(CX_EDGES)
             raise KeyError(f"{node}:{node_ts}")
         edges = np.concatenate([empty_2d] + list(cx_edges_d.values()))
         if edges.size:
@@ -158,15 +157,14 @@ def update_chunk(
     chunked_nodes_ts = chunked(nodes_ts, task_size)
     cg_info = cg.get_serialized_info()
 
-    multi_args = []
+    tasks = []
     for chunk, ts_chunk in zip(chunked_nodes, chunked_nodes_ts):
         args = (cg_info, layer, chunk, ts_chunk, earliest_ts)
-        multi_args.append(args)
-
-    print(f"nodes: {len(nodes)}, tasks: {len(multi_args)}, size: {task_size}")
-    mu.multiprocess_func(
-        _update_cross_edges_helper,
-        multi_args,
-        n_threads=min(len(multi_args), mp.cpu_count()),
-    )
+        tasks.append(args)
+
+    with mp.Pool(min(mp.cpu_count(), len(tasks))) as pool:
+        tqdm(
+            pool.imap_unordered(_update_cross_edges_helper, tasks),
+            total=len(tasks),
+        )
     print(f"total elaspsed time: {time.time() - start}")
diff --git a/pychunkedgraph/ingest/utils.py b/pychunkedgraph/ingest/utils.py
index 3d573ce37..1692db43b 100644
--- a/pychunkedgraph/ingest/utils.py
+++ b/pychunkedgraph/ingest/utils.py
@@ -1,6 +1,7 @@
 # pylint: disable=invalid-name, missing-docstring
 
 import logging
+import functools
 from os import environ
 from time import sleep
 from typing import Any, Generator, Tuple
@@ -16,6 +17,8 @@
 from ..graph.client import BackendClientInfo
 from ..graph.client.bigtable import BigTableConfig
 from ..utils.general import chunked
+from ..utils.redis import get_redis_connection
+from ..utils.redis import keys as r_keys
 
 chunk_id_str = lambda layer, coords: f"{layer}_{'_'.join(map(str, coords))}"
 
@@ -116,7 +119,7 @@ def print_completion_rate(imanager: IngestionManager, layer: int, span: int = 10
     print(f"{rate} chunks per second.")
 
 
-def print_ingest_status(imanager: IngestionManager, redis, upgrade: bool = False):
+def print_status(imanager: IngestionManager, redis, upgrade: bool = False):
     """
     Helper to print status to console.
     If `upgrade=True`, status does not include the root layer,
@@ -128,6 +131,7 @@ def print_ingest_status(imanager: IngestionManager, redis, upgrade: bool = False
     layer_counts = imanager.cg_meta.layer_chunk_counts
 
     pipeline = redis.pipeline()
+    pipeline.get(r_keys.JOB_TYPE)
     worker_busy = []
     for layer in layers:
         pipeline.scard(f"{layer}c")
@@ -138,25 +142,32 @@ def print_ingest_status(imanager: IngestionManager, redis, upgrade: bool = False
         worker_busy.append(sum([w.get_state() == WorkerStatus.BUSY for w in workers]))
 
     results = pipeline.execute()
+    job_type = "not_available"
+    if results[0] is not None:
+        job_type = results[0].decode()
     completed = []
     queued = []
     failed = []
-    for i in range(0, len(results), 3):
+    for i in range(1, len(results), 3):
         result = results[i : i + 3]
         completed.append(result[0])
         queued.append(result[1])
         failed.append(result[2])
 
-    print(f"version: \t{imanager.cg.version}")
-    print(f"graph_id: \t{imanager.cg.graph_id}")
-    print(f"chunk_size: \t{imanager.cg.meta.graph_config.CHUNK_SIZE}")
-    print("\nlayer status:")
+    header = (
+        f"\njob_type: \t{job_type}"
+        f"\nversion: \t{imanager.cg.version}"
+        f"\ngraph_id: \t{imanager.cg.graph_id}"
+        f"\nchunk_size: \t{imanager.cg.meta.graph_config.CHUNK_SIZE}"
+        "\n\nlayer status:"
+    )
+    print(header)
     for layer, done, count in zip(layers, completed, layer_counts):
-        print(f"{layer}\t: {done:<9} / {count}")
+        print(f"{layer}\t| {done:9} / {count} \t| {done/count:6.1%}")
 
     print("\n\nqueue status:")
     for layer, q, f, wb in zip(layers, queued, failed, worker_busy):
-        print(f"l{layer}\t: queued: {q:<10} failed: {f:<10} busy: {wb}")
+        print(f"l{layer}\t| queued: {q:<10} failed: {f:<10} busy: {wb}")
 
 
 def queue_layer_helper(parent_layer: int, imanager: IngestionManager, fn):
@@ -190,3 +201,25 @@ def queue_layer_helper(parent_layer: int, imanager: IngestionManager, fn):
                 )
             )
         q.enqueue_many(job_datas)
+
+
+def job_type_guard(job_type: str):
+    """Decorator factory: exit(1) when redis records a job type other than `job_type`."""
+
+    def decorator_job_type_guard(func):
+        @functools.wraps(func)
+        def wrapper_job_type_guard(*args, **kwargs):
+            raw = get_redis_connection().get(r_keys.JOB_TYPE)
+            current_type = None if raw is None else raw.decode()  # bytes -> str
+            if current_type not in (None, job_type):
+                msg = (
+                    f"Currently running `{current_type}`. You're attempting to run `{job_type}`."
+                    f"\nRun `[flask] {current_type} flush_redis` to clear the current job and restart."
+                )
+                print(f"\n*WARNING*\n{msg}")
+                raise SystemExit(1)  # not site's interactive-only exit()
+            return func(*args, **kwargs)
+
+        return wrapper_job_type_guard
+
+    return decorator_job_type_guard
diff --git a/pychunkedgraph/utils/redis.py b/pychunkedgraph/utils/redis.py
index 420a849f1..fa43c867a 100644
--- a/pychunkedgraph/utils/redis.py
+++ b/pychunkedgraph/utils/redis.py
@@ -19,8 +19,8 @@
 REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "")
 REDIS_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}/0"
 
-keys_fields = ("INGESTION_MANAGER",)
-keys_defaults = ("pcg:imanager",)
+keys_fields = ("INGESTION_MANAGER", "JOB_TYPE")
+keys_defaults = ("pcg:imanager", "pcg:job_type")
 Keys = namedtuple("keys", keys_fields, defaults=keys_defaults)
 
 keys = Keys()

From e62390a42c8e5237abb98e0d0a01d647a96b0a7f Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Sun, 10 Nov 2024 19:47:48 +0000
Subject: [PATCH 102/105] fix(upgrade): include timestamps for partner
 supervoxel parents

---
 pychunkedgraph/__init__.py                    | 2 +-
 pychunkedgraph/ingest/upgrade/atomic_layer.py | 8 ++++++--
 pychunkedgraph/ingest/upgrade/parent_layer.py | 8 +++++---
 pychunkedgraph/ingest/utils.py                | 3 ++-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 6ed01825f..c11769ec9 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.6"
+__version__ = "3.0.7"
diff --git a/pychunkedgraph/ingest/upgrade/atomic_layer.py b/pychunkedgraph/ingest/upgrade/atomic_layer.py
index a975146de..c9c8bdb11 100644
--- a/pychunkedgraph/ingest/upgrade/atomic_layer.py
+++ b/pychunkedgraph/ingest/upgrade/atomic_layer.py
@@ -12,7 +12,7 @@
 
 
 def update_cross_edges(
-    cg: ChunkedGraph, node, cx_edges_d, node_ts, timestamps, earliest_ts
+    cg: ChunkedGraph, node, cx_edges_d: dict, node_ts, timestamps: set, earliest_ts
 ) -> list:
     """
     Helper function to update a single L2 ID.
@@ -27,7 +27,11 @@ def update_cross_edges(
         assert not exists_as_parent(cg, node, edges[:, 0])
         return rows
 
-    for ts in timestamps:
+    partner_parent_ts_d = get_parent_timestamps(cg, edges[:, 1])
+    for v in partner_parent_ts_d.values():
+        timestamps.update(v)
+
+    for ts in sorted(timestamps):
         if ts < earliest_ts:
             ts = earliest_ts
         val_dict = {}
diff --git a/pychunkedgraph/ingest/upgrade/parent_layer.py b/pychunkedgraph/ingest/upgrade/parent_layer.py
index 7c95cc1b6..dace88b43 100644
--- a/pychunkedgraph/ingest/upgrade/parent_layer.py
+++ b/pychunkedgraph/ingest/upgrade/parent_layer.py
@@ -163,8 +163,10 @@ def update_chunk(
         tasks.append(args)
 
     with mp.Pool(min(mp.cpu_count(), len(tasks))) as pool:
-        tqdm(
-            pool.imap_unordered(_update_cross_edges_helper, tasks),
-            total=len(tasks),
+        _ = list(
+            tqdm(
+                pool.imap_unordered(_update_cross_edges_helper, tasks),
+                total=len(tasks),
+            )
         )
     print(f"total elaspsed time: {time.time() - start}")
diff --git a/pychunkedgraph/ingest/utils.py b/pychunkedgraph/ingest/utils.py
index 1692db43b..45b6e728f 100644
--- a/pychunkedgraph/ingest/utils.py
+++ b/pychunkedgraph/ingest/utils.py
@@ -2,6 +2,7 @@
 
 import logging
 import functools
+import math
 from os import environ
 from time import sleep
 from typing import Any, Generator, Tuple
@@ -163,7 +164,7 @@ def print_status(imanager: IngestionManager, redis, upgrade: bool = False):
     )
     print(header)
     for layer, done, count in zip(layers, completed, layer_counts):
-        print(f"{layer}\t| {done:9} / {count} \t| {done/count:6.1%}")
+        print(f"{layer}\t| {done:9} / {count} \t| {math.floor((done/count)*100):6}%")
 
     print("\n\nqueue status:")
     for layer, q, f, wb in zip(layers, queued, failed, worker_busy):

From b8bcc3c8f5b59364e1bf3f492ab9dc6f769a217a Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Thu, 21 Nov 2024 18:49:27 +0000
Subject: [PATCH 103/105] fix(upgrade): use timestamps of partners at layers >
 2

---
 pychunkedgraph/__init__.py                    | 2 +-
 pychunkedgraph/ingest/upgrade/parent_layer.py | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index c11769ec9..35c154a9d 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.7"
+__version__ = "3.0.8"
diff --git a/pychunkedgraph/ingest/upgrade/parent_layer.py b/pychunkedgraph/ingest/upgrade/parent_layer.py
index dace88b43..6f0b08711 100644
--- a/pychunkedgraph/ingest/upgrade/parent_layer.py
+++ b/pychunkedgraph/ingest/upgrade/parent_layer.py
@@ -66,7 +66,14 @@ def _populate_cx_edges_with_timestamps(
     for node, node_ts in zip(nodes, nodes_ts):
         CX_EDGES[node] = {}
         timestamps = timestamps_d[node]
-        timestamps.add(node_ts)
+        cx_edges_d_node_ts = _get_cx_edges_at_timestamp(node, response, node_ts)
+
+        edges = np.concatenate([empty_2d] + list(cx_edges_d_node_ts.values()))
+        partner_parent_ts_d = get_parent_timestamps(cg, edges[:, 1])
+        for v in partner_parent_ts_d.values():
+            timestamps.update(v)
+        CX_EDGES[node][node_ts] = cx_edges_d_node_ts
+
         for ts in sorted(timestamps):
             if ts < earliest_ts:
                 ts = earliest_ts

From 53b8e41a35d31d30876cdf76c968e2b9d9f56d70 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Thu, 5 Dec 2024 20:33:53 +0000
Subject: [PATCH 104/105] version 3.0.9

---
 .bumpversion.cfg           | 2 +-
 pychunkedgraph/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 6526fbc66..250e55eff 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 3.0.1
+current_version = 3.0.9
 commit = True
 tag = True
 
diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 35c154a9d..67ae584d7 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.8"
+__version__ = "3.0.9"

From 47f2d2f1d55556ef073210d659dff00c7a663d24 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri <akhileshhalageri@gmail.com>
Date: Mon, 9 Dec 2024 22:33:47 +0000
Subject: [PATCH 105/105] feat: use mesh dir and dynamic dir from metadata

---
 .bumpversion.cfg           |  2 +-
 pychunkedgraph/__init__.py |  2 +-
 workers/mesh_worker.py     | 18 ++++++------------
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 250e55eff..2a9dad726 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 3.0.9
+current_version = 3.0.10
 commit = True
 tag = True
 
diff --git a/pychunkedgraph/__init__.py b/pychunkedgraph/__init__.py
index 67ae584d7..84994dc59 100644
--- a/pychunkedgraph/__init__.py
+++ b/pychunkedgraph/__init__.py
@@ -1 +1 @@
-__version__ = "3.0.9"
+__version__ = "3.0.10"
diff --git a/workers/mesh_worker.py b/workers/mesh_worker.py
index 2dd3dd34d..52864a89b 100644
--- a/workers/mesh_worker.py
+++ b/workers/mesh_worker.py
@@ -37,9 +37,12 @@ def callback(payload):
     )
 
     try:
-        mesh_dir = cg.meta.dataset_info["mesh"]
-        mesh_meta = cg.meta.dataset_info["mesh_metadata"]
-        cv_unsharded_mesh_dir = mesh_meta.get("unsharded_mesh_dir", f"dynamic_{cg.graph_id}")
+        mesh_meta = cg.meta.custom_data["mesh"]
+        mesh_dir = mesh_meta["dir"]
+        layer = mesh_meta["max_layer"]
+        mip = mesh_meta["mip"]
+        err = mesh_meta["max_error"]
+        cv_unsharded_mesh_dir = mesh_meta.get("dynamic_mesh_dir", "dynamic")
     except KeyError:
         logging.warning(f"No metadata found for {cg.graph_id}; ignoring...")
         return
@@ -48,15 +51,6 @@ def callback(payload):
         cg.meta.data_source.WATERSHED, mesh_dir, cv_unsharded_mesh_dir
     )
 
-    try:
-        mesh_data = cg.meta.custom_data["mesh"]
-        layer = mesh_data["max_layer"]
-        mip = mesh_data["mip"]
-        err = mesh_data["max_error"]
-    except KeyError:
-        return
-
-
     logging.log(INFO_HIGH, f"remeshing {l2ids}; graph {table_id} operation {op_id}.")
     meshgen.remeshing(
         cg,