Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0 of find_minimal_coverage #510

Merged
merged 10 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.17.2
current_version = 2.18.2
commit = True
tag = True

Expand Down
11 changes: 1 addition & 10 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,8 @@ steps:
args:
- "-c"
- |
docker build -t gcr.io/$PROJECT_ID/pychunkedgraph:$TAG_NAME .
docker build -t $$USERNAME/pychunkedgraph:$TAG_NAME .
timeout: 600s

# Additional tag for Dockerhub
- name: "gcr.io/cloud-builders/docker"
entrypoint: "bash"
args:
[
"-c",
"docker tag gcr.io/$PROJECT_ID/pychunkedgraph:$TAG_NAME $$USERNAME/pychunkedgraph:$TAG_NAME",
]
secretEnv: ["USERNAME"]

# Push the final image to Dockerhub
Expand Down
2 changes: 1 addition & 1 deletion pychunkedgraph/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.17.2"
__version__ = "2.18.2"
89 changes: 87 additions & 2 deletions pychunkedgraph/app/segmentation/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import time
from datetime import datetime
from functools import reduce
from collections import deque
from collections import deque, defaultdict

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -148,6 +148,91 @@ def handle_root(table_id, atomic_id):
return root_id


### GET MINIMAL COVERING NODES --------------------------------------------------


def handle_find_minimal_covering_nodes(table_id, is_binary=True):
    """Return a minimal set of nodes that together cover the requested node_ids.

    Request body: either a raw uint64 buffer (``is_binary=True``) or JSON
    ``{"node_ids": [...]}``. An optional ``timestamp`` request arg selects the
    graph version (defaults to now).

    Works bottom-up: queued nodes are replaced by their parents whenever the
    parent's leaf set is fully contained in the requested leaf set; otherwise
    the queued children of that parent are emitted as part of the cover.

    Returns a flat ``np.uint64`` array of covering node ids (possibly empty).
    """
    if is_binary:
        node_ids = np.frombuffer(request.data, np.uint64)
    else:
        node_ids = np.array(json.loads(request.data)["node_ids"], dtype=np.uint64)

    # Optional timestamp request argument; defaults to "now".
    timestamp = _parse_timestamp("timestamp", time.time(), return_datetime=True)

    # node_queue[layer]: nodes still to be processed at that layer.
    # download_list[layer]: nodes confirmed to belong to the minimal cover.
    node_queue = defaultdict(set)
    download_list = defaultdict(set)

    cg = app_utils.get_cg(table_id)

    # Empty request -> empty cover (np.concatenate below would raise otherwise).
    if len(node_ids) == 0:
        return np.array([], dtype=np.uint64)

    # Group the input nodes by their layer.
    initial_layers = np.array([cg.get_chunk_layer(node_id) for node_id in node_ids])
    for node_id, layer in zip(node_ids, initial_layers):
        node_queue[layer].add(node_id)

    # All leaves of the request at the lowest requested layer; this is the
    # reference set used to decide whether a parent is fully covered.
    min_layer = np.min(initial_layers)
    min_children = cg.get_subgraph_nodes(
        node_ids, return_layers=[min_layer], serializable=False, return_flattened=True
    )
    min_children = np.concatenate(
        [min_children[node_id] for node_id in min_children.keys()]
    )

    # Process layers bottom-up, from min_layer toward the root layer.
    # NOTE(review): nodes promoted at the top iteration (layer_count - 1) whose
    # parent is at the same layer may be dropped when the queue is cleared —
    # confirm behavior for requests covering an entire root.
    for layer in range(min_layer, cg.meta.layer_count):
        if len(node_queue[layer]) == 0:
            continue

        current_nodes = list(node_queue[layer])

        # Parents one layer up (get_roots with stop_layer=layer + 1).
        parents = cg.get_roots(current_nodes, stop_layer=layer + 1, time_stamp=timestamp)
        unique_parents = np.unique(parents)
        parent_layers = np.array(
            [cg.get_chunk_layer(parent) for parent in unique_parents]
        )

        # Leaves (at min_layer) of each candidate parent.
        leaves = cg.get_subgraph_nodes(
            unique_parents,
            return_layers=[min_layer],
            serializable=False,
            return_flattened=True,
        )

        for parent, parent_layer in zip(unique_parents, parent_layers):
            child_mask = np.isin(leaves[parent], min_children)
            if not np.all(child_mask):
                # Parent reaches leaves outside the request: it cannot replace
                # its children, so emit the queued children themselves.
                children = cg.get_children(parent)

                child_layers = np.array(
                    [cg.get_chunk_layer(child) for child in children]
                )
                for child, child_layer in zip(children, child_layers):
                    if child in node_queue[child_layer]:
                        download_list[child_layer].add(child)
            else:
                # Parent is fully covered: promote it for the next pass.
                node_queue[parent_layer].add(parent)

        # Everything at this layer has been either promoted or emitted.
        node_queue[layer].clear()

    # Flatten the per-layer cover sets into one array; guard the empty case.
    if not download_list:
        return np.array([], dtype=np.uint64)
    return np.concatenate([np.array(list(v)) for v in download_list.values()])


### GET ROOTS -------------------------------------------------------------------


Expand Down Expand Up @@ -1081,7 +1166,7 @@ def _handle_latest(cg, node_ids, timestamp):
for n in node_ids:
try:
v = row_dict[n]
new_roots_ts.append(v[-1].timestamp.timestamp()) # sorted descending
new_roots_ts.append(v[-1].timestamp.timestamp()) # sorted descending
except KeyError:
new_roots_ts.append(0)
new_roots_ts = deque(new_roots_ts)
Expand Down
17 changes: 17 additions & 0 deletions pychunkedgraph/app/segmentation/v1/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,23 @@ def handle_leaves_many(table_id):
return jsonify_with_kwargs(root_to_leaf_dict, int64_as_str=int64_as_str)


### GET MINIMAL COVERING NODES


@bp.route("/table/<table_id>/minimal_covering_nodes", methods=["POST"])
@auth_requires_permission("view", public_table_key="table_id")
@remap_public(check_node_ids=False)
def handle_minimal_covering_nodes(table_id):
    """POST endpoint: compute the minimal covering nodes for the posted ids.

    Query args: ``is_binary`` (request body is a raw uint64 buffer),
    ``as_array`` (return a binary array instead of JSON), ``int64_as_str``
    (serialize 64-bit ids as strings in the JSON response).
    """
    is_binary = request.args.get("is_binary", default=False, type=toboolean)
    covering_nodes = common.handle_find_minimal_covering_nodes(
        table_id, is_binary=is_binary
    )

    # Binary response path takes precedence over JSON serialization options.
    if request.args.get("as_array", default=False, type=toboolean):
        return tobinary(covering_nodes)

    int64_as_str = request.args.get("int64_as_str", default=False, type=toboolean)
    return jsonify_with_kwargs(covering_nodes, int64_as_str=int64_as_str)


### SUBGRAPH -------------------------------------------------------------------


Expand Down
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ flask
flask_cors
python-json-logger
redis
rq
rq<2
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed this was a problem when trying to get a local version of PCG running and so i added it here pre-emptively to save us time in the future when we try to recompile reqs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fine for now, version 2 was released only recently in Oct.

pyyaml
cachetools
werkzeug
Expand Down
Loading