Skip to content

Commit bdfb0e7

Browse files
committed
add python method to extract the vertex list and update API
1 parent 2fa5896 commit bdfb0e7

File tree

1 file changed

+65
-26
lines changed

1 file changed

+65
-26
lines changed

python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py

Lines changed: 65 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from cugraph.structure.replicate_edgelist import replicate_cudf_dataframe
1616
from cugraph.structure.symmetrize import symmetrize as symmetrize_df
1717
from pylibcugraph import decompress_to_edgelist as pylibcugraph_decompress_to_edgelist
18+
from pylibcugraph import extract_vertex_list as pylibcugraph_extract_vertex_list
1819
from cugraph.structure.number_map import NumberMap
1920
import cugraph.dask.common.mg_utils as mg_utils
2021
import cudf
@@ -299,16 +300,6 @@ def __from_edgelist(
299300

300301
else:
301302
value_col = None
302-
303-
if self.properties.renumbered is True:
304-
if isinstance(vertices, cudf.Series):
305-
vertices = G.lookup_internal_vertex_id(vertices, vertices.columns)
306-
else:
307-
start = G.lookup_internal_vertex_id(cudf.Series(start))
308-
309-
if not isinstance(vertices, cudf.Series):
310-
vertex_dtype = self.nodes().dtype
311-
vertices = cudf.Series(start, dtype=vertex_dtype)
312303

313304
# FIXME: if the user calls self.edgelist.edgelist_df after creating a
314305
# symmetric graph, return the symmetric edgelist?
@@ -320,13 +311,24 @@ def __from_edgelist(
320311

321312
if self.batch_enabled:
322313
self._replicate_edgelist()
314+
315+
if self.properties.renumbered is True:
316+
if isinstance(vertices, cudf.Series):
317+
vertices = self.lookup_internal_vertex_id(vertices, vertices.columns)
318+
else:
319+
vertices = self.lookup_internal_vertex_id(cudf.Series(vertices))
320+
321+
if not isinstance(vertices, cudf.Series):
322+
vertex_dtype = self.edgelist.edgelist_df[simpleGraphImpl.srcCol].dtype
323+
vertices = cudf.Series(vertices, dtype=vertex_dtype)
323324

324325
self._make_plc_graph(
325326
value_col=value_col,
326327
store_transposed=store_transposed,
327328
renumber=renumber,
328329
drop_multi_edges=not self.properties.multi_edge,
329330
symmetrize=symmetrize,
331+
vertices=vertices
330332
)
331333

332334
def to_pandas_edgelist(
@@ -889,6 +891,56 @@ def decompress_to_edgelist(
889891
df, _ = self.renumber_map.unrenumber(df, "dst", get_column_names=True)
890892

891893
return df
894+
895+
def extract_vertex_list(
    self, return_unrenumbered_vertices: bool = True
) -> "cudf.Series | cudf.DataFrame":
    """
    Extract the vertices from a graph.

    Parameters
    ----------
    return_unrenumbered_vertices : bool (default=True)
        Flag determining whether to return the original input vertices
        if 'True' or the renumbered ones if 'False'. It only has an effect
        when the edgelist was renumbered.

    Returns
    -------
    vertices : cudf.Series or cudf.DataFrame
        GPU Series containing all the vertices in the graph including
        isolated vertices. A cudf.DataFrame is returned instead when
        unrenumbering produces more than one column (presumably the
        multi-column vertices case).

    Examples
    --------
    >>> from cugraph.datasets import karate
    >>> G = karate.get_graph(download=True)
    >>> vertices = G.extract_vertex_list()

    """
    vertices = pylibcugraph_extract_vertex_list(
        resource_handle=ResourceHandle(),
        graph=self._plc_graph,
        do_expensive_check=False,
    )

    # Give the result the same dtype as the source column of the stored
    # edgelist so it is consistent with the vertex ids held by the graph.
    vertices = cudf.Series(
        vertices,
        dtype=self.edgelist.edgelist_df[simpleGraphImpl.srcCol].dtype,
    )

    if self.properties.renumbered and return_unrenumbered_vertices:
        # Map the internal ids back to the user-facing vertex ids.
        df_ = cudf.DataFrame()
        df_["vertex"] = vertices
        df_ = self.renumber_map.unrenumber(df_, "vertex")
        if len(df_.columns) > 1:
            # More than one column came back: hand over the whole frame.
            vertices = df_
        else:
            vertices = df_["vertex"]

    return vertices
892944

893945
def select_random_vertices(
894946
self,
@@ -1491,22 +1543,9 @@ def nodes(self):
14911543
If multi columns vertices, return a cudf.DataFrame.
14921544
"""
14931545
if self.edgelist is not None:
1494-
df = self.edgelist.edgelist_df
1495-
if self.properties.renumbered:
1496-
df = self.renumber_map.df_internal_to_external.drop(columns="id")
1497-
1498-
if len(df.columns) > 1:
1499-
return df
1500-
else:
1501-
return df[df.columns[0]]
1502-
else:
1503-
return (
1504-
cudf.concat(
1505-
[df[simpleGraphImpl.srcCol], df[simpleGraphImpl.dstCol]]
1506-
)
1507-
.drop_duplicates()
1508-
.reset_index(drop=True)
1509-
)
1546+
# Retrieve the vertex list
1547+
return self.extract_vertex_list(return_unrenumbered_vertices=False)
1548+
15101549
if self.adjlist is not None:
15111550
return cudf.Series(np.arange(0, self.number_of_nodes()))
15121551

0 commit comments

Comments
 (0)