Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide a subcommand to commit subdags. #433

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 4 additions & 46 deletions bioconda_utils/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,54 +319,12 @@ def build_recipes(
failed = []
skip_dependent = defaultdict(list)

# Get connected subdags and sort by nodes
if testonly:
# use each node as a subdag (they are grouped into equal sizes below)
subdags = sorted([[n] for n in nx.nodes(dag)])
else:
# take connected components as subdags, remove cycles
subdags = []
for cc_nodes in nx.connected_components(dag.to_undirected()):
cc = dag.subgraph(sorted(cc_nodes))
nodes_in_cycles = set()
for cycle in list(nx.simple_cycles(cc)):
logger.error(
'BUILD ERROR: '
'dependency cycle found: %s',
cycle,
)
nodes_in_cycles.update(cycle)
for name in sorted(nodes_in_cycles):
cycle_fail_recipes = sorted(name2recipes[name])
logger.error(
'BUILD ERROR: '
'cannot build recipes for %s since it cyclically depends '
'on other packages in the current build job. Failed '
'recipes: %s',
name, cycle_fail_recipes,
)
failed.extend(cycle_fail_recipes)
for n in nx.algorithms.descendants(cc, name):
if n in nodes_in_cycles:
continue # don't count packages twice (failed/skipped)
skip_dependent[n].extend(cycle_fail_recipes)
cc_without_cycles = dag.subgraph(
name for name in cc if name not in nodes_in_cycles
)
# ensure that packages which need a build are built in the right order
subdags.append(nx.topological_sort(cc_without_cycles))
# chunk subdags such that we have at most subdags_n many
if subdags_n < len(subdags):
chunks = [[n for subdag in subdags[i::subdags_n] for n in subdag]
for i in range(subdags_n)]
else:
chunks = subdags
if subdag_i >= len(chunks):
ret = graph.get_subdag(dag, name2recipes, subdags_n, subdag_i,
ignore_dependencies=testonly)
if ret is None:
logger.info("Nothing to be done.")
return True
# merge subdags of the selected chunk
subdag = dag.subgraph(chunks[subdag_i])

subdag, subdags_n = ret

recipes = [recipe
for package in subdag
Expand Down
16 changes: 14 additions & 2 deletions bioconda_utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def select_recipes(packages, git_range, recipe_folder, config_filename, config,
# Recipes with changed `meta.yaml` or `build.sh` files
changed_recipes = [
os.path.dirname(f) for f in modified
if os.path.basename(f) in ['meta.yaml', 'build.sh'] and
if os.path.basename(f) in ['meta.yaml', 'build.sh', 'post-link.sh', 'pre-unlink.sh'] and
os.path.exists(f)
]
logger.info(
Expand Down Expand Up @@ -839,6 +839,7 @@ def autobump(recipe_folder, config, packages='*', cache=None,
if git_handler:
git_handler.close()


@arg('--loglevel', default='info', help='Log level')
def bot(loglevel='info'):
"""Locally accedd bioconda-bot command API
Expand All @@ -854,8 +855,19 @@ def bot(loglevel='info'):

logger.error("Nothing here yet")


@arg('recipe_folder', help='Path to recipes directory')
@arg('config', help='Path to yaml file specifying the configuration')
@arg('--subdag', '-k', required=True, metavar='K', type=int, help='Commit the K-th of N subdags.')
@arg('--subdags', '-n', required=True, metavar='N', type=int, help='Number of subdags to consider.')
@arg('--message', '--msg', required=True, metavar='MSG', help='Commit message, will be rendered as "MSG: subdag K of N".')
@enable_logging()
def commit_subdags(recipe_folder, config, subdag=None, subdags=None, message=None):
    """Commit the recipes belonging to the K-th of N subdags.

    Thin command-line wrapper that forwards its arguments to
    ``utils.commit_subdags``.
    """
    # Keyword arguments make the mapping explicit: the CLI takes K (--subdag)
    # before N (--subdags), while the helper expects ``n`` before ``i``.
    utils.commit_subdags(
        recipe_folder,
        config,
        n=subdags,
        i=subdag,
        msg=message,
    )


def main():
    """Dispatch the command-line subcommands through ``argh``."""
    # Only the updated command list is kept: the stale pre-change line that
    # lacked ``commit_subdags`` (and a separating comma) is dropped.
    argh.dispatch_commands([
        build, dag, dependent, lint, duplicates, update_pinning,
        bioconductor_skeleton, clean_cran_skeleton, autobump, bot, commit_subdags
    ])
57 changes: 57 additions & 0 deletions bioconda_utils/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,60 @@ def filter(dag, packages):
raise

return nx.subgraph(dag, nodes)


def get_subdag(dag, name2recipes, n, i, ignore_dependencies=False,
               failed=None, skip_dependent=None):
    """Return the i-th of n chunks over the dependency graph.

    Chunks are built such that there is no dependency pointing outside of a
    chunk, unless ``ignore_dependencies`` is set to True.

    Parameters
    ----------
    dag
        Dependency graph; it is queried through ``to_undirected()`` and
        ``subgraph()`` and passed to networkx algorithms.
    name2recipes
        Mapping from a node name to an iterable of its recipes. Only read for
        nodes that take part in a dependency cycle.
    n
        Maximum number of chunks to create.
    i
        Index of the chunk to return.
    ignore_dependencies
        If True, every node is treated as its own subdag instead of grouping
        nodes by connected component.
    failed
        Optional list that is extended with the recipes of all nodes found in
        dependency cycles. A throwaway list is used when omitted.
    skip_dependent
        Optional mapping ``node -> list of recipes`` that records, for every
        descendant of a cyclic node, the recipes it was skipped for. A
        throwaway dict is used when omitted.

    Returns
    -------
    tuple or None
        ``(subdag, number_of_chunks)``, or None if there are no nodes left to
        put into chunk ``i``.
    """
    # The cycle bookkeeping is optional for callers; fall back to local
    # containers so that hitting a cycle never fails on an undefined name.
    if failed is None:
        failed = []
    if skip_dependent is None:
        skip_dependent = {}

    # Get connected subdags and sort by nodes
    if ignore_dependencies:
        # use each node as a subdag (they are grouped into equal sizes below)
        subdags = sorted([node] for node in nx.nodes(dag))
    else:
        # take connected components as subdags, remove cycles
        subdags = []
        for cc_nodes in nx.connected_components(dag.to_undirected()):
            cc = dag.subgraph(sorted(cc_nodes))
            nodes_in_cycles = set()
            for cycle in nx.simple_cycles(cc):
                logger.error(
                    'BUILD ERROR: '
                    'dependency cycle found: %s',
                    cycle,
                )
                nodes_in_cycles.update(cycle)
            for name in sorted(nodes_in_cycles):
                cycle_fail_recipes = sorted(name2recipes[name])
                logger.error(
                    'BUILD ERROR: '
                    'cannot build recipes for %s since it cyclically depends '
                    'on other packages in the current build job. Failed '
                    'recipes: %s',
                    name, cycle_fail_recipes,
                )
                failed.extend(cycle_fail_recipes)
                # The loop variable must not be called ``n``: that would
                # overwrite the chunk-count parameter used further down.
                for dependent in nx.algorithms.descendants(cc, name):
                    if dependent in nodes_in_cycles:
                        continue  # don't count packages twice (failed/skipped)
                    skip_dependent.setdefault(dependent, []).extend(
                        cycle_fail_recipes
                    )
            cc_without_cycles = dag.subgraph(
                name for name in cc if name not in nodes_in_cycles
            )
            # ensure that packages which need a build are built in the right
            # order; materialise the result because topological_sort may
            # return a one-shot generator
            subdags.append(list(nx.topological_sort(cc_without_cycles)))

    # chunk subdags such that we have at most n many
    if n < len(subdags):
        chunks = [
            [node for subdag in subdags[offset::n] for node in subdag]
            for offset in range(n)
        ]
    else:
        chunks = subdags
    if i >= len(chunks):
        return None

    # merge subdags of the selected chunk
    return dag.subgraph(chunks[i]), len(chunks)
34 changes: 34 additions & 0 deletions bioconda_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1386,3 +1386,37 @@ def get_package_data(self, key=None, channels=None, name=None, version=None,
if isinstance(key, str):
return list(df[key])
return df[key].itertuples(index=False)


def commit_subdags(recipe_folder, config_file, n, i, msg):
    """Create a git commit for the i-th of n subdags of changed recipes.

    Recipes that depend on each other end up in the same subdag and are
    therefore committed together.

    Parameters
    ----------
    recipe_folder
        Path to the recipes directory; the enclosing git repository is
        located by searching parent directories.
    config_file
        Path to the configuration file.
    n
        Number of subdags to split the changed recipes into.
    i
        Index of the subdag to commit.
    msg
        Commit message prefix; the commit message is rendered as
        ``"<msg>: subdag <i> of <n>"``.

    Returns None in all cases. Nothing is committed when there is no subdag
    ``i``.
    """
    from . import graph
    import git

    repo = git.Repo(recipe_folder, search_parent_directories=True)

    recipes = sorted(set(map(
        os.path.dirname, modified_recipes(["HEAD"], recipe_folder, config_file)
    )))
    logger.info("%s recipes changed in total", len(recipes))

    # build dag
    dag, name2recipes = graph.build(recipes, config=load_config(config_file))

    # obtain subdag; get_subdag returns None (not a tuple) when chunk ``i``
    # does not exist, so check before unpacking
    ret = graph.get_subdag(dag, name2recipes, n, i)
    if ret is None or not ret[0]:
        logger.info("Nothing to be done.")
        return
    # ``n`` is replaced by the number of chunks that were actually created
    subdag, n = ret

    for pkg in subdag:
        for recipe in name2recipes[pkg]:
            logger.info("Adding recipe %s", recipe)
            repo.index.add([recipe])

    logger.info("Committing subdag %s of %s.", i, n)
    repo.index.commit(f"{msg}: subdag {i} of {n}")