Skip to content

RSDK-10914, RSDK-11161, APP-8710 - Re-initialize resources on module reconfigure/crash/reload #5113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 0 additions & 70 deletions module/modmanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"path/filepath"
"regexp"
"runtime"
"slices"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -690,75 +689,6 @@ func (mgr *Manager) ValidateConfig(ctx context.Context, conf resource.Config) ([
// and modified resources. It also puts modular resources whose module has been modified or added in conf.Added if
// they are not already there since the resources themselves are not necessarily new.
func (mgr *Manager) ResolveImplicitDependenciesInConfig(ctx context.Context, conf *config.Diff) error {
// NOTE(benji): We could simplify some of the following `continue`
// conditional clauses to a single clause, but we split them for readability.
for _, c := range conf.Right.Components {
mod, ok := mgr.getModule(c)
if !ok {
// continue if this component is not being provided by a module.
continue
}

lenModified, lenAdded := len(conf.Modified.Modules), len(conf.Added.Modules)
deltaModules := make([]config.Module, lenModified, lenModified+lenAdded)
copy(deltaModules, conf.Modified.Modules)
deltaModules = append(deltaModules, conf.Added.Modules...)

if !slices.ContainsFunc(deltaModules, func(elem config.Module) bool {
return elem.Name == mod.cfg.Name
}) {
// continue if this modular component is not being handled by a modified
// or added module.
continue
}
if slices.ContainsFunc(conf.Added.Components, func(elem resource.Config) bool {
return elem.Name == c.Name
}) {
// continue if this modular component handled by a modified module is
// already in conf.Added.Components.
continue
}

// Add modular component to conf.Added.Components.
conf.Added.Components = append(conf.Added.Components, c)
// If component is in conf.Modified, the user modified a module and its
// component at the same time. Remove that resource from conf.Modified so
// the restarted module receives an AddResourceRequest and not a
// ReconfigureResourceRequest.
conf.Modified.Components = slices.DeleteFunc(
conf.Modified.Components, func(elem resource.Config) bool { return elem.Name == c.Name })
}
for _, s := range conf.Right.Services {
mod, ok := mgr.getModule(s)
if !ok {
// continue if this service is not being provided by a module.
continue
}
if !slices.ContainsFunc(conf.Modified.Modules, func(elem config.Module) bool {
return elem.Name == mod.cfg.Name
}) {
// continue if this modular service is not being handled by a modified
// module.
continue
}
if slices.ContainsFunc(conf.Added.Services, func(elem resource.Config) bool {
return elem.Name == s.Name
}) {
// continue if this modular service handled by a modified module is
// already in conf.Added.Services.
continue
}

// Add modular service to conf.Added.Services.
conf.Added.Services = append(conf.Added.Services, s)
// If service is in conf.Modified, the user modified a module and its
// service at the same time. Remove that resource from conf.Modified so
// the restarted module receives an AddResourceRequest and not a
// ReconfigureResourceRequest.
conf.Modified.Services = slices.DeleteFunc(
conf.Modified.Services, func(elem resource.Config) bool { return elem.Name == s.Name })
}

// If something was added or modified, go through components and services in
// diff.Added and diff.Modified, call Validate on all those that are modularized,
// and store implicit dependencies.
Expand Down
10 changes: 9 additions & 1 deletion resource/graph_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,10 +383,18 @@ func (w *GraphNode) SetNewConfig(newConfig Config, dependencies []string) {
// dependency updates. If the node was previously marked for removal,
// this makes no changes.
func (w *GraphNode) SetNeedsUpdate() {
// doing two mutex ops here but we assume there's only one caller.
w.setNeedsReconfigure(w.Config(), false, w.UnresolvedDependencies())
}

// SetNeedsRebuild is used to inform the node that it should
// rebuild itself with the same config. The caller is expected to
// handle closing of the resource on the node if necessary.
func (w *GraphNode) SetNeedsRebuild() {
// doing two mutex ops here but we assume there's only one caller.
w.UnsetResource()
w.setNeedsReconfigure(w.Config(), true, w.UnresolvedDependencies())
}

// setUnresolvedDependencies sets names that are yet to be resolved as
// dependencies for the node. Note that even an empty list will still
// set needsDependencyResolution to true. If no resolution is needed,
Expand Down
11 changes: 5 additions & 6 deletions robot/impl/local_robot.go
Original file line number Diff line number Diff line change
Expand Up @@ -552,17 +552,16 @@ func New(
}

// removeOrphanedResources is called by the module manager to remove resources
// orphaned due to module crashes.
// orphaned due to module crashes. Resources passed into this function will be
// marked for rebuilding and handled by the completeConfig worker.
func (r *localRobot) removeOrphanedResources(ctx context.Context,
rNames []resource.Name,
) {
r.reconfigurationLock.Lock()
defer r.reconfigurationLock.Unlock()
r.manager.markResourcesRemoved(rNames, nil)
if err := r.manager.removeMarkedAndClose(ctx, nil); err != nil {
r.logger.CErrorw(ctx, "error removing and closing marked resources",
"error", err)
}
// resource names passed into markRebuildResources are already closed as the module
// crashed and thus do not need to be closed.
r.manager.markRebuildResources(rNames)
r.updateWeakAndOptionalDependents(ctx)
}

Expand Down
30 changes: 23 additions & 7 deletions robot/impl/resource_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -1158,16 +1158,13 @@ func (manager *resourceManager) updateResources(
manager.logger.CErrorw(ctx, "module config validation error; skipping", "module", mod.Name, "error", err)
continue
}
orphanedResourceNames, err := manager.moduleManager.Reconfigure(ctx, mod)
affectedResourceNames, err := manager.moduleManager.Reconfigure(ctx, mod)
if err != nil {
manager.logger.CErrorw(ctx, "error reconfiguring module", "module", mod.Name, "error", err)
}
for _, resToClose := range manager.markResourcesRemoved(orphanedResourceNames, nil) {
if err := resToClose.Close(ctx); err != nil {
manager.logger.CErrorw(ctx, "error closing now orphaned resource", "resource",
resToClose.Name().String(), "module", mod.Name, "error", err)
}
}
// resources passed into markRebuildResources have already been closed during module reconfiguration, so
// not necessary to Close again.
manager.markRebuildResources(affectedResourceNames)
}

if manager.moduleManager != nil {
Expand Down Expand Up @@ -1333,6 +1330,25 @@ func (manager *resourceManager) markResourcesRemoved(
return resourcesToCloseBeforeComplete
}

// markRebuildResources reinitializes resources passed in, forcing a rebuild of the resource during
// reconfiguration and/or completeConfig loop. This function expects the caller to clean up any resources
// if necessary.
func (manager *resourceManager) markRebuildResources(rNames []resource.Name) {
for _, rName := range rNames {
// Disable changes to shell in untrusted
if manager.opts.untrustedEnv && rName.API == shell.API {
continue
}

resNode, ok := manager.resources.Node(rName)
if !ok {
continue
}
resNode.SetNeedsRebuild()
manager.markChildrenForUpdate(rName)
}
}

// createConfig will create a config.Config based on the current state of the
// resource graph, processManager and moduleManager. The created config will
// possibly contain default services registered by the RDK and not specified by
Expand Down
Loading