Skip to content

Commit 07ebb8c

Browse files
committed
Fail fast when bundle and discovery plugins faced with unrecoverable errors.
Signed-off-by: Pushpalanka Jayawardhana <[email protected]>
1 parent 42517c6 commit 07ebb8c

File tree

1 file changed

+85
-44
lines changed

1 file changed

+85
-44
lines changed

filters/openpolicyagent/openpolicyagent.go

Lines changed: 85 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -481,61 +481,78 @@ func (registry *OpenPolicyAgentRegistry) new(store storage.Store, configBytes []
481481
// Start asynchronously starts the policy engine's plugins that download
482482
// policies, report status, etc.
483483
func (opa *OpenPolicyAgentInstance) Start(ctx context.Context, timeout time.Duration) error {
484+
484485
discoveryPlugin := discovery.Lookup(opa.manager)
485-
bundlePlugin := bundle.Lookup(opa.manager)
486486

487487
done := make(chan struct{})
488-
defer close(done)
489-
failed := make(chan error)
490-
defer close(failed)
488+
failed := make(chan error, 1)
491489

492-
discoveryPlugin.RegisterListener("startuplistener", func(status bundle.Status) {
493-
if len(status.Errors) > 0 {
494-
failed <- fmt.Errorf("discovery download failed: %w", errors.Join(status.Errors...))
495-
}
490+
var registerDiscoveryListenerOnce sync.Once
491+
registerDiscoveryListenerOnce.Do(func() {
492+
discoveryPlugin.RegisterListener("skipper-instance-startup-discovery", func(status bundle.Status) {
493+
handleStatusErrors(status, failed, "discovery plugin")
494+
})
496495
})
497-
498-
bundlePlugin.Register("startuplistener", func(status bundle.Status) {
499-
if len(status.Errors) > 0 {
500-
failed <- fmt.Errorf("bundle activation failed: %w", errors.Join(status.Errors...))
496+
defer discoveryPlugin.Unregister("skipper-instance-startup-discovery")
497+
498+
// Register listener for "bundle" status
499+
var registerBundleListenerOnce sync.Once
500+
opa.manager.RegisterPluginStatusListener("skipper-instance-startup-plugin", func(status map[string]*plugins.Status) {
501+
if _, exists := status["bundle"]; exists {
502+
bundlePlugin := bundle.Lookup(opa.manager)
503+
if bundlePlugin != nil {
504+
registerBundleListenerOnce.Do(func() {
505+
bundlePlugin.Register("skipper-instance-startup-bundle", func(status bundle.Status) {
506+
handleStatusErrors(status, failed, "bundle plugin")
507+
})
508+
})
509+
defer bundlePlugin.Unregister("skipper-instance-startup-bundle")
510+
}
501511
}
502512
})
503-
defer bundlePlugin.Unregister("startuplistener")
513+
defer opa.manager.UnregisterPluginStatusListener("skipper-instance-startup-plugin")
504514

505-
opa.manager.RegisterPluginStatusListener("startuplistener", func(status map[string]*plugins.Status) {
506-
for _, pluginstatus := range status {
507-
if pluginstatus != nil && pluginstatus.State != plugins.StateOK {
515+
// Register listener for general plugin status checks
516+
opa.manager.RegisterPluginStatusListener("generalStartUpListener", func(status map[string]*plugins.Status) {
517+
for _, pluginStatus := range status {
518+
if pluginStatus != nil && pluginStatus.State != plugins.StateOK {
508519
return
509520
}
510521
}
511522
close(done)
512523
})
513-
defer opa.manager.UnregisterPluginStatusListener("startuplistener")
524+
defer opa.manager.UnregisterPluginStatusListener("generalStartUpListener")
514525

515526
err := opa.manager.Start(ctx)
516527
if err != nil {
517528
return err
518529
}
519530

520531
select {
521-
case <-done:
522-
return nil
523-
case err := <-failed:
524-
opa.Close(ctx)
525-
526-
return err
527532
case <-ctx.Done():
533+
timeoutErr := ctx.Err()
534+
528535
for pluginName, status := range opa.manager.PluginStatus() {
529536
if status != nil && status.State != plugins.StateOK {
530537
opa.Logger().WithFields(map[string]interface{}{
531538
"plugin_name": pluginName,
532539
"plugin_state": status.State,
533540
"error_message": status.Message,
534-
}).Error("Open policy agent plugin did not start in time")
541+
}).Error("Open policy agent plugin: %v did not start in time", pluginName)
535542
}
536543
}
537544
opa.Close(ctx)
538-
return fmt.Errorf("one or more open policy agent plugins failed to start in %v with error: %w", timeout, err)
545+
if timeoutErr != nil {
546+
return fmt.Errorf("one or more open policy agent plugins failed to start in %v with error: %w", timeout, timeoutErr)
547+
}
548+
return fmt.Errorf("one or more open policy agent plugins failed to start in %v", timeout)
549+
550+
case <-done:
551+
return nil
552+
case err := <-failed:
553+
opa.Close(ctx)
554+
555+
return err
539556
}
540557
}
541558

@@ -546,25 +563,6 @@ func (opa *OpenPolicyAgentInstance) Close(ctx context.Context) {
546563
})
547564
}
548565

549-
func waitFunc(ctx context.Context, fun func() bool, interval time.Duration) error {
550-
if fun() {
551-
return nil
552-
}
553-
ticker := time.NewTicker(interval)
554-
defer ticker.Stop()
555-
556-
for {
557-
select {
558-
case <-ctx.Done():
559-
return fmt.Errorf("timed out while starting: %w", ctx.Err())
560-
case <-ticker.C:
561-
if fun() {
562-
return nil
563-
}
564-
}
565-
}
566-
}
567-
568566
func configLabelsInfo(opaConfig config.Config) func(*plugins.Manager) {
569567
info := ast.NewObject()
570568
labels := ast.NewObject()
@@ -821,3 +819,46 @@ func (l *QuietLogger) Error(fmt string, a ...interface{}) {
821819
func (l *QuietLogger) Warn(fmt string, a ...interface{}) {
822820
l.target.Warn(fmt, a)
823821
}
822+
823+
var temporaryClientErrorHTTPCodes = map[int64]struct{}{
824+
429: {}, // too many requests
825+
408: {}, // request timeout
826+
}
827+
828+
func isTemporaryError(code int64) bool {
829+
_, exists := temporaryClientErrorHTTPCodes[code]
830+
return exists
831+
}
832+
833+
func handleStatusErrors(
834+
status bundle.Status,
835+
failed chan error,
836+
prefix string,
837+
) {
838+
if status.Code == "bundle_error" {
839+
if status.HTTPCode == "" {
840+
failed <- formatStatusError(prefix, status)
841+
return
842+
}
843+
code, err := status.HTTPCode.Int64()
844+
if err == nil {
845+
if code >= 400 && code < 500 && !isTemporaryError(code) {
846+
// Fail for error codes in the range 400-500 excluding temporary errors
847+
failed <- formatStatusError(prefix, status)
848+
return
849+
} else if code >= 500 {
850+
// Do not fail for 5xx errors and keep retrying
851+
return
852+
}
853+
}
854+
if err != nil {
855+
failed <- formatStatusError(prefix, status)
856+
return
857+
}
858+
}
859+
}
860+
861+
func formatStatusError(prefix string, status bundle.Status) error {
862+
return fmt.Errorf("%s failed: Name: %s, Code: %s, Message: %s, HTTPCode: %s, Errors: %v",
863+
prefix, status.Name, status.Code, status.Message, status.HTTPCode, status.Errors)
864+
}

0 commit comments

Comments
 (0)