Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 24 additions & 12 deletions pkg/controller/common/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -934,12 +934,25 @@ func CalculateConfigFileDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []st
return diffFileSet
}

// CalculateConfigUnitDiffs compares the units present in two ignition configurations and returns the list of units
// that are different between them
//
//nolint:dupl
func CalculateConfigUnitDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []string {
// Go through the units and see what is new or different
type UnitDiff struct {
Added []ign3types.Unit
Removed []ign3types.Unit
Updated []ign3types.Unit
}

// GetChangedConfigUnitsByType compares the units present in two ignition configurations, one
// old config and the other new, a struct with units mapped to the type of change:
// - New unit added: "Added"
// - Unit previously existing removed: "Removed"
// - Existing unit changed in some way: "Updated"
func GetChangedConfigUnitsByType(oldIgnConfig, newIgnConfig *ign3types.Config) (unitDiffs UnitDiff) {
diffUnit := UnitDiff{
Added: []ign3types.Unit{},
Removed: []ign3types.Unit{},
Updated: []ign3types.Unit{},
}

// Get the sets of the old and new units from the ignition configurations
oldUnitSet := make(map[string]ign3types.Unit)
for _, u := range oldIgnConfig.Systemd.Units {
oldUnitSet[u.Name] = u
Expand All @@ -948,26 +961,25 @@ func CalculateConfigUnitDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []st
for _, u := range newIgnConfig.Systemd.Units {
newUnitSet[u.Name] = u
}
diffUnitSet := []string{}

// First check if any units were removed
for unit := range oldUnitSet {
_, ok := newUnitSet[unit]
if !ok {
diffUnitSet = append(diffUnitSet, unit)
diffUnit.Removed = append(diffUnit.Removed, oldUnitSet[unit])
}
}

// Now check if any units were added/changed
// Now check if any units were added or updated
for name, newUnit := range newUnitSet {
oldUnit, ok := oldUnitSet[name]
if !ok {
diffUnitSet = append(diffUnitSet, name)
diffUnit.Added = append(diffUnit.Added, newUnitSet[name])
} else if !reflect.DeepEqual(oldUnit, newUnit) {
diffUnitSet = append(diffUnitSet, name)
diffUnit.Updated = append(diffUnit.Updated, newUnitSet[name])
}
}
return diffUnitSet
return diffUnit
}

// NewIgnFile returns a simple ignition3 file from just path and file contents.
Expand Down
31 changes: 30 additions & 1 deletion pkg/daemon/on_disk_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

ign2types "github.com/coreos/ignition/config/v2_2/types"
ign3types "github.com/coreos/ignition/v2/config/v3_5/types"
Expand Down Expand Up @@ -100,7 +101,12 @@ func checkV3Unit(unit ign3types.Unit, systemdPath string) error {
return nil
}

return checkFileContentsAndMode(path, []byte(*unit.Contents), defaultFilePermissions)
err := checkFileContentsAndMode(path, []byte(*unit.Contents), defaultFilePermissions)
if err != nil {
return err
}

return checkUnitEnabled(unit.Name, unit.Enabled)
}

// checkV3Units validates the contents of all the units in the
Expand Down Expand Up @@ -230,6 +236,29 @@ func checkV2Files(files []ign2types.File) error {
return nil
}

// checkUnitEnabled checks whether a systemd unit is enabled as expected.
func checkUnitEnabled(name string, expectedEnabled *bool) error {
if expectedEnabled == nil {
return nil
}
outBytes, _ := runGetOut("systemctl", "is-enabled", name)
out := strings.TrimSpace(string(outBytes))

switch {
case *expectedEnabled:
// If expected to be enabled, reject known "not enabled" states
if out == "disabled" || out == "masked" || out == "masked-runtime" || out == "not-found" {
return fmt.Errorf("unit %q expected enabled=true, but systemd reports %q", name, out)
}
case !*expectedEnabled:
// If expected to be disabled, reject any of the enabled-like states
if out == "enabled" || out == "enabled-runtime" {
return fmt.Errorf("unit %q expected enabled=false, but systemd reports %q", name, out)
}
}
return nil
}

// checkFileContentsAndMode reads the file from the filepath and compares its
// contents and mode with the expectedContent and mode parameters. It logs an
// error in case of an error or mismatch and returns the status of the
Expand Down
47 changes: 39 additions & 8 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"path/filepath"
"reflect"
goruntime "runtime"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -1013,13 +1014,23 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
logSystem("Starting update from %s to %s: %+v", oldConfigName, newConfigName, diff)

diffFileSet := ctrlcommon.CalculateConfigFileDiffs(&oldIgnConfig, &newIgnConfig)
diffUnitSet := ctrlcommon.CalculateConfigUnitDiffs(&oldIgnConfig, &newIgnConfig)
// Get the added and updated units
unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig)
addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated)
// Get the names of all units changed in some way (added, removed, or updated)
var allChangedUnitNames []string
for _, unit := range append(addedOrChangedUnits, unitDiff.Removed...) {
allChangedUnitNames = append(allChangedUnitNames, unit.Name)
}

var nodeDisruptionActions []opv1.NodeDisruptionPolicyStatusAction
var actions []string
// Check for forcefile before calculatePostConfigChange* functions delete it.
// This is needed for updateFiles to know whether to write all units (OCPBUGS-74692).
forceFilePresent := forceFileExists()
// Node Disruption Policies cannot be used during firstboot as API is not accessible.
if !firstBoot {
nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, diffUnitSet)
nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, allChangedUnitNames)
} else {
actions, err = calculatePostConfigChangeAction(diff, diffFileSet)
klog.Infof("Skipping node disruption polciies as node is executing first boot.")
Expand Down Expand Up @@ -1125,13 +1136,13 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
}

// update files on disk that need updating
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, skipCertificateWrite); err != nil {
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil {
return err
}

defer func() {
if retErr != nil {
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, skipCertificateWrite); err != nil {
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, skipCertificateWrite, false); err != nil {
errs := kubeErrs.NewAggregate([]error{err, retErr})
retErr = fmt.Errorf("error rolling back files writes: %w", errs)
return
Expand Down Expand Up @@ -1266,15 +1277,21 @@ func (dn *Daemon) updateHypershift(oldConfig, newConfig *mcfgv1.MachineConfig, d
return fmt.Errorf("parsing new Ignition config failed: %w", err)
}

unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig)
addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated)

// Check for forcefile to support config drift recovery (OCPBUGS-74692)
forceFilePresent := forceFileExists()

// update files on disk that need updating
// We should't skip the certificate write in HyperShift since it does not run the extra daemon process
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, false); err != nil {
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, false, forceFilePresent); err != nil {
return err
}

defer func() {
if retErr != nil {
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, false); err != nil {
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, false, false); err != nil {
errs := kubeErrs.NewAggregate([]error{err, retErr})
retErr = fmt.Errorf("error rolling back files writes: %w", errs)
return
Expand Down Expand Up @@ -1783,12 +1800,26 @@ func (dn *CoreOSDaemon) getKernelPackagesForRelease() releaseKernelPackages {
// whatever has been written is picked up by the appropriate daemons, if
// required. in particular, a daemon-reload and restart for any unit files
// touched.
func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, skipCertificateWrite bool) error {
func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, addedOrChangedUnits []ign3types.Unit, skipCertificateWrite, forceFilePresent bool) error {
klog.Info("Updating files")
if err := dn.writeFiles(newIgnConfig.Storage.Files, skipCertificateWrite); err != nil {
return err
}
if err := dn.writeUnits(newIgnConfig.Systemd.Units); err != nil {

// With OCPBUGS-58023, we updated this flow to only write units that were either added or
// updated. As can be seen in OCPBUGS-74692, this impacted the traditional method to recover
// from config drifts with systemd units. It makes the `touch /run/machine-config-daemon-force`
// command useless since the new flow does not rewrite all files, only the ones that have been
// added or changed with the latest MC. To keep the fix for OCPBUGS-58023 and allow continue
// supporting the traditional config drift recovery for systemd units, all units should be
// written when a forcefile exists.
unitsToWrite := addedOrChangedUnits
if forceFilePresent {
klog.Info("Forcefile exists, writing all units")
unitsToWrite = newIgnConfig.Systemd.Units
}

if err := dn.writeUnits(unitsToWrite); err != nil {
return err
}
return dn.deleteStaleData(oldIgnConfig, newIgnConfig)
Expand Down