diff --git a/build.sh b/build.sh index 79e84c2e..212eea9d 100755 --- a/build.sh +++ b/build.sh @@ -6,7 +6,7 @@ # set -e -RELEASE_VERSION="1.4.458" +RELEASE_VERSION="1.4.460" TOPDIR=/tmp/orchestrator-release export RELEASE_VERSION TOPDIR diff --git a/go/db/db.go b/go/db/db.go index 90326362..5b2b1ce9 100644 --- a/go/db/db.go +++ b/go/db/db.go @@ -571,6 +571,10 @@ var generateSQLPatches = []string{ ADD COLUMN lost_slaves text CHARACTER SET ascii NOT NULL after participating_instances, ADD COLUMN all_errors text CHARACTER SET ascii NOT NULL after lost_slaves `, + ` + ALTER TABLE audit + ADD COLUMN cluster_name varchar(128) CHARACTER SET ascii NOT NULL DEFAULT '' AFTER port + `, } // Track if a TLS has already been configured for topology diff --git a/go/inst/audit_dao.go b/go/inst/audit_dao.go index 03bc7f8d..2626b2b2 100644 --- a/go/inst/audit_dao.go +++ b/go/inst/audit_dao.go @@ -52,6 +52,10 @@ func AuditOperation(auditType string, instanceKey *InstanceKey, message string) if instanceKey == nil { instanceKey = &InstanceKey{} } + clusterName := "" + if instanceKey.Hostname != "" { + clusterName, _ = GetClusterName(instanceKey) + } if config.Config.AuditLogFile != "" { go func() error { @@ -61,37 +65,37 @@ func AuditOperation(auditType string, instanceKey *InstanceKey, message string) } defer f.Close() - text := fmt.Sprintf("%s\t%s\t%s\t%d\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, message) + text := fmt.Sprintf("%s\t%s\t%s\t%d\t[%s]\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message) if _, err = f.WriteString(text); err != nil { return log.Errore(err) } return nil }() } - - if syslogWriter != nil { - go func() { - syslogMessage := fmt.Sprintf("auditType:%s instance:%s message:%s", auditType, instanceKey.DisplayString(), message) - syslogWriter.Info(syslogMessage) - }() - } - _, err := db.ExecOrchestrator(` insert into audit ( - audit_timestamp, audit_type, hostname, port, message + audit_timestamp, audit_type, hostname, port, cluster_name, message ) VALUES ( - NOW(), ?, ?, ?, ? + NOW(), ?, ?, ?, ?, ? ) `, auditType, instanceKey.Hostname, instanceKey.Port, + clusterName, message, ) if err != nil { return log.Errore(err) } + logMessage := fmt.Sprintf("auditType:%s instance:%s cluster:%s message:%s", auditType, instanceKey.DisplayString(), clusterName, message) + if syslogWriter != nil { + go func() { + syslogWriter.Info(logMessage) + }() + } + log.Debugf(logMessage) auditOperationCounter.Inc(1) return err diff --git a/go/inst/instance_dao.go b/go/inst/instance_dao.go index 40c8f5ef..779c89d5 100644 --- a/go/inst/instance_dao.go +++ b/go/inst/instance_dao.go @@ -24,6 +24,7 @@ import ( "github.com/outbrain/golib/sqlutils" "github.com/outbrain/orchestrator/go/config" "github.com/outbrain/orchestrator/go/db" + "github.com/pmylund/go-cache" "github.com/rcrowley/go-metrics" "regexp" "sort" @@ -60,6 +61,9 @@ func (this InstancesByCountSlaveHosts) Less(i, j int) bool { return len(this[i].SlaveHosts) < len(this[j].SlaveHosts) } +// instanceKeyInformativeClusterName is a non-authoritative cache; used for auditing or general purpose. +var instanceKeyInformativeClusterName = cache.New(time.Duration(config.Config.DiscoveryPollSeconds/2)*time.Second, time.Second) + var readTopologyInstanceCounter = metrics.NewCounter() var readInstanceCounter = metrics.NewCounter() var writeInstanceCounter = metrics.NewCounter() @@ -1185,6 +1189,29 @@ func PopulateInstancesAgents(instances [](*Instance)) error { return nil } +func GetClusterName(instanceKey *InstanceKey) (clusterName string, err error) { + if clusterName, found := instanceKeyInformativeClusterName.Get(instanceKey.DisplayString()); found { + return clusterName.(string), nil + } + query := fmt.Sprintf(` + select + ifnull(max(cluster_name), '') as cluster_name + from + database_instance + where + hostname = '%s' + and port = %d + `, instanceKey.Hostname, instanceKey.Port) + + err = db.QueryOrchestratorRowsMap(query, func(m sqlutils.RowMap) error { + clusterName = m.GetString("cluster_name") + instanceKeyInformativeClusterName.Set(instanceKey.DisplayString(), clusterName, cache.DefaultExpiration) + return nil + }) + + return clusterName, log.Errore(err) +} + // ReadClusters reads names of all known clusters func ReadClusters() ([]string, error) { clusterNames := []string{} diff --git a/go/inst/instance_topology.go b/go/inst/instance_topology.go index fe1ce6b1..e070bef9 100644 --- a/go/inst/instance_topology.go +++ b/go/inst/instance_topology.go @@ -909,7 +909,7 @@ Cleanup: } // and we're done (pending deferred functions) - AuditOperation("reset slave", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey)) + AuditOperation("reset-slave", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey)) return instance, err } @@ -950,7 +950,7 @@ Cleanup: } // and we're done (pending deferred functions) - AuditOperation("detach slave", instanceKey, fmt.Sprintf("%+v replication detached", *instanceKey)) + AuditOperation("detach-slave", instanceKey, fmt.Sprintf("%+v replication detached", *instanceKey)) return instance, err } @@ -991,7 +991,7 @@ Cleanup: } // and we're done (pending deferred functions) - AuditOperation("reattach slave", instanceKey, fmt.Sprintf("%+v replication reattached", *instanceKey)) + AuditOperation("reattach-slave", instanceKey, fmt.Sprintf("%+v replication reattached", *instanceKey)) return instance, err } @@ -1654,6 +1654,7 @@ func MultiMatchSlaves(masterKey *InstanceKey, belowKey *InstanceKey, pattern str if len(matchedSlaves) != len(slaves) { err = fmt.Errorf("MultiMatchSlaves: only matched %d out of %d slaves of %+v; error is: %+v", len(matchedSlaves), len(slaves), *masterKey, err) } + AuditOperation("multi-match-slaves", masterKey, fmt.Sprintf("matched %d slaves under %+v", len(matchedSlaves), *belowKey)) return matchedSlaves, belowInstance, err, errs } diff --git a/go/logic/topology_recovery.go b/go/logic/topology_recovery.go index 0398c2ba..36f6ebbe 100644 --- a/go/logic/topology_recovery.go +++ b/go/logic/topology_recovery.go @@ -326,11 +326,9 @@ func RecoverDeadMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) ( } if promotedSlave == nil { - log.Debugf("topology_recovery: - RecoverDeadMaster: Failure: no slave promoted.") inst.AuditOperation("recover-dead-master", failedInstanceKey, "Failure: no slave promoted.") } else { - log.Debugf("topology_recovery: - RecoverDeadMaster: promoted slave is %+v", promotedSlave.Key) - inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("master: %+v", promotedSlave.Key)) + inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("promoted slave: %+v", promotedSlave.Key)) } return promotedSlave, lostSlaves, err } @@ -578,7 +576,6 @@ func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProce recoveryResolved := false inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, "problem found; will recover") - log.Debugf("topology_recovery: RecoverDeadIntermediateMaster: will recover %+v", *failedInstanceKey) if !skipProcesses { if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil { return nil, topologyRecovery.AddError(err) @@ -613,8 +610,7 @@ func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProce recoveryResolved = true successorInstance = candidateSibling - log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) went with %d errors", candidateSibling.Key, len(errs)) - inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Done. Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs)) + inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs)) } } // Plan A: find a replacement intermediate master in same Data Center @@ -655,8 +651,7 @@ func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProce if len(relocatedSlaves) > 0 { recoveryResolved = true - log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: relocated up to %+v", successorInstance.Key) - inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Done. Relocated slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs)) + inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs)) } else { err = log.Errorf("topology_recovery: RecoverDeadIntermediateMaster failed to match up any slave from %+v", *failedInstanceKey) topologyRecovery.AddError(err) @@ -793,11 +788,9 @@ func checkAndRecoverDeadCoMaster(analysisEntry inst.ReplicationAnalysis, candida coMaster, lostSlaves, err := RecoverDeadCoMaster(topologyRecovery, skipProcesses) ResolveRecovery(topologyRecovery, coMaster) if coMaster == nil { - log.Debugf("topology_recovery: - RecoverDeadCoMaster: Failure: no slave promoted.") inst.AuditOperation("recover-dead-co-master", failedInstanceKey, "Failure: no slave promoted.") } else { - log.Debugf("topology_recovery: - RecoverDeadCoMaster: promoted co-master is %+v", coMaster.Key) - inst.AuditOperation("recover-dead-co-master", failedInstanceKey, fmt.Sprintf("master: %+v", coMaster.Key)) + inst.AuditOperation("recover-dead-co-master", failedInstanceKey, fmt.Sprintf("promoted co-master: %+v", coMaster.Key)) } topologyRecovery.LostSlaves.AddInstances(lostSlaves) if coMaster != nil { diff --git a/resources/public/css/orchestrator.css b/resources/public/css/orchestrator.css index 70452f5f..880552c7 100644 --- a/resources/public/css/orchestrator.css +++ b/resources/public/css/orchestrator.css @@ -302,8 +302,9 @@ body { padding-bottom: 0px; } -#node_modal .modal-title { - font: 12px sans-serif; +#node_modal .modal-title code { + background-color: inherit; + font-weight: bold; } #node_modal .modal-title .downtime-message { @@ -426,7 +427,7 @@ body { #cluster_sidebar { float: left; - position: absolute; + position: fixed; left: 15px; z-index:100; } diff --git a/resources/public/js/orchestrator.js b/resources/public/js/orchestrator.js index 05e4f147..b3b52bfb 100644 --- a/resources/public/js/orchestrator.js +++ b/resources/public/js/orchestrator.js @@ -226,7 +226,7 @@ function openNodeModal(node) { $('#node_modal #modalDataAttributesTable button[data-btn][data-grouped!=true]').appendTo("#node_modal .modal-footer"); $('#node_modal #modalDataAttributesTable [data-btn-group]').appendTo("#node_modal .modal-footer"); - $('#node_modal .modal-title').html(node.title); + $('#node_modal .modal-title').html(''+node.title+""); $('#modalDataAttributesTable').html("");