Skip to content

Commit

Permalink
audit:
Browse files Browse the repository at this point in the history
- auditing cluster name where available
- db: added cluster_name column in audit table
- AuditOperation() now implicitly issues a log.Debug(). This cleans up some code
- fixed/added auditing messages around the code

instance_dao:
- added GetClusterName(), using cache

web:
- minor visualization changes (modal title, fixed sidebar)
  • Loading branch information
shlomi-noach committed Oct 22, 2015
1 parent 3ead9a2 commit e8083e4
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 30 deletions.
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#
set -e

RELEASE_VERSION="1.4.458"
RELEASE_VERSION="1.4.460"
TOPDIR=/tmp/orchestrator-release
export RELEASE_VERSION TOPDIR

Expand Down
4 changes: 4 additions & 0 deletions go/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,10 @@ var generateSQLPatches = []string{
ADD COLUMN lost_slaves text CHARACTER SET ascii NOT NULL after participating_instances,
ADD COLUMN all_errors text CHARACTER SET ascii NOT NULL after lost_slaves
`,
`
ALTER TABLE audit
ADD COLUMN cluster_name varchar(128) CHARACTER SET ascii NOT NULL DEFAULT '' AFTER port
`,
}

// Track if a TLS has already been configured for topology
Expand Down
26 changes: 15 additions & 11 deletions go/inst/audit_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ func AuditOperation(auditType string, instanceKey *InstanceKey, message string)
if instanceKey == nil {
instanceKey = &InstanceKey{}
}
clusterName := ""
if instanceKey.Hostname != "" {
clusterName, _ = GetClusterName(instanceKey)
}

if config.Config.AuditLogFile != "" {
go func() error {
Expand All @@ -61,37 +65,37 @@ func AuditOperation(auditType string, instanceKey *InstanceKey, message string)
}

defer f.Close()
text := fmt.Sprintf("%s\t%s\t%s\t%d\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, message)
text := fmt.Sprintf("%s\t%s\t%s\t%d\t[%s]\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message)
if _, err = f.WriteString(text); err != nil {
return log.Errore(err)
}
return nil
}()
}

if syslogWriter != nil {
go func() {
syslogMessage := fmt.Sprintf("auditType:%s instance:%s message:%s", auditType, instanceKey.DisplayString(), message)
syslogWriter.Info(syslogMessage)
}()
}

_, err := db.ExecOrchestrator(`
insert
into audit (
audit_timestamp, audit_type, hostname, port, message
audit_timestamp, audit_type, hostname, port, cluster_name, message
) VALUES (
NOW(), ?, ?, ?, ?
NOW(), ?, ?, ?, ?, ?
)
`,
auditType,
instanceKey.Hostname,
instanceKey.Port,
clusterName,
message,
)
if err != nil {
return log.Errore(err)
}
logMessage := fmt.Sprintf("auditType:%s instance:%s cluster:%s message:%s", auditType, instanceKey.DisplayString(), clusterName, message)
if syslogWriter != nil {
go func() {
syslogWriter.Info(logMessage)
}()
}
log.Debugf(logMessage)
auditOperationCounter.Inc(1)

return err
Expand Down
27 changes: 27 additions & 0 deletions go/inst/instance_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/outbrain/golib/sqlutils"
"github.com/outbrain/orchestrator/go/config"
"github.com/outbrain/orchestrator/go/db"
"github.com/pmylund/go-cache"
"github.com/rcrowley/go-metrics"
"regexp"
"sort"
Expand Down Expand Up @@ -60,6 +61,9 @@ func (this InstancesByCountSlaveHosts) Less(i, j int) bool {
return len(this[i].SlaveHosts) < len(this[j].SlaveHosts)
}

// instanceKeyInformativeClusterName is a non-authoritative cache; used for auditing or general purpose.
// It maps an instance key's display string to the cluster name last read for that instance.
// Entries expire after half of config.Config.DiscoveryPollSeconds (integer division), and the
// second argument to cache.New is the interval at which expired entries are purged.
// NOTE(review): this expression is evaluated at package initialization, so it uses whatever
// value config.Config holds at that moment — confirm configuration is loaded before then.
// NOTE(review): a DiscoveryPollSeconds below 2 yields a zero duration here — verify how
// cache.New treats a zero default expiration.
var instanceKeyInformativeClusterName = cache.New(time.Duration(config.Config.DiscoveryPollSeconds/2)*time.Second, time.Second)

var readTopologyInstanceCounter = metrics.NewCounter()
var readInstanceCounter = metrics.NewCounter()
var writeInstanceCounter = metrics.NewCounter()
Expand Down Expand Up @@ -1185,6 +1189,29 @@ func PopulateInstancesAgents(instances [](*Instance)) error {
return nil
}

// GetClusterName returns the cluster name recorded for the given instance.
// The informative cluster-name cache is consulted first, keyed by the
// instance key's display string; on a miss the name is read from the
// database_instance table and stored in the cache with the cache's default
// expiration. The query wraps the column in ifnull(max(...), ''), so an
// instance with no matching row produces an empty cluster name.
// The error from the query, if any, is passed through log.Errore and
// returned alongside whatever cluster name was read.
func GetClusterName(instanceKey *InstanceKey) (clusterName string, err error) {
	cacheKey := instanceKey.DisplayString()
	if cached, hit := instanceKeyInformativeClusterName.Get(cacheKey); hit {
		return cached.(string), nil
	}

	// NOTE(review): hostname and port are interpolated directly into the SQL
	// text; if the db layer offers bound parameters, prefer those here.
	query := fmt.Sprintf(`
select
ifnull(max(cluster_name), '') as cluster_name
from
database_instance
where
hostname = '%s'
and port = %d
`, instanceKey.Hostname, instanceKey.Port)

	// Each returned row updates both the named result and the cache.
	storeRow := func(row sqlutils.RowMap) error {
		clusterName = row.GetString("cluster_name")
		instanceKeyInformativeClusterName.Set(cacheKey, clusterName, cache.DefaultExpiration)
		return nil
	}
	err = db.QueryOrchestratorRowsMap(query, storeRow)

	return clusterName, log.Errore(err)
}

// ReadClusters reads names of all known clusters
func ReadClusters() ([]string, error) {
clusterNames := []string{}
Expand Down
7 changes: 4 additions & 3 deletions go/inst/instance_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ Cleanup:
}

// and we're done (pending deferred functions)
AuditOperation("reset slave", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey))
AuditOperation("reset-slave", instanceKey, fmt.Sprintf("%+v replication reset", *instanceKey))

return instance, err
}
Expand Down Expand Up @@ -950,7 +950,7 @@ Cleanup:
}

// and we're done (pending deferred functions)
AuditOperation("detach slave", instanceKey, fmt.Sprintf("%+v replication detached", *instanceKey))
AuditOperation("detach-slave", instanceKey, fmt.Sprintf("%+v replication detached", *instanceKey))

return instance, err
}
Expand Down Expand Up @@ -991,7 +991,7 @@ Cleanup:
}

// and we're done (pending deferred functions)
AuditOperation("reattach slave", instanceKey, fmt.Sprintf("%+v replication reattached", *instanceKey))
AuditOperation("reattach-slave", instanceKey, fmt.Sprintf("%+v replication reattached", *instanceKey))

return instance, err
}
Expand Down Expand Up @@ -1654,6 +1654,7 @@ func MultiMatchSlaves(masterKey *InstanceKey, belowKey *InstanceKey, pattern str
if len(matchedSlaves) != len(slaves) {
err = fmt.Errorf("MultiMatchSlaves: only matched %d out of %d slaves of %+v; error is: %+v", len(matchedSlaves), len(slaves), *masterKey, err)
}
AuditOperation("multi-match-slaves", masterKey, fmt.Sprintf("matched %d slaves under %+v", len(matchedSlaves), *belowKey))

return matchedSlaves, belowInstance, err, errs
}
Expand Down
15 changes: 4 additions & 11 deletions go/logic/topology_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,9 @@ func RecoverDeadMaster(topologyRecovery *TopologyRecovery, skipProcesses bool) (
}

if promotedSlave == nil {
log.Debugf("topology_recovery: - RecoverDeadMaster: Failure: no slave promoted.")
inst.AuditOperation("recover-dead-master", failedInstanceKey, "Failure: no slave promoted.")
} else {
log.Debugf("topology_recovery: - RecoverDeadMaster: promoted slave is %+v", promotedSlave.Key)
inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("master: %+v", promotedSlave.Key))
inst.AuditOperation("recover-dead-master", failedInstanceKey, fmt.Sprintf("promoted slave: %+v", promotedSlave.Key))
}
return promotedSlave, lostSlaves, err
}
Expand Down Expand Up @@ -578,7 +576,6 @@ func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProce
recoveryResolved := false

inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, "problem found; will recover")
log.Debugf("topology_recovery: RecoverDeadIntermediateMaster: will recover %+v", *failedInstanceKey)
if !skipProcesses {
if err := executeProcesses(config.Config.PreFailoverProcesses, "PreFailoverProcesses", topologyRecovery, true); err != nil {
return nil, topologyRecovery.AddError(err)
Expand Down Expand Up @@ -613,8 +610,7 @@ func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProce
recoveryResolved = true
successorInstance = candidateSibling

log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: move to candidate intermediate master (%+v) went with %d errors", candidateSibling.Key, len(errs))
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Done. Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs))
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated %d slaves under candidate sibling: %+v; %d errors: %+v", len(relocatedSlaves), candidateSibling.Key, len(errs), errs))
}
}
// Plan A: find a replacement intermediate master in same Data Center
Expand Down Expand Up @@ -655,8 +651,7 @@ func RecoverDeadIntermediateMaster(topologyRecovery *TopologyRecovery, skipProce

if len(relocatedSlaves) > 0 {
recoveryResolved = true
log.Debugf("topology_recovery: - RecoverDeadIntermediateMaster: relocated up to %+v", successorInstance.Key)
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Done. Relocated slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs))
inst.AuditOperation("recover-dead-intermediate-master", failedInstanceKey, fmt.Sprintf("Relocated slaves under: %+v %d errors: %+v", successorInstance.Key, len(errs), errs))
} else {
err = log.Errorf("topology_recovery: RecoverDeadIntermediateMaster failed to match up any slave from %+v", *failedInstanceKey)
topologyRecovery.AddError(err)
Expand Down Expand Up @@ -793,11 +788,9 @@ func checkAndRecoverDeadCoMaster(analysisEntry inst.ReplicationAnalysis, candida
coMaster, lostSlaves, err := RecoverDeadCoMaster(topologyRecovery, skipProcesses)
ResolveRecovery(topologyRecovery, coMaster)
if coMaster == nil {
log.Debugf("topology_recovery: - RecoverDeadCoMaster: Failure: no slave promoted.")
inst.AuditOperation("recover-dead-co-master", failedInstanceKey, "Failure: no slave promoted.")
} else {
log.Debugf("topology_recovery: - RecoverDeadCoMaster: promoted co-master is %+v", coMaster.Key)
inst.AuditOperation("recover-dead-co-master", failedInstanceKey, fmt.Sprintf("master: %+v", coMaster.Key))
inst.AuditOperation("recover-dead-co-master", failedInstanceKey, fmt.Sprintf("promoted co-master: %+v", coMaster.Key))
}
topologyRecovery.LostSlaves.AddInstances(lostSlaves)
if coMaster != nil {
Expand Down
7 changes: 4 additions & 3 deletions resources/public/css/orchestrator.css
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,9 @@ body {
padding-bottom: 0px;
}

#node_modal .modal-title {
font: 12px sans-serif;
#node_modal .modal-title code {
background-color: inherit;
font-weight: bold;
}

#node_modal .modal-title .downtime-message {
Expand Down Expand Up @@ -426,7 +427,7 @@ body {

#cluster_sidebar {
float: left;
position: absolute;
position: fixed;
left: 15px;
z-index:100;
}
Expand Down
2 changes: 1 addition & 1 deletion resources/public/js/orchestrator.js
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ function openNodeModal(node) {
$('#node_modal #modalDataAttributesTable button[data-btn][data-grouped!=true]').appendTo("#node_modal .modal-footer");
$('#node_modal #modalDataAttributesTable [data-btn-group]').appendTo("#node_modal .modal-footer");

$('#node_modal .modal-title').html(node.title);
$('#node_modal .modal-title').html('<code class="text-primary">'+node.title+"</code>");

$('#modalDataAttributesTable').html("");

Expand Down

0 comments on commit e8083e4

Please sign in to comment.