Skip to content

Commit

Permalink
track live migration process and cancel stale migrations
Browse files Browse the repository at this point in the history
Signed-off-by: Vladik Romanovsky <[email protected]>
  • Loading branch information
vladikr committed Mar 1, 2019
1 parent 08c9724 commit 7a85d53
Showing 1 changed file with 108 additions and 0 deletions.
108 changes: 108 additions & 0 deletions pkg/virt-launcher/virtwrap/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,16 @@ import (
"encoding/xml"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"

eventsclient "kubevirt.io/kubevirt/pkg/virt-launcher/notify-client"

libvirt "github.com/libvirt/libvirt-go"
k8sv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilwait "k8s.io/apimachinery/pkg/util/wait"

Expand Down Expand Up @@ -365,6 +368,111 @@ func (l *LibvirtDomainManager) asyncMigrate(vmi *v1.VirtualMachineInstance) {
}(l, vmi)
}

// getVMIEphemeralDisksTotalSize walks the ephemeral-disk directory and
// returns the combined size of all regular files in bytes (BinarySI).
// If the directory cannot be walked, a warning is logged and a zero
// Quantity is returned so callers can proceed with a best-effort estimate.
func getVMIEphemeralDisksTotalSize() *resource.Quantity {
	var baseDir = "/var/run/libvirt/kubevirt-ephemeral-disk"
	totalSize := int64(0)
	err := filepath.Walk(baseDir, func(path string, f os.FileInfo, err error) error {
		// Per the filepath.Walk contract, f is nil when err is non-nil
		// (e.g. baseDir does not exist). Propagate the error before
		// touching f to avoid a nil-pointer dereference.
		if err != nil {
			return err
		}
		if !f.IsDir() {
			totalSize += f.Size()
		}
		return nil
	})
	if err != nil {
		log.Log.Reason(err).Warning("failed to get VMI ephemeral disks size")
		return &resource.Quantity{Format: resource.BinarySI}
	}

	return resource.NewScaledQuantity(totalSize, 0)
}

// getVMIMigrationDataSize estimates how much data a live migration must
// transfer, scaled to gigabytes. The estimate starts from the requested
// memory (overridden by an explicit guest memory setting, if present) and,
// for block migrations, additionally includes the ephemeral disk payload.
func getVMIMigrationDataSize(vmi *v1.VirtualMachineInstance) int64 {
	var memory resource.Quantity

	// Start from the scheduler-visible memory request.
	if requested, exists := vmi.Spec.Domain.Resources.Requests[k8sv1.ResourceMemory]; exists {
		memory = requested
	}
	// An explicitly configured guest memory takes precedence.
	if guestMemory := vmi.Spec.Domain.Memory; guestMemory != nil && guestMemory.Guest != nil {
		memory = *guestMemory.Guest
	}

	// Block migrations also copy the ephemeral disks over the wire.
	if vmi.Status.MigrationMethod == v1.BlockMigration {
		memory.Add(*getVMIEphemeralDisksTotalSize())
	}
	return memory.ScaledValue(resource.Giga)
}

// liveMigrationMonitor polls libvirt job statistics for a running live
// migration and aborts the job when it stalls (no progress for
// progressTimeout seconds) or overruns the acceptable completion time
// derived from the migration data size. The loop exits once libvirt
// reports the job as completed, failed, or cancelled.
func liveMigrationMonitor(vmi *v1.VirtualMachineInstance, dom cli.VirDomain) {
	logger := log.Log.Object(vmi)
	start := time.Now().UTC().Unix()
	lastProgressUpdate := start
	progressWatermark := int64(0)
	// TODO:(vladikr) move to configMap
	progressTimeout := int64(150)

	// TODO:(vladikr) move to configMap
	completionTimeoutPerGiB := int64(800)
	acceptableCompletionTime := completionTimeoutPerGiB * getVMIMigrationDataSize(vmi)
monitorLoop:
	for {
		stats, err := dom.GetJobStats(0)
		if err != nil {
			logger.Reason(err).Error("failed to get domain job info")
			break
		}
		remainingData := int64(stats.DataRemaining)
		switch stats.Type {
		case libvirt.DOMAIN_JOB_UNBOUNDED:
			// Migration is running
			now := time.Now().UTC().Unix()
			elapsed := now - start

			// Record a new watermark whenever remaining data shrinks.
			if (progressWatermark == 0) ||
				(progressWatermark > remainingData) {
				progressWatermark = remainingData
				lastProgressUpdate = now
			}
			// check if the migration is progressing
			progressDelay := now - lastProgressUpdate
			if progressTimeout != 0 &&
				progressDelay > progressTimeout {
				logger.Warningf("Live migration stuck for %d sec", progressDelay)
				err := dom.AbortJob()
				if err != nil {
					logger.Reason(err).Error("failed to abort migration")
				}
			}

			// check the overall migration time
			if acceptableCompletionTime != 0 &&
				elapsed > acceptableCompletionTime {
				logger.Warningf("Live migration not completed after %d sec",
					acceptableCompletionTime)
				err := dom.AbortJob()
				if err != nil {
					logger.Reason(err).Error("failed to abort migration")
				}
			}

		case libvirt.DOMAIN_JOB_NONE:
			logger.Info("Migration job didn't start yet")
		case libvirt.DOMAIN_JOB_COMPLETED:
			logger.Info("Migration has been completed")
			// A plain break would only leave the switch; use the loop
			// label so the monitor actually terminates.
			break monitorLoop
		case libvirt.DOMAIN_JOB_FAILED:
			logger.Info("Migration job failed")
			break monitorLoop
		case libvirt.DOMAIN_JOB_CANCELLED:
			logger.Info("Migration was canceled")
			break monitorLoop
		}
		time.Sleep(500 * time.Millisecond)
	}
}

func (l *LibvirtDomainManager) MigrateVMI(vmi *v1.VirtualMachineInstance) error {

if vmi.Status.MigrationState == nil {
Expand Down

0 comments on commit 7a85d53

Please sign in to comment.