Improve logs for VM migrations (#12332)

This commit is contained in:
Nicolas Vazquez 2026-01-09 13:50:27 -03:00 committed by GitHub
parent e25cf437c0
commit 04875f1517
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 35 additions and 9 deletions

View File

@ -3053,7 +3053,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
}
protected void migrate(final VMInstanceVO vm, final long srcHostId, final DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException {
logger.info("Migrating {} to {}", vm, dest);
logger.info("Start preparing migration of the VM: {} to {}", vm, dest);
final long dstHostId = dest.getHost().getId();
final Host fromHost = _hostDao.findById(srcHostId);
if (fromHost == null) {
@ -3118,9 +3118,11 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
if (pfma == null || !pfma.getResult()) {
final String details = pfma != null ? pfma.getDetails() : "null answer returned";
final String msg = "Unable to prepare for migration due to " + details;
logger.error("Failed to prepare destination host {} for migration of VM {} : {}", dstHostId, vm.getInstanceName(), details);
pfma = null;
throw new AgentUnavailableException(msg, dstHostId);
}
logger.debug("Successfully prepared destination host {} for migration of VM {} ", dstHostId, vm.getInstanceName());
} catch (final OperationTimedoutException e1) {
throw new AgentUnavailableException("Operation timed out", dstHostId);
} finally {
@ -3141,18 +3143,23 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
volumeMgr.release(vm.getId(), dstHostId);
}
logger.info("Migration cancelled because state has changed: {}", vm);
throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm);
String msg = "Migration cancelled because state has changed: " + vm;
logger.warn(msg);
throw new ConcurrentOperationException(msg);
}
} catch (final NoTransitionException e1) {
_networkMgr.rollbackNicForMigration(vmSrc, profile);
volumeMgr.release(vm.getId(), dstHostId);
logger.info("Migration cancelled because {}", e1.getMessage());
String msg = String.format("Migration cancelled for VM %s due to state transition failure: %s",
vm.getInstanceName(), e1.getMessage());
logger.warn(msg, e1);
throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage());
} catch (final CloudRuntimeException e2) {
_networkMgr.rollbackNicForMigration(vmSrc, profile);
volumeMgr.release(vm.getId(), dstHostId);
logger.info("Migration cancelled because {}", e2.getMessage());
String msg = String.format("Migration cancelled for VM %s due to runtime exception: %s",
vm.getInstanceName(), e2.getMessage());
logger.error(msg, e2);
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
try {
@ -3172,8 +3179,12 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
final Answer ma = _agentMgr.send(vm.getLastHostId(), mc);
if (ma == null || !ma.getResult()) {
final String details = ma != null ? ma.getDetails() : "null answer returned";
String msg = String.format("Migration command failed for VM %s on source host id=%s to destination host %s: %s",
vm.getInstanceName(), vm.getLastHostId(), dstHostId, details);
logger.error(msg);
throw new CloudRuntimeException(details);
}
logger.info("Migration command successful for VM {}", vm.getInstanceName());
} catch (final OperationTimedoutException e) {
boolean success = false;
if (HypervisorType.KVM.equals(vm.getHypervisorType())) {
@ -3210,7 +3221,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
try {
if (!checkVmOnHost(vm, dstHostId)) {
logger.error("Unable to complete migration for {}", vm);
logger.error("Migration verification failed for VM {} : VM not found on destination host {} ", vm.getInstanceName(), dstHostId);
try {
_agentMgr.send(srcHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null);
} catch (final AgentUnavailableException e) {
@ -3225,7 +3236,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
migrated = true;
} finally {
if (!migrated) {
logger.info("Migration was unsuccessful. Cleaning up: {}", vm);
logger.info("Migration was unsuccessful. Cleaning up: {}", vm);
_networkMgr.rollbackNicForMigration(vmSrc, profile);
volumeMgr.release(vm.getId(), dstHostId);
// deallocate GPU devices for the VM on the destination host
@ -3237,7 +3248,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
try {
_agentMgr.send(dstHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null);
} catch (final AgentUnavailableException ae) {
logger.warn("Looks like the destination Host is unavailable for cleanup", ae);
logger.warn("Destination host {} unavailable for cleanup after failed migration of VM {}", dstHostId, vm.getInstanceName(), ae);
}
_networkMgr.setHypervisorHostname(profile, dest, false);
try {
@ -3246,6 +3257,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
logger.warn(e.getMessage());
}
} else {
logger.info("Migration completed successfully for VM %s" + vm);
_networkMgr.commitNicForMigration(vmSrc, profile);
volumeMgr.release(vm.getId(), srcHostId);
// deallocate GPU devices for the VM on the src host after migration is complete
@ -3276,6 +3288,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
migrateCommand.setVlanToPersistenceMap(vlanToPersistenceMap);
}
logger.debug("Setting auto convergence to: {}", StorageManager.KvmAutoConvergence.value());
migrateCommand.setAutoConvergence(StorageManager.KvmAutoConvergence.value());
migrateCommand.setHostGuid(destination.getHost().getGuid());

View File

@ -278,17 +278,20 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper<MigrateCo
// abort the vm migration if the job is executed more than vm.migrate.wait
final int migrateWait = libvirtComputingResource.getMigrateWait();
logger.info("vm.migrate.wait value set to: {}for VM: {}", migrateWait, vmName);
if (migrateWait > 0 && sleeptime > migrateWait * 1000) {
DomainState state = null;
try {
state = dm.getInfo().state;
logger.info("VM domain state when trying to abort migration : {}", state);
} catch (final LibvirtException e) {
logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
}
if (state != null && state == DomainState.VIR_DOMAIN_RUNNING) {
try {
DomainJobInfo job = dm.getJobInfo();
logger.info(String.format("Aborting migration of VM [%s] with domain job [%s] due to time out after %d seconds.", vmName, job, migrateWait));
logger.warn("Aborting migration of VM {} with domain job [{}] due to timeout after {} seconds. " +
"Job stats: data processed={} bytes, data remaining={} bytes", vmName, job, migrateWait, job.getDataProcessed(), job.getDataRemaining());
dm.abortJob();
result = String.format("Migration of VM [%s] was cancelled by CloudStack due to time out after %d seconds.", vmName, migrateWait);
commandState = Command.State.FAILED;
@ -303,10 +306,12 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper<MigrateCo
// pause vm if we meet the vm.migrate.pauseafter threshold and not already paused
final int migratePauseAfter = libvirtComputingResource.getMigratePauseAfter();
logger.info("vm.migrate.pauseafter value set to: {} for VM: {}", migratePauseAfter, vmName);
if (migratePauseAfter > 0 && sleeptime > migratePauseAfter) {
DomainState state = null;
try {
state = dm.getInfo().state;
logger.info("VM domain state when trying to pause VM for migration: {}", state);
} catch (final LibvirtException e) {
logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
}
@ -381,6 +386,7 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper<MigrateCo
}
if (result == null) {
logger.info("Post-migration cleanup for VM {}: ", vmName);
libvirtComputingResource.destroyNetworkRulesForVM(conn, vmName);
for (final InterfaceDef iface : ifaces) {
String vlanId = libvirtComputingResource.getVlanIdFromBridgeName(iface.getBrName());
@ -394,6 +400,7 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper<MigrateCo
commandState = Command.State.COMPLETED;
libvirtComputingResource.createOrUpdateLogFileForCommand(command, commandState);
} else if (commandState == null) {
logger.error("Migration of VM {} failed with result: {}", vmName, result);
commandState = Command.State.FAILED;
libvirtComputingResource.createOrUpdateLogFileForCommand(command, commandState);
}

View File

@ -56,6 +56,7 @@ public final class LibvirtPrepareForMigrationCommandWrapper extends CommandWrapp
final VirtualMachineTO vm = command.getVirtualMachine();
if (command.isRollback()) {
logger.info("Handling rollback for PrepareForMigration of VM {}", vm.getName());
return handleRollback(command, libvirtComputingResource);
}
@ -83,6 +84,7 @@ public final class LibvirtPrepareForMigrationCommandWrapper extends CommandWrapp
if (interfaceDef != null && interfaceDef.getNetType() == GuestNetType.VHOSTUSER) {
DpdkTO to = new DpdkTO(interfaceDef.getDpdkOvsPath(), interfaceDef.getDpdkSourcePort(), interfaceDef.getInterfaceMode());
dpdkInterfaceMapping.put(nic.getMac(), to);
logger.debug("Configured DPDK interface for VM {}", vm.getName());
}
}
@ -122,6 +124,7 @@ public final class LibvirtPrepareForMigrationCommandWrapper extends CommandWrapp
return new PrepareForMigrationAnswer(command, "failed to connect physical disks to host");
}
logger.info("Successfully prepared destination host for migration of VM {}", vm.getName());
return createPrepareForMigrationAnswer(command, dpdkInterfaceMapping, libvirtComputingResource, vm);
} catch (final LibvirtException | CloudRuntimeException | InternalErrorException | URISyntaxException e) {
if (MapUtils.isNotEmpty(dpdkInterfaceMapping)) {
@ -157,6 +160,7 @@ public final class LibvirtPrepareForMigrationCommandWrapper extends CommandWrapp
KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr();
VirtualMachineTO vmTO = command.getVirtualMachine();
logger.info("Rolling back PrepareForMigration for VM {}: disconnecting physical disks", vmTO.getName());
if (!storagePoolMgr.disconnectPhysicalDisksViaVmSpec(vmTO)) {
return new PrepareForMigrationAnswer(command, "failed to disconnect physical disks from host");
}

View File

@ -7189,6 +7189,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
throw new CloudRuntimeException("Unable to find suitable destination to migrate VM " + vm.getInstanceName());
}
logger.info("Starting migration of VM {} from host {} to host {} ", vm.getInstanceName(), srcHostId, dest.getHost().getId());
collectVmDiskAndNetworkStatistics(vmId, State.Running);
_itMgr.migrate(vm.getUuid(), srcHostId, dest);
return findMigratedVm(vm.getId(), vm.getType());
@ -7260,6 +7261,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
private DeployDestination checkVmMigrationDestination(VMInstanceVO vm, Host srcHost, Host destinationHost) throws VirtualMachineMigrationException {
if (destinationHost == null) {
logger.error("Destination host is null for migration of VM: {}", vm.getInstanceName());
return null;
}
if (destinationHost.getId() == srcHost.getId()) {