From 306d0907bc5711d026e343680cbbfe2c000b27d8 Mon Sep 17 00:00:00 2001 From: Devdeep Singh Date: Wed, 3 Jul 2013 09:28:47 -0700 Subject: [PATCH] Fixing migration of a virtual machine with storage in the new framework. Made changes to queue a VmWorkJob for migration. The work gets picked up by job dispatcher which carries out migration of the vm with storage. --- .../MigrateVirtualMachineWithVolumeCmd.java | 3 +- .../com/cloud/vm/VirtualMachineManager.java | 3 +- .../cloud/vm/VirtualMachineManagerImpl.java | 188 ++++++++++-------- .../src/com/cloud/vm/VmWorkJobDispatcher.java | 11 +- .../src/com/cloud/vm/UserVmManagerImpl.java | 9 +- 5 files changed, 128 insertions(+), 86 deletions(-) diff --git a/api/src/org/apache/cloudstack/api/command/admin/vm/MigrateVirtualMachineWithVolumeCmd.java b/api/src/org/apache/cloudstack/api/command/admin/vm/MigrateVirtualMachineWithVolumeCmd.java index d38d13c1b84..b8f049ef985 100644 --- a/api/src/org/apache/cloudstack/api/command/admin/vm/MigrateVirtualMachineWithVolumeCmd.java +++ b/api/src/org/apache/cloudstack/api/command/admin/vm/MigrateVirtualMachineWithVolumeCmd.java @@ -137,7 +137,8 @@ public class MigrateVirtualMachineWithVolumeCmd extends BaseAsyncCmd { VirtualMachine migratedVm = _userVmService.migrateVirtualMachineWithVolume(getVirtualMachineId(), destinationHost, getVolumeToPool()); if (migratedVm != null) { - UserVmResponse response = _responseGenerator.createUserVmResponse("virtualmachine", (UserVm)migratedVm).get(0); + UserVm vm = _entityMgr.findById(UserVm.class, migratedVm.getId()); + UserVmResponse response = _responseGenerator.createUserVmResponse("virtualmachine", vm).get(0); response.setResponseName(getCommandName()); this.setResponseObject(response); } else { diff --git a/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java b/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java index 954d3b63f0a..95fbb908838 100644 --- a/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java +++ b/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java @@ -139,8 +139,7 @@ public interface VirtualMachineManager extends Manager { Outcome migrate(String vmUuid, long srcHostId, DeployDestination dest); - VirtualMachine migrateWithStorage(String vmUuid, long srcId, long destId, Map volumeToPool) throws ResourceUnavailableException, - ConcurrentOperationException, ManagementServerException, VirtualMachineMigrationException; + Outcome migrateWithStorage(String vmUuid, long srcHostId, long destId, Map volumeToPool); void reboot(String vmUuid); diff --git a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java index 5f789831488..4dd518debd4 100755 --- a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -603,7 +603,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac @DB protected void changeState2(VMInstanceVO vm, VirtualMachine.Event vmEvent, Long hostId, VmWorkJobVO work, VirtualMachine.Event workEvent) throws NoTransitionException { VmWorkJobVO.Step currentStep = work.getStep(); - StateMachine sm = VmWorkJobDispatcher.Migrate.equals(work.getCmd()) ? MigrationStateMachine : null; + StateMachine sm = (VmWorkJobDispatcher.Migrate.equals(work.getCmd()) || + VmWorkJobDispatcher.MigrateWithStorage.equals(work.getCmd())) ? MigrationStateMachine : null; Transaction txn = Transaction.currentTxn(); @@ -1633,51 +1634,21 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } } - private void moveVmToMigratingState(VMInstanceVO vm, Long hostId, VmWorkJobVO work) - throws ConcurrentOperationException { - // Put the vm in migrating state. - try { - if (!changeState(vm, VirtualMachine.Event.MigrationRequested, hostId, work, Step.Migrating)) { - s_logger.info("Migration cancelled because state has changed: " + vm); - throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm); - } - } catch (NoTransitionException e) { - s_logger.info("Migration cancelled because " + e.getMessage()); - throw new ConcurrentOperationException("Migration cancelled because " + e.getMessage()); - } - } - - private void moveVmOutofMigratingStateOnSuccess(VMInstanceVO vm, Long hostId, VmWorkJobVO work) - throws ConcurrentOperationException { - // Put the vm in running state. - try { - if (!changeState(vm, VirtualMachine.Event.OperationSucceeded, hostId, work, Step.Started)) { - s_logger.error("Unable to change the state for " + vm); - throw new ConcurrentOperationException("Unable to change the state for " + vm); - } - } catch (NoTransitionException e) { - s_logger.error("Unable to change state due to " + e.getMessage()); - throw new ConcurrentOperationException("Unable to change state due to " + e.getMessage()); - } - } - @Override - public VirtualMachine migrateWithStorage(String vmUuid, long srcHostId, long destHostId, - Map volumeToPool) throws ResourceUnavailableException, ConcurrentOperationException, - ManagementServerException, VirtualMachineMigrationException { + public Outcome migrateWithStorage(String vmUuid, long srcHostId, long destHostId, + Map volumeToPool) { CallContext context = CallContext.current(); + User user = context.getCallingUser(); + Account account = context.getCallingAccount(); - VMInstanceVO vm = _vmDao.findByUuid(vmUuid); HostVO srcHost = _entityMgr.findById(HostVO.class, srcHostId); HostVO destHost = _entityMgr.findById(HostVO.class, destHostId); - VirtualMachineGuru vmGuru = getVmGuru(vm); - DataCenterVO dc = _entityMgr.findById(DataCenterVO.class, destHost.getDataCenterId()); HostPodVO pod = _entityMgr.findById(HostPodVO.class, destHost.getPodId()); Cluster cluster = _entityMgr.findById(ClusterVO.class, destHost.getClusterId()); - DeployDestination destination = new DeployDestination(dc, pod, cluster, destHost); // Create a map of which volume should go in which storage pool. + final VMInstanceVO vm = _vmDao.findByUuid(vmUuid); VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); filterPoolListForVolumesForMigration(profile, destHost, volumeToPool); @@ -1688,6 +1659,59 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac " to destination host " + destHost + " doesn't involve migrating the volumes."); } + DeployDestination destination = new DeployDestination(dc, pod, cluster, destHost, volumeToPool); + VmWorkJobVO workJob = null; + Transaction txn = Transaction.currentTxn(); + txn.start(); + + _vmDao.lockRow(vm.getId(), true); + + workJob = new VmWorkJobVO(context.getContextId()); + workJob.setDispatcher(VmWorkJobDispatcher.VM_WORK_JOB_DISPATCHER); + workJob.setCmd(VmWorkJobDispatcher.MigrateWithStorage); + workJob.setAccountId(account.getId()); + workJob.setUserId(user.getId()); + workJob.setVmType(vm.getType()); + workJob.setVmInstanceId(vm.getId()); + + // save work context info (there are some duplications) + VmWorkMigrate workInfo = new VmWorkMigrate(user.getId(), account.getId(), vm.getId(), srcHostId, destination); + workJob.setCmdInfo(VmWorkJobDispatcher.serialize(workInfo)); + + _jobMgr.submitAsyncJob(workJob, VmWorkJobDispatcher.VM_WORK_QUEUE, vm.getId()); + + txn.commit(); + final long jobId = workJob.getId(); + AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(jobId); + return new VmOutcome(workJob, VirtualMachine.PowerState.PowerOn, vm.getId(), vm.getPowerHostId()); + } + + public void orchestrateMigrateWithStorage(String vmUuid, long srcHostId, DeployDestination destination) + throws AgentUnavailableException, OperationTimedoutException { + AsyncJobExecutionContext jc = AsyncJobExecutionContext.getCurrentExecutionContext(); + + VMInstanceVO vm = _vmDao.findByUuid(vmUuid); + if (vm == null) { + throw new CloudRuntimeException("Unable to find the vm " + vm); + } + + if (vm.getState() != State.Running || vm.getHostId() == null || vm.getHostId() != srcHostId ) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Proper conditions to migrate " + vm + " is not met."); + } + return; + } + + Host fromHost = _entityMgr.findById(Host.class, srcHostId); + if (fromHost == null) { + throw new CloudRuntimeException("Unable to find the host to migrate from: " + srcHostId); + } + long dstHostId = destination.getHost().getId(); + + if (s_logger.isDebugEnabled()) { + s_logger.debug("Migrating " + vm + " to " + destination); + } + short alertType = AlertManager.ALERT_TYPE_USERVM_MIGRATE; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE; @@ -1695,65 +1719,69 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY_MIGRATE; } - _networkMgr.prepareNicForMigration(profile, destination); - _volumeMgr.prepareForMigration(profile, destination); - HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType()); - VirtualMachineTO to = hvGuru.implement(profile); + VirtualMachineProfile srcVm = new VirtualMachineProfileImpl(vm); + for (NicProfile nic : _networkMgr.getNicProfiles(vm)) { + srcVm.addNic(nic); + } - VmWorkJobVO work = new VmWorkJobVO(context.getContextId()); -// VmWorkJobVO work = new VmWorkJobVO(UUID.randomUUID().toString(), _nodeId, State.Migrating, vm.getType(), vm.getId()); -// work.setStep(Step.Prepare); -// work.setResourceType(ItWorkVO.ResourceType.Host); -// work.setResourceId(destHostId); -// work = _workDao.persist(work); + VirtualMachineProfile dstVm = new VirtualMachineProfileImpl(vm); + _networkMgr.prepareNicForMigration(dstVm, destination); + _volumeMgr.prepareForMigration(dstVm, destination); - // Put the vm in migrating state. - vm.setLastHostId(srcHostId); - moveVmToMigratingState(vm, destHostId, work); + VirtualMachineTO to = toVmTO(dstVm); + + VmWorkJobVO work = _workJobDao.findById(jc.getJob().getId()); + work.setStep(MigrationStateMachine.getNextState(Step.Filed, VirtualMachine.Event.MigrationRequested)); + work = _workJobDao.persist(work); - boolean migrated = false; try { - // Migrate the vm and its volume. - _volumeMgr.migrateVolumes(vm, to, srcHost, destHost, volumeToPool); + vm.setLastHostId(srcHostId); + changeState2(vm, VirtualMachine.Event.MigrationRequested, dstHostId, work, VirtualMachine.Event.OperationSucceeded); - // Put the vm back to running state. - moveVmOutofMigratingStateOnSuccess(vm, destHost.getId(), work); + // Migrate the vm and its volume. + HostVO srcHost = _entityMgr.findById(HostVO.class, srcHostId); + _volumeMgr.migrateVolumes(vm, to, srcHost, destination.getHost(), destination.getStorageForDisks()); + + if (!changeState(vm, VirtualMachine.Event.OperationSucceeded, dstHostId, work, Step.Started)) { + throw new CloudRuntimeException("Unable to change the state for " + vm); + } try { - if (!checkVmOnHost(vm, destHostId)) { - s_logger.error("Vm not found on destination host. Unable to complete migration for " + vm); - try { - _agentMgr.send(srcHostId, new Commands(cleanup(vm.getInstanceName())), null); - } catch (AgentUnavailableException e) { - s_logger.error("AgentUnavailableException while cleanup on source host: " + srcHostId); - } - cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, true); - return null; + if (!checkVmOnHost(vm, dstHostId)) { + throw new CloudRuntimeException("Unable to complete migration for " + vm); } } catch (OperationTimedoutException e) { - s_logger.warn("Error while checking the vm " + vm + " is on host " + destHost, e); + s_logger.warn("Unable to verify that " + vm + " has migrated but since the migrate command worked, it is assumed to have worked"); } - migrated = true; - return vm; + _networkMgr.commitNicForMigration(srcVm, dstVm); + changeState2(vm, null, dstHostId, work, VirtualMachine.Event.OperationSucceeded); + + } catch (NoTransitionException e) { + throw new CloudRuntimeException("Unable to change state", e); } finally { - if (!migrated) { - s_logger.info("Migration was unsuccessful. Cleaning up: " + vm); - _alertMgr.sendAlert(alertType, srcHost.getDataCenterId(), srcHost.getPodId(), "Unable to migrate vm " + - vm.getInstanceName() + " from host " + srcHost.getName() + " in zone " + dc.getName() + - " and pod " + dc.getName(), "Migrate Command failed. Please check logs."); + Step step = work.getStep(); + if (step != Step.Done) { + s_logger.debug("Migration was unsuccessful. Cleaning up: " + vm + " Step was at " + step); + + _alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(), "Unable to migrate vm " + vm.getInstanceName() + " from host " + fromHost.getName() + + " in zone " + destination.getDataCenter().getName() + " and pod " + destination.getPod().getName(), "Migrate Command failed. Please check logs."); + + boolean cleanup = false; try { - _agentMgr.send(destHostId, new Commands(cleanup(vm.getInstanceName())), null); - stateTransitTo(vm, VirtualMachine.Event.OperationFailed, srcHostId); - } catch (AgentUnavailableException e) { - s_logger.warn("Looks like the destination Host is unavailable for cleanup.", e); - } catch (NoTransitionException e) { - s_logger.error("Error while transitioning vm from migrating to running state.", e); + cleanupMigration(work, dstVm, vm, false); + cleanup = true; + } catch (Exception ae) { + s_logger.warn("Unable to cleanup migration for " + vm); + } + + if (cleanup) { + _networkMgr.rollbackNicForMigration(srcVm, dstVm); + + work.setStep(Step.Done); + _workJobDao.update(work.getId(), work); } } - - work.setStep(Step.Done); - // FIXME _workDao.update(work.getId(), work); } } diff --git a/engine/orchestration/src/com/cloud/vm/VmWorkJobDispatcher.java b/engine/orchestration/src/com/cloud/vm/VmWorkJobDispatcher.java index 272a18b449f..bb7f4b85115 100644 --- a/engine/orchestration/src/com/cloud/vm/VmWorkJobDispatcher.java +++ b/engine/orchestration/src/com/cloud/vm/VmWorkJobDispatcher.java @@ -61,6 +61,7 @@ public class VmWorkJobDispatcher extends AdapterBase implements AsyncJobDispatch public final static String Start = "start"; public final static String Stop = "stop"; public final static String Migrate = "migrate"; + public final static String MigrateWithStorage = "migratewithstorage"; @Inject private VirtualMachineManagerImpl _vmMgr; @@ -82,9 +83,12 @@ public class VmWorkJobDispatcher extends AdapterBase implements AsyncJobDispatch work = deserialize(VmWorkStop.class, job.getCmdInfo()); } else if (cmd.equals(Migrate)) { work = deserialize(VmWorkMigrate.class, job.getCmdInfo()); + } else if (cmd.equals(MigrateWithStorage)) { + work = deserialize(VmWorkMigrate.class, job.getCmdInfo()); } - assert(work != null); - + + assert(work != null); + CallContext.register(work.getUserId(), work.getAccountId(), job.getRelated()); VMInstanceVO vm = _instanceDao.findById(work.getVmId()); @@ -102,6 +106,9 @@ public class VmWorkJobDispatcher extends AdapterBase implements AsyncJobDispatch } else if (cmd.equals(Migrate)) { VmWorkMigrate migrate = (VmWorkMigrate)work; _vmMgr.orchestrateMigrate(vm.getUuid(), migrate.getSrcHostId(), migrate.getDeployDestination()); + } else if (cmd.equals(MigrateWithStorage)) { + VmWorkMigrate migrate = (VmWorkMigrate)work; + _vmMgr.orchestrateMigrateWithStorage(vm.getUuid(), migrate.getSrcHostId(), migrate.getDeployDestination()); } _asyncJobMgr.completeAsyncJob(job.getId(), JobInfo.Status.SUCCEEDED, 0, null); } catch(Throwable e) { diff --git a/server/src/com/cloud/vm/UserVmManagerImpl.java b/server/src/com/cloud/vm/UserVmManagerImpl.java index 22eb4c1d2cf..ff30bdc214d 100755 --- a/server/src/com/cloud/vm/UserVmManagerImpl.java +++ b/server/src/com/cloud/vm/UserVmManagerImpl.java @@ -4027,7 +4027,14 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Use " migrate to this host"); } - return _itMgr.migrateWithStorage(vm.getUuid(), srcHostId, destinationHost.getId(), volToPoolObjectMap); + Outcome outcome = _itMgr.migrateWithStorage(vm.getUuid(), srcHostId, destinationHost.getId(), volToPoolObjectMap); + try { + return outcome.get(); + } catch (InterruptedException e) { + throw new CloudRuntimeException("Interrupted while waiting for the outcome of " + outcome.getJob()); + } catch (java.util.concurrent.ExecutionException e) { + throw new CloudRuntimeException("Unable to start virtual machine", e.getCause()); + } } @DB