From 50f311ed947bd22404465ca5df416abe0fdcfc4e Mon Sep 17 00:00:00 2001 From: Kelven Yang Date: Wed, 22 Jan 2014 11:09:45 -0800 Subject: [PATCH] CLOUDSTACK-5928: disable host delta sync when new VM sync is enabled --- .../cloud/vm/VirtualMachineManagerImpl.java | 38 +++++++-- .../vmware/manager/VmwareManagerImpl.java | 85 ++++++++++--------- .../cloud/storage/VolumeApiServiceImpl.java | 2 +- .../vm/snapshot/VMSnapshotManagerImpl.java | 2 +- 4 files changed, 75 insertions(+), 52 deletions(-) diff --git a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java index 6dc58944d07..40b9151442a 100755 --- a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -337,7 +337,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac // TODO, remove it after transient period is over static final ConfigKey VmJobEnabled = new ConfigKey("Advanced", - Boolean.class, "vm.job.enabled", "false", + Boolean.class, "vm.job.enabled", "true", "True to enable new VM sync model. false to use the old way", false); static final ConfigKey VmJobCheckInterval = new ConfigKey("Advanced", @@ -2944,12 +2944,14 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac if (cmd instanceof PingRoutingCommand) { PingRoutingCommand ping = (PingRoutingCommand)cmd; if (ping.getNewStates() != null && ping.getNewStates().size() > 0) { - Commands commands = deltaHostSync(agentId, ping.getNewStates()); - if (commands.size() > 0) { - try { - _agentMgr.send(agentId, commands, this); - } catch (final AgentUnavailableException e) { - s_logger.warn("Agent is now unavailable", e); + if (!VmJobEnabled.value()) { + Commands commands = deltaHostSync(agentId, ping.getNewStates()); + if (commands.size() > 0) { + try { + _agentMgr.send(agentId, commands, this); + } catch (final AgentUnavailableException e) { + s_logger.warn("Agent is now unavailable", e); + } } } } @@ -3973,6 +3975,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac s_logger.warn("VM " + vmId + " no longer exists when processing VM state report"); } } else { + s_logger.info("There is pending job working on the VM. vm id: " + vmId + ", postpone power-change report by resetting power-change counters"); + // reset VM power state tracking so that we won't lost signal when VM has // been translated to _vmDao.resetVmPowerStateTracking(vmId); @@ -3981,19 +3985,23 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac private void handlePowerOnReportWithNoPendingJobsOnVM(VMInstanceVO vm) { // - // 1) handle left-over transitional VM states + // 1) handle left-over transitional VM states // 2) handle out of band VM live migration // 3) handle out of sync stationary states, marking VM from Stopped to Running with // alert messages // switch (vm.getState()) { case Starting: + s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-on report while there is no pending jobs on it"); + try { stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId()); } catch (NoTransitionException e) { s_logger.warn("Unexpected VM state transition exception, race-condition?", e); } + s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Running state according to power-on report from hypervisor"); + // we need to alert admin or user about this risky state transition _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() @@ -4008,10 +4016,13 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } catch (NoTransitionException e) { s_logger.warn("Unexpected VM state transition exception, race-condition?", e); } + break; case Stopping: case Stopped: + s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-on report while there is no pending jobs on it"); + try { stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId()); } catch (NoTransitionException e) { @@ -4020,6 +4031,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (" + vm.getState() + " -> Running) from out-of-context transition. VM network environment may need to be reset"); + + s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Running state according to power-on report from hypervisor"); break; case Destroyed: @@ -4029,11 +4042,13 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac break; case Migrating: + s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-on report while there is no pending jobs on it"); try { stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId()); } catch (NoTransitionException e) { s_logger.warn("Unexpected VM state transition exception, race-condition?", e); } + s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Running state according to power-on report from hypervisor"); break; case Error: @@ -4046,7 +4061,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac private void handlePowerOffReportWithNoPendingJobsOnVM(VMInstanceVO vm) { - // 1) handle left-over transitional VM states + // 1) handle left-over transitional VM states // 2) handle out of sync stationary states, schedule force-stop to release resources // switch (vm.getState()) { @@ -4055,14 +4070,19 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac case Running: case Stopped: case Migrating: + s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it"); try { stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOffReport, vm.getPowerHostId()); } catch (NoTransitionException e) { s_logger.warn("Unexpected VM state transition exception, race-condition?", e); } + _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (" + vm.getState() + " -> Stopped) from out-of-context transition."); + + s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Stopped state according to power-on report from hypervisor"); + // TODO: we need to forcely release all resource allocation break; diff --git a/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java b/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java index bf56c54440b..028fa942d57 100755 --- a/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java +++ b/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java @@ -132,8 +132,8 @@ import com.cloud.vm.DomainRouterVO; public class VmwareManagerImpl extends ManagerBase implements VmwareManager, VmwareStorageMount, Listener, VmwareDatacenterService { private static final Logger s_logger = Logger.getLogger(VmwareManagerImpl.class); - private static final int STARTUP_DELAY = 60000; // 60 seconds - private static final long DEFAULT_HOST_SCAN_INTERVAL = 600000; // every 10 minutes + private static final int STARTUP_DELAY = 60000; // 60 seconds + private static final long DEFAULT_HOST_SCAN_INTERVAL = 600000; // every 10 minutes private long _hostScanInterval = DEFAULT_HOST_SCAN_INTERVAL; int _timeout; @@ -173,7 +173,7 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw String _managemetPortGroupName; String _defaultSystemVmNicAdapterType = VirtualEthernetCardType.E1000.toString(); String _recycleHungWorker = "false"; - long _hungWorkerTimeout = 7200000; // 2 hour + long _hungWorkerTimeout = 7200000; // 2 hour int _additionalPortRangeStart; int _additionalPortRangeSize; int _routerExtraPublicNics = 2; @@ -296,10 +296,10 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw if(_recycleHungWorker == null || _recycleHungWorker.isEmpty()) { _recycleHungWorker = "false"; } - + value = _configDao.getValue(Config.VmwareHungWorkerTimeout.key()); if(value != null) - _hungWorkerTimeout = Long.parseLong(value) * 1000; + _hungWorkerTimeout = Long.parseLong(value) * 1000; _rootDiskController = _configDao.getValue(Config.VmwareRootDiskControllerType.key()); if(_rootDiskController == null || _rootDiskController.isEmpty()) { @@ -459,7 +459,7 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw hostSpec.setUserName(userName); hostSpec.setPassword(password); hostSpec.setHostName(host); - hostSpec.setForce(true); // forcely take over the host + hostSpec.setForce(true); // forcely take over the host ManagedObjectReference morTask = serviceContext.getService().addHostTask(morCluster, hostSpec, true, null, null); boolean taskResult = vclient.waitForTask(morTask); @@ -547,49 +547,49 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw public void gcLeftOverVMs(VmwareContext context) { VmwareCleanupMaid.gcLeftOverVMs(context); } - + @Override public boolean needRecycle(String workerTag) { - if(s_logger.isInfoEnabled()) - s_logger.info("Check to see if a worker VM with tag " + workerTag + " needs to be recycled"); - - if(workerTag == null || workerTag.isEmpty()) { - s_logger.error("Invalid worker VM tag " + workerTag); - return false; - } - - String tokens[] = workerTag.split("-"); - if(tokens.length != 3) { - s_logger.error("Invalid worker VM tag " + workerTag); - return false; - } - - long startTick = Long.parseLong(tokens[0]); - long msid = Long.parseLong(tokens[1]); - long runid = Long.parseLong(tokens[2]); - + if (s_logger.isInfoEnabled()) + s_logger.info("Check to see if a worker VM with tag " + workerTag + " needs to be recycled"); + + if (workerTag == null || workerTag.isEmpty()) { + s_logger.error("Invalid worker VM tag " + workerTag); + return false; + } + + String tokens[] = workerTag.split("-"); + if (tokens.length != 3) { + s_logger.error("Invalid worker VM tag " + workerTag); + return false; + } + + long startTick = Long.parseLong(tokens[0]); + long msid = Long.parseLong(tokens[1]); + long runid = Long.parseLong(tokens[2]); + if(_mshostPeerDao.countStateSeenInPeers(msid, runid, ManagementServerHost.State.Down) > 0) { - if(s_logger.isInfoEnabled()) - s_logger.info("Worker VM's owner management server node has been detected down from peer nodes, recycle it"); - return true; + if (s_logger.isInfoEnabled()) + s_logger.info("Worker VM's owner management server node has been detected down from peer nodes, recycle it"); + return true; } - + if(msid == _clusterMgr.getManagementNodeId() && runid != _clusterMgr.getCurrentRunId()) { - if(s_logger.isInfoEnabled()) - s_logger.info("Worker VM's owner management server has changed runid, recycle it"); - return true; + if (s_logger.isInfoEnabled()) + s_logger.info("Worker VM's owner management server has changed runid, recycle it"); + return true; } - + // disable time-out check until we have found out a VMware API that can check if // there are pending tasks on the subject VM -/* - if(System.currentTimeMillis() - startTick > _hungWorkerTimeout) { - if(s_logger.isInfoEnabled()) - s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000); - return true; - } -*/ - return false; + /* + if(System.currentTimeMillis() - startTick > _hungWorkerTimeout) { + if(s_logger.isInfoEnabled()) + s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000); + return true; + } + */ + return false; } @Override @@ -619,6 +619,9 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw ", destination: " + destIso.getAbsolutePath()); try { FileUtil.copyfile(srcIso, destIso); + + s_logger.info("System VM patch ISO file is copied to secondary storage. source ISO: " + srcIso.getAbsolutePath() + + ", destination: " + destIso.getAbsolutePath()); } catch(IOException e) { s_logger.error("Unexpected exception ", e); diff --git a/server/src/com/cloud/storage/VolumeApiServiceImpl.java b/server/src/com/cloud/storage/VolumeApiServiceImpl.java index f46f1e9e0d0..6be43808008 100644 --- a/server/src/com/cloud/storage/VolumeApiServiceImpl.java +++ b/server/src/com/cloud/storage/VolumeApiServiceImpl.java @@ -333,7 +333,7 @@ public class VolumeApiServiceImpl extends ManagerBase implements VolumeApiServic // TODO static final ConfigKey VmJobEnabled = new ConfigKey("Advanced", - Boolean.class, "vm.job.enabled", "false", + Boolean.class, "vm.job.enabled", "true", "True to enable new VM sync model. false to use the old way", false); static final ConfigKey VmJobCheckInterval = new ConfigKey("Advanced", Long.class, "vm.job.check.interval", "3000", diff --git a/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java b/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java index e273e1a2338..ceded0867dc 100644 --- a/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java +++ b/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java @@ -136,7 +136,7 @@ public class VMSnapshotManagerImpl extends ManagerBase implements VMSnapshotMana // TODO static final ConfigKey VmJobEnabled = new ConfigKey("Advanced", - Boolean.class, "vm.job.enabled", "false", + Boolean.class, "vm.job.enabled", "true", "True to enable new VM sync model. false to use the old way", false); static final ConfigKey VmJobCheckInterval = new ConfigKey("Advanced", Long.class, "vm.job.check.interval", "3000",