From d44dc0730b1bde5465f56522ce14eb8346a6002d Mon Sep 17 00:00:00 2001 From: dahn Date: Tue, 7 Jan 2020 09:12:41 +0100 Subject: [PATCH] only update powerstate if sure it is the latest (#3743) --- .../cloud/agent/manager/AgentManagerImpl.java | 2 +- .../cloud/vm/VirtualMachineManagerImpl.java | 27 +++++++---- .../vm/VirtualMachinePowerStateSyncImpl.java | 45 +++++++++++++------ .../java/com/cloud/vm/dao/VMInstanceDao.java | 2 +- .../com/cloud/vm/dao/VMInstanceDaoImpl.java | 6 ++- .../vmware/resource/VmwareResource.java | 2 +- 6 files changed, 57 insertions(+), 27 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 60911319e33..68e27211603 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -1242,7 +1242,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl if (s_logger.isDebugEnabled()) { if (cmd instanceof PingRoutingCommand) { logD = false; - s_logger.debug("Ping from " + hostId + "(" + hostName + ")"); + s_logger.debug("Ping from Routing host " + hostId + "(" + hostName + ")"); s_logger.trace("SeqA " + hostId + "-" + request.getSequence() + ": Processing " + request); } else if (cmd instanceof PingCommand) { logD = false; diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index 3a3c760c71f..712b534e505 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -4226,7 +4226,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac break; case PowerOff: - case PowerReportMissing: + case PowerReportMissing: // rigorously set to Migrating? or just do nothing until...? or create a missing state? + // for now handle in line with legacy as power off handlePowerOffReportWithNoPendingJobsOnVM(vm); break; @@ -4337,8 +4338,15 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac case Running: case Stopped: case Migrating: - s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it"); - if(vm.isHaEnabled() && vm.getState() == State.Running && HaVmRestartHostUp.value() && vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != HypervisorType.Hyperv) { + if (s_logger.isInfoEnabled()) { + s_logger.info( + String.format("VM %s is at %s and we received a %s report while there is no pending jobs on it" + , vm.getInstanceName(), vm.getState(), vm.getPowerState())); + } + if(vm.isHaEnabled() && vm.getState() == State.Running + && HaVmRestartHostUp.value() + && vm.getHypervisorType() != HypervisorType.VMware + && vm.getHypervisorType() != HypervisorType.Hyperv) { s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart"); if(!_haMgr.hasPendingHaWork(vm.getId())) { _haMgr.scheduleRestart(vm, true); @@ -4348,11 +4356,14 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac return; } - final VirtualMachineGuru vmGuru = getVmGuru(vm); - final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); - if (!sendStop(vmGuru, profile, true, true)) { - // In case StopCommand fails, don't proceed further - return; + // not when report is missing + if(PowerState.PowerOff.equals(vm.getPowerState())) { + final VirtualMachineGuru vmGuru = getVmGuru(vm); + final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); + if (!sendStop(vmGuru, profile, true, true)) { + // In case StopCommand fails, don't proceed further + return; + } } try { diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java index 80a422574c7..14043a0523d 100644 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java @@ -38,7 +38,6 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat @Inject MessageBus _messageBus; @Inject VMInstanceDao _instanceDao; - @Inject VirtualMachineManager _vmMgr; @Inject ManagementServiceConfiguration mgmtServiceConf; public VirtualMachinePowerStateSyncImpl() { @@ -69,25 +68,30 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat private void processReport(long hostId, Map translatedInfo) { - if (s_logger.isDebugEnabled()) + if (s_logger.isDebugEnabled()) { s_logger.debug("Process VM state report. host: " + hostId + ", number of records in report: " + translatedInfo.size()); + } for (Map.Entry entry : translatedInfo.entrySet()) { if (s_logger.isDebugEnabled()) s_logger.debug("VM state report. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue()); - if (_instanceDao.updatePowerState(entry.getKey(), hostId, entry.getValue())) { - if (s_logger.isDebugEnabled()) - s_logger.debug("VM state report is updated. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue()); + if (_instanceDao.updatePowerState(entry.getKey(), hostId, entry.getValue(), DateUtil.currentGMTTime())) { + if (s_logger.isInfoEnabled()) { + s_logger.info("VM state report is updated. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue()); + } _messageBus.publish(null, VirtualMachineManager.Topics.VM_POWER_STATE, PublishScope.GLOBAL, entry.getKey()); } else { - if (s_logger.isDebugEnabled()) - s_logger.debug("VM power state does not change, skip DB writing. vm id: " + entry.getKey()); + if (s_logger.isTraceEnabled()) { + s_logger.trace("VM power state does not change, skip DB writing. vm id: " + entry.getKey()); + } } } + // any state outdates should be checked against the time before this list was retrieved + Date startTime = DateUtil.currentGMTTime(); // for all running/stopping VMs, we provide monitoring of missing report List vmsThatAreMissingReport = _instanceDao.findByHostInStates(hostId, VirtualMachine.State.Running, VirtualMachine.State.Stopping, VirtualMachine.State.Starting); @@ -98,10 +102,12 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat it.remove(); } + // here we need to be wary of out of band migration as opposed to other, more unexpected state changes if (vmsThatAreMissingReport.size() > 0) { Date currentTime = DateUtil.currentGMTTime(); - if (s_logger.isDebugEnabled()) + if (s_logger.isDebugEnabled()) { s_logger.debug("Run missing VM report. current time: " + currentTime.getTime()); + } // 2 times of sync-update interval for graceful period long milliSecondsGracefullPeriod = mgmtServiceConf.getPingInterval() * 2000L; @@ -130,23 +136,34 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat } } - if (s_logger.isDebugEnabled()) - s_logger.debug("Detected missing VM. host: " + hostId + ", vm id: " + instance.getId() + - ", power state: PowerReportMissing, last state update: " + vmStateUpdateTime.getTime()); + if (s_logger.isInfoEnabled()) { + s_logger.info( + String.format("Detected missing VM. host: %l, vm id: %l(%s), power state: %s, last state update: %l" + , hostId + , instance.getId() + , instance.getUuid() + , VirtualMachine.PowerState.PowerReportMissing + , vmStateUpdateTime.getTime())); + } long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime(); if (milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) { s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has passed graceful period"); - if (_instanceDao.updatePowerState(instance.getId(), hostId, VirtualMachine.PowerState.PowerReportMissing)) { - if (s_logger.isDebugEnabled()) + // this is were a race condition might have happened if we don't re-fetch the instance; + // between the startime of this job and the currentTime of this missing-branch + // an update might have occurred that we should not override in case of out of band migration + if (_instanceDao.updatePowerState(instance.getId(), hostId, VirtualMachine.PowerState.PowerReportMissing, startTime)) { + if (s_logger.isDebugEnabled()) { s_logger.debug("VM state report is updated. host: " + hostId + ", vm id: " + instance.getId() + ", power state: PowerReportMissing "); + } _messageBus.publish(null, VirtualMachineManager.Topics.VM_POWER_STATE, PublishScope.GLOBAL, instance.getId()); } else { - if (s_logger.isDebugEnabled()) + if (s_logger.isDebugEnabled()) { s_logger.debug("VM power state does not change, skip DB writing. vm id: " + instance.getId()); + } } } else { s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has not passed graceful period yet"); diff --git a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDao.java b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDao.java index ce4d46abc25..a7ed9225254 100755 --- a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDao.java +++ b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDao.java @@ -139,7 +139,7 @@ public interface VMInstanceDao extends GenericDao, StateDao< List listStartingWithNoHostId(); - boolean updatePowerState(long instanceId, long powerHostId, VirtualMachine.PowerState powerState); + boolean updatePowerState(long instanceId, long powerHostId, VirtualMachine.PowerState powerState, Date wisdomEra); void resetVmPowerStateTracking(long instanceId); diff --git a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java index 405e023f10a..1945969f543 100755 --- a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java @@ -828,13 +828,15 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem } @Override - public boolean updatePowerState(final long instanceId, final long powerHostId, final VirtualMachine.PowerState powerState) { + public boolean updatePowerState(final long instanceId, final long powerHostId, final VirtualMachine.PowerState powerState, Date wisdomEra) { return Transaction.execute(new TransactionCallback() { @Override public Boolean doInTransaction(TransactionStatus status) { boolean needToUpdate = false; VMInstanceVO instance = findById(instanceId); - if (instance != null) { + if (instance != null + && (null == instance.getPowerStateUpdateTime() + || instance.getPowerStateUpdateTime().before(wisdomEra))) { Long savedPowerHostId = instance.getPowerHostId(); if (instance.getPowerState() != powerState || savedPowerHostId == null || savedPowerHostId.longValue() != powerHostId) { diff --git a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java index 02f758ba844..b5a1260a723 100644 --- a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java +++ b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java @@ -3817,7 +3817,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa } } else { - String msg = "VM " + cmd.getVmName() + " is no longer in vSphere"; + String msg = "VM " + cmd.getVmName() + " is no longer on the expected host in vSphere"; s_logger.info(msg); return new StopAnswer(cmd, msg, true); }