From 2ffc0c507391fe5a20a18b753b8cfa465a9599f9 Mon Sep 17 00:00:00 2001 From: Andrija Panic <45762285+andrijapanicsb@users.noreply.github.com> Date: Tue, 5 Nov 2019 10:46:43 +0100 Subject: [PATCH 1/4] Increase DHCP lease time to infinite (#3662) * Increase lease time to infinite Lease time set to effectively infinite (36000+ days) since we fully control VM lifecycle via CloudStack. Infinite time helps avoid some edge cases which could cause DHCPNAK being sent to VMs, since (RHEL) systems lose routes when they receive DHCPNAK. When a VM is expunged, its active lease and DHCP/DNS config are properly removed from related files in VR. * desc fix --- systemvm/debian/opt/cloud/bin/cs/CsDhcp.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/systemvm/debian/opt/cloud/bin/cs/CsDhcp.py b/systemvm/debian/opt/cloud/bin/cs/CsDhcp.py index 56b44195f6d..72c5daec0cc 100755 --- a/systemvm/debian/opt/cloud/bin/cs/CsDhcp.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsDhcp.py @@ -160,9 +160,15 @@ class CsDhcp(CsDataBag): def add(self, entry): self.add_host(entry['ipv4_address'], entry['host_name']) - # lease time boils down to once a month - # with a splay of 60 hours to prevent storms - lease = randint(700, 760) + # Lease time set to effectively infinite (36000+ days) since we properly control all DHCP/DNS config via CloudStack. + # Infinite time helps avoid some edge cases which could cause DHCPNAK being sent to VMs since + # (RHEL) systems lose routes when they receive DHCPNAK. + # When a VM is expunged, its active lease and DHCP/DNS config are properly removed from related files in VR, + # so the infinite duration of lease does not cause any issues or garbage. + # There will soon be a PR which also regenerates the /var/lib/misc/dnsmasq.leases (active lease DB file) + # in the new VR (when restarting network with cleanup), which will help around RHEL edge cases (described above) + # for the VMs that are already running in production systems with 30d lease time. 
+ lease = randint(870000, 870010) if entry['default_entry']: self.cloud.add("%s,%s,%s,%sh" % (entry['mac_address'], From 02714f263e34deca91966239553097ded123302f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Aur=C3=A8le=20Brothier?= Date: Wed, 6 Nov 2019 18:00:29 +0100 Subject: [PATCH 2/4] client: jetty session timeout set after server is started (#3658) fixed inability to set a custom session duration via server.properties on mgmt server. --- .../java/org/apache/cloudstack/ServerDaemon.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/client/src/main/java/org/apache/cloudstack/ServerDaemon.java b/client/src/main/java/org/apache/cloudstack/ServerDaemon.java index 1e47aef45a3..12c5f981466 100644 --- a/client/src/main/java/org/apache/cloudstack/ServerDaemon.java +++ b/client/src/main/java/org/apache/cloudstack/ServerDaemon.java @@ -24,6 +24,7 @@ import java.lang.management.ManagementFactory; import java.net.URL; import java.util.Properties; +import com.cloud.utils.Pair; import org.apache.commons.daemon.Daemon; import org.apache.commons.daemon.DaemonContext; import org.eclipse.jetty.jmx.MBeanContainer; @@ -40,6 +41,7 @@ import org.eclipse.jetty.server.handler.HandlerCollection; import org.eclipse.jetty.server.handler.MovedContextHandler; import org.eclipse.jetty.server.handler.RequestLogHandler; import org.eclipse.jetty.server.handler.gzip.GzipHandler; +import org.eclipse.jetty.server.session.SessionHandler; import org.eclipse.jetty.util.ssl.SslContextFactory; import org.eclipse.jetty.util.thread.QueuedThreadPool; import org.eclipse.jetty.util.thread.ScheduledExecutorScheduler; @@ -175,7 +177,8 @@ public class ServerDaemon implements Daemon { createHttpConnector(httpConfig); // Setup handlers - server.setHandler(createHandlers()); + Pair<SessionHandler, HandlerCollection> pair = createHandlers(); + server.setHandler(pair.second()); // Extra config options server.setStopAtShutdown(true); @@ -184,6 +187,8 @@ public class ServerDaemon implements Daemon { 
createHttpsConnector(httpConfig); server.start(); + // Must set the session timeout after the server has started + pair.first().setMaxInactiveInterval(sessionTimeout * 60); server.join(); } @@ -236,11 +241,10 @@ public class ServerDaemon implements Daemon { } } - private HandlerCollection createHandlers() { + private Pair<SessionHandler, HandlerCollection> createHandlers() { final WebAppContext webApp = new WebAppContext(); webApp.setContextPath(contextPath); webApp.setInitParameter("org.eclipse.jetty.servlet.Default.dirAllowed", "false"); - webApp.getSessionHandler().setMaxInactiveInterval(sessionTimeout * 60); // GZIP handler final GzipHandler gzipHandler = new GzipHandler(); @@ -259,14 +263,14 @@ public class ServerDaemon implements Daemon { final RequestLogHandler log = new RequestLogHandler(); log.setRequestLog(createRequestLog()); - // Redirect root context handler + // Redirect root context handler_war MovedContextHandler rootRedirect = new MovedContextHandler(); rootRedirect.setContextPath("/"); rootRedirect.setNewContextURL(contextPath); rootRedirect.setPermanent(true); // Put rootRedirect at the end! 
- return new HandlerCollection(log, gzipHandler, rootRedirect); + return new Pair<>(webApp.getSessionHandler(), new HandlerCollection(log, gzipHandler, rootRedirect)); } private RequestLog createRequestLog() { From 701f606dc9d8791e9df34686af52382fb00b1cbf Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Thu, 14 Nov 2019 19:08:34 +0530 Subject: [PATCH 3/4] ui: fix migrate host form no host popup (#3682) Signed-off-by: Abhishek Kumar --- ui/scripts/instances.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ui/scripts/instances.js b/ui/scripts/instances.js index 201796375b1..3e54a77c084 100644 --- a/ui/scripts/instances.js +++ b/ui/scripts/instances.js @@ -2004,14 +2004,10 @@ args.response.success({ data: items }); - } else if(args.page == 1) { + } else { args.response.success({ data: null }); - } else { - cloudStack.dialog.notice({ - message: _l('message.no.more.hosts.available') - }); } } }); From d44dc0730b1bde5465f56522ce14eb8346a6002d Mon Sep 17 00:00:00 2001 From: dahn Date: Tue, 7 Jan 2020 09:12:41 +0100 Subject: [PATCH 4/4] only update powerstate if sure it is the latest (#3743) --- .../cloud/agent/manager/AgentManagerImpl.java | 2 +- .../cloud/vm/VirtualMachineManagerImpl.java | 27 +++++++---- .../vm/VirtualMachinePowerStateSyncImpl.java | 45 +++++++++++++------ .../java/com/cloud/vm/dao/VMInstanceDao.java | 2 +- .../com/cloud/vm/dao/VMInstanceDaoImpl.java | 6 ++- .../vmware/resource/VmwareResource.java | 2 +- 6 files changed, 57 insertions(+), 27 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 60911319e33..68e27211603 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -1242,7 +1242,7 @@ public class AgentManagerImpl extends ManagerBase implements 
AgentManager, Handl if (s_logger.isDebugEnabled()) { if (cmd instanceof PingRoutingCommand) { logD = false; - s_logger.debug("Ping from " + hostId + "(" + hostName + ")"); + s_logger.debug("Ping from Routing host " + hostId + "(" + hostName + ")"); s_logger.trace("SeqA " + hostId + "-" + request.getSequence() + ": Processing " + request); } else if (cmd instanceof PingCommand) { logD = false; diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index 3a3c760c71f..712b534e505 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -4226,7 +4226,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac break; case PowerOff: - case PowerReportMissing: + case PowerReportMissing: // rigorously set to Migrating? or just do nothing until...? or create a missing state? 
+ // for now handle in line with legacy as power off handlePowerOffReportWithNoPendingJobsOnVM(vm); break; @@ -4337,8 +4338,15 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac case Running: case Stopped: case Migrating: - s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it"); - if(vm.isHaEnabled() && vm.getState() == State.Running && HaVmRestartHostUp.value() && vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != HypervisorType.Hyperv) { + if (s_logger.isInfoEnabled()) { + s_logger.info( + String.format("VM %s is at %s and we received a %s report while there is no pending jobs on it" + , vm.getInstanceName(), vm.getState(), vm.getPowerState())); + } + if(vm.isHaEnabled() && vm.getState() == State.Running + && HaVmRestartHostUp.value() + && vm.getHypervisorType() != HypervisorType.VMware + && vm.getHypervisorType() != HypervisorType.Hyperv) { s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart"); if(!_haMgr.hasPendingHaWork(vm.getId())) { _haMgr.scheduleRestart(vm, true); @@ -4348,11 +4356,14 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac return; } - final VirtualMachineGuru vmGuru = getVmGuru(vm); - final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); - if (!sendStop(vmGuru, profile, true, true)) { - // In case StopCommand fails, don't proceed further - return; + // not when report is missing + if(PowerState.PowerOff.equals(vm.getPowerState())) { + final VirtualMachineGuru vmGuru = getVmGuru(vm); + final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); + if (!sendStop(vmGuru, profile, true, true)) { + // In case StopCommand fails, don't proceed further + return; + } } try { diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java 
b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java index 80a422574c7..14043a0523d 100644 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java @@ -38,7 +38,6 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat @Inject MessageBus _messageBus; @Inject VMInstanceDao _instanceDao; - @Inject VirtualMachineManager _vmMgr; @Inject ManagementServiceConfiguration mgmtServiceConf; public VirtualMachinePowerStateSyncImpl() { @@ -69,25 +68,30 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat private void processReport(long hostId, Map translatedInfo) { - if (s_logger.isDebugEnabled()) + if (s_logger.isDebugEnabled()) { s_logger.debug("Process VM state report. host: " + hostId + ", number of records in report: " + translatedInfo.size()); + } for (Map.Entry entry : translatedInfo.entrySet()) { if (s_logger.isDebugEnabled()) s_logger.debug("VM state report. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue()); - if (_instanceDao.updatePowerState(entry.getKey(), hostId, entry.getValue())) { - if (s_logger.isDebugEnabled()) - s_logger.debug("VM state report is updated. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue()); + if (_instanceDao.updatePowerState(entry.getKey(), hostId, entry.getValue(), DateUtil.currentGMTTime())) { + if (s_logger.isInfoEnabled()) { + s_logger.info("VM state report is updated. host: " + hostId + ", vm id: " + entry.getKey() + ", power state: " + entry.getValue()); + } _messageBus.publish(null, VirtualMachineManager.Topics.VM_POWER_STATE, PublishScope.GLOBAL, entry.getKey()); } else { - if (s_logger.isDebugEnabled()) - s_logger.debug("VM power state does not change, skip DB writing. 
vm id: " + entry.getKey()); + if (s_logger.isTraceEnabled()) { + s_logger.trace("VM power state does not change, skip DB writing. vm id: " + entry.getKey()); + } } } + // any state outdates should be checked against the time before this list was retrieved + Date startTime = DateUtil.currentGMTTime(); // for all running/stopping VMs, we provide monitoring of missing report List vmsThatAreMissingReport = _instanceDao.findByHostInStates(hostId, VirtualMachine.State.Running, VirtualMachine.State.Stopping, VirtualMachine.State.Starting); @@ -98,10 +102,12 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat it.remove(); } + // here we need to be wary of out of band migration as opposed to other, more unexpected state changes if (vmsThatAreMissingReport.size() > 0) { Date currentTime = DateUtil.currentGMTTime(); - if (s_logger.isDebugEnabled()) + if (s_logger.isDebugEnabled()) { s_logger.debug("Run missing VM report. current time: " + currentTime.getTime()); + } // 2 times of sync-update interval for graceful period long milliSecondsGracefullPeriod = mgmtServiceConf.getPingInterval() * 2000L; @@ -130,23 +136,34 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat } } - if (s_logger.isDebugEnabled()) - s_logger.debug("Detected missing VM. host: " + hostId + ", vm id: " + instance.getId() + - ", power state: PowerReportMissing, last state update: " + vmStateUpdateTime.getTime()); + if (s_logger.isInfoEnabled()) { + s_logger.info( + String.format("Detected missing VM. 
host: %d, vm id: %d(%s), power state: %s, last state update: %d" , hostId , instance.getId() , instance.getUuid() , VirtualMachine.PowerState.PowerReportMissing , vmStateUpdateTime.getTime())); + } long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime(); if (milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) { s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has passed graceful period"); - if (_instanceDao.updatePowerState(instance.getId(), hostId, VirtualMachine.PowerState.PowerReportMissing)) { - if (s_logger.isDebugEnabled()) + // this is where a race condition might have happened if we don't re-fetch the instance; + // between the startTime of this job and the currentTime of this missing-branch + // an update might have occurred that we should not override in case of out of band migration + if (_instanceDao.updatePowerState(instance.getId(), hostId, VirtualMachine.PowerState.PowerReportMissing, startTime)) { + if (s_logger.isDebugEnabled()) { s_logger.debug("VM state report is updated. host: " + hostId + ", vm id: " + instance.getId() + ", power state: PowerReportMissing "); + } _messageBus.publish(null, VirtualMachineManager.Topics.VM_POWER_STATE, PublishScope.GLOBAL, instance.getId()); } else { - if (s_logger.isDebugEnabled()) + if (s_logger.isDebugEnabled()) { s_logger.debug("VM power state does not change, skip DB writing. 
vm id: " + instance.getId()); + } } } else { s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has not passed graceful period yet"); diff --git a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDao.java b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDao.java index ce4d46abc25..a7ed9225254 100755 --- a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDao.java +++ b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDao.java @@ -139,7 +139,7 @@ public interface VMInstanceDao extends GenericDao, StateDao< List listStartingWithNoHostId(); - boolean updatePowerState(long instanceId, long powerHostId, VirtualMachine.PowerState powerState); + boolean updatePowerState(long instanceId, long powerHostId, VirtualMachine.PowerState powerState, Date wisdomEra); void resetVmPowerStateTracking(long instanceId); diff --git a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java index 405e023f10a..1945969f543 100755 --- a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java @@ -828,13 +828,15 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem } @Override - public boolean updatePowerState(final long instanceId, final long powerHostId, final VirtualMachine.PowerState powerState) { + public boolean updatePowerState(final long instanceId, final long powerHostId, final VirtualMachine.PowerState powerState, Date wisdomEra) { return Transaction.execute(new TransactionCallback() { @Override public Boolean doInTransaction(TransactionStatus status) { boolean needToUpdate = false; VMInstanceVO instance = findById(instanceId); - if (instance != null) { + if (instance != null + && (null == instance.getPowerStateUpdateTime() + || instance.getPowerStateUpdateTime().before(wisdomEra))) { Long savedPowerHostId = 
instance.getPowerHostId(); if (instance.getPowerState() != powerState || savedPowerHostId == null || savedPowerHostId.longValue() != powerHostId) { diff --git a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java index 02f758ba844..b5a1260a723 100644 --- a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java +++ b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java @@ -3817,7 +3817,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa } } else { - String msg = "VM " + cmd.getVmName() + " is no longer in vSphere"; + String msg = "VM " + cmd.getVmName() + " is no longer on the expected host in vSphere"; s_logger.info(msg); return new StopAnswer(cmd, msg, true); }