From f34469a41b5eedade04790e7f2b3306294b3b4f5 Mon Sep 17 00:00:00 2001 From: Harikrishna Patnala Date: Wed, 1 Mar 2017 14:30:51 +0530 Subject: [PATCH] CLOUDSTACK-9112: deployVM thread is holding the global lock on network longer and cause delays and some improvements in the planner There are some VM deployment failures when multiple VMs are deployed at the same time. The failures are mainly due to NetworkModel code that iterates over all the VLANs in the pod. This causes each deployVM thread to hold the global lock on the network longer, which causes delays. These delays in turn cause more threads to choose the same host and fail, since capacity is no longer available on that host. The following changes reduce the delays during VM deployment, and thereby the deployment failures seen when multiple VMs are launched at the same time. In Planner, remove the clusters that do not contain a host with a tag matching the service offering's host tag. This saves iterations over clusters that don't have a matching tagged host. In NetworkModel, do not query the VLANs for the pod within the loop. Also optimize the logic that queries the IPv4/IPv6 address. In DeploymentPlanningManagerImpl, do not process the affinity groups if the plan has a hostId provided. 
--- .../src/com/cloud/host/dao/HostDao.java | 2 + .../src/com/cloud/host/dao/HostDaoImpl.java | 25 ++++++ .../deploy/DeploymentPlanningManagerImpl.java | 78 ++++++++++--------- .../src/com/cloud/deploy/FirstFitPlanner.java | 16 ++++ .../com/cloud/network/NetworkModelImpl.java | 27 ++++--- .../com/cloud/vm/FirstFitPlannerTest.java | 30 +++++++ 6 files changed, 130 insertions(+), 48 deletions(-) diff --git a/engine/schema/src/com/cloud/host/dao/HostDao.java b/engine/schema/src/com/cloud/host/dao/HostDao.java index 88a354725a9..7ffe1ed2ed5 100644 --- a/engine/schema/src/com/cloud/host/dao/HostDao.java +++ b/engine/schema/src/com/cloud/host/dao/HostDao.java @@ -98,5 +98,7 @@ public interface HostDao extends GenericDao, StateDao listClustersByHostTag(String hostTagOnOffering); + List listByType(Type type); } diff --git a/engine/schema/src/com/cloud/host/dao/HostDaoImpl.java b/engine/schema/src/com/cloud/host/dao/HostDaoImpl.java index 46f8a4da2bc..a74b908457f 100644 --- a/engine/schema/src/com/cloud/host/dao/HostDaoImpl.java +++ b/engine/schema/src/com/cloud/host/dao/HostDaoImpl.java @@ -77,6 +77,8 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao private static final Logger status_logger = Logger.getLogger(Status.class); private static final Logger state_logger = Logger.getLogger(ResourceState.class); + private static final String LIST_CLUSTERID_FOR_HOST_TAG = "select distinct cluster_id from host join host_tags on host.id = host_tags.host_id and host_tags.tag = ?"; + protected SearchBuilder TypePodDcStatusSearch; protected SearchBuilder IdStatusSearch; @@ -1129,6 +1131,29 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao return customSearch(sc, null); } + @Override + public List listClustersByHostTag(String hostTagOnOffering) { + TransactionLegacy txn = TransactionLegacy.currentTxn(); + PreparedStatement pstmt = null; + List result = new ArrayList(); + StringBuilder sql = new StringBuilder(LIST_CLUSTERID_FOR_HOST_TAG); + // 
during listing the clusters that cross the threshold + // we need to check with disabled thresholds of each cluster if not defined at cluster consider the global value + try { + pstmt = txn.prepareAutoCloseStatement(sql.toString()); + pstmt.setString(1, hostTagOnOffering); + ResultSet rs = pstmt.executeQuery(); + while (rs.next()) { + result.add(rs.getLong(1)); + } + return result; + } catch (SQLException e) { + throw new CloudRuntimeException("DB Exception on: " + sql, e); + } catch (Throwable e) { + throw new CloudRuntimeException("Caught: " + sql, e); + } + } + @Override public List listAllHostsByType(Host.Type type) { SearchCriteria sc = TypeSearch.create(); diff --git a/server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java b/server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java index ef0ad19079f..d863962b17c 100644 --- a/server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java +++ b/server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java @@ -249,50 +249,16 @@ StateListener { public DeployDestination planDeployment(VirtualMachineProfile vmProfile, DeploymentPlan plan, ExcludeList avoids, DeploymentPlanner planner) throws InsufficientServerCapacityException, AffinityConflictException { - // call affinitygroup chain + ServiceOffering offering = vmProfile.getServiceOffering(); + int cpu_requested = offering.getCpu() * offering.getSpeed(); + long ram_requested = offering.getRamSize() * 1024L * 1024L; VirtualMachine vm = vmProfile.getVirtualMachine(); - long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId()); DataCenter dc = _dcDao.findById(vm.getDataCenterId()); - if (vmGroupCount > 0) { - for (AffinityGroupProcessor processor : _affinityProcessors) { - processor.process(vmProfile, plan, avoids); - } - } if (vm.getType() == VirtualMachine.Type.User || vm.getType() == VirtualMachine.Type.DomainRouter) { checkForNonDedicatedResources(vmProfile, dc, avoids); } - if (s_logger.isDebugEnabled()) { - 
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid()); - } - - // call planners - //DataCenter dc = _dcDao.findById(vm.getDataCenterId()); - // check if datacenter is in avoid set - if (avoids.shouldAvoid(dc)) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("DataCenter id = '" + dc.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning."); - } - return null; - } - - ServiceOffering offering = vmProfile.getServiceOffering(); - if(planner == null){ - String plannerName = offering.getDeploymentPlanner(); - if (plannerName == null) { - if (vm.getHypervisorType() == HypervisorType.BareMetal) { - plannerName = "BareMetalPlanner"; - } else { - plannerName = _configDao.getValue(Config.VmDeploymentPlanner.key()); - } - } - planner = getDeploymentPlannerByName(plannerName); - } - - int cpu_requested = offering.getCpu() * offering.getSpeed(); - long ram_requested = offering.getRamSize() * 1024L * 1024L; - if (s_logger.isDebugEnabled()) { s_logger.debug("DeploymentPlanner allocation algorithm: " + planner); @@ -364,6 +330,44 @@ StateListener { return null; } + // call affinitygroup chain + long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId()); + + if (vmGroupCount > 0) { + for (AffinityGroupProcessor processor : _affinityProcessors) { + processor.process(vmProfile, plan, avoids); + } + } + + if (vm.getType() == VirtualMachine.Type.User) { + checkForNonDedicatedResources(vmProfile, dc, avoids); + } + if (s_logger.isDebugEnabled()) { + s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid()); + } + + // call planners + // DataCenter dc = _dcDao.findById(vm.getDataCenterId()); + // check if datacenter is in avoid set + if (avoids.shouldAvoid(dc)) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("DataCenter id = '" + 
dc.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning."); + } + return null; + } + + if (planner == null) { + String plannerName = offering.getDeploymentPlanner(); + if (plannerName == null) { + if (vm.getHypervisorType() == HypervisorType.BareMetal) { + plannerName = "BareMetalPlanner"; + } else { + plannerName = _configDao.getValue(Config.VmDeploymentPlanner.key()); + } + } + planner = getDeploymentPlannerByName(plannerName); + } + if (vm.getLastHostId() != null && haVmTag == null) { s_logger.debug("This VM has last host_id specified, trying to choose the same host: " + vm.getLastHostId()); diff --git a/server/src/com/cloud/deploy/FirstFitPlanner.java b/server/src/com/cloud/deploy/FirstFitPlanner.java index c3df4174ada..5760e24ce4d 100644 --- a/server/src/com/cloud/deploy/FirstFitPlanner.java +++ b/server/src/com/cloud/deploy/FirstFitPlanner.java @@ -393,6 +393,10 @@ public class FirstFitPlanner extends AdapterBase implements DeploymentClusterPla } removeClustersCrossingThreshold(prioritizedClusterIds, avoid, vmProfile, plan); + String hostTagOnOffering = offering.getHostTag(); + if (hostTagOnOffering != null) { + removeClustersWithoutMatchingTag(prioritizedClusterIds, hostTagOnOffering); + } } else { if (s_logger.isDebugEnabled()) { @@ -520,6 +524,18 @@ public class FirstFitPlanner extends AdapterBase implements DeploymentClusterPla } + private void removeClustersWithoutMatchingTag(List clusterListForVmAllocation, String hostTagOnOffering) { + + List matchingClusters = hostDao.listClustersByHostTag(hostTagOnOffering); + + clusterListForVmAllocation.retainAll(matchingClusters); + + if (s_logger.isDebugEnabled()) { + s_logger.debug("The clusterId list for the given offering tag: " + clusterListForVmAllocation); + } + + } + private boolean isRootAdmin(VirtualMachineProfile vmProfile) { if (vmProfile != null) { if (vmProfile.getOwner() != null) { diff --git a/server/src/com/cloud/network/NetworkModelImpl.java 
b/server/src/com/cloud/network/NetworkModelImpl.java index 131da26a370..28c31aee144 100644 --- a/server/src/com/cloud/network/NetworkModelImpl.java +++ b/server/src/com/cloud/network/NetworkModelImpl.java @@ -2217,24 +2217,29 @@ public class NetworkModelImpl extends ManagerBase implements NetworkModel { @Override public NicVO getPlaceholderNicForRouter(Network network, Long podId) { List nics = _nicDao.listPlaceholderNicsByNetworkIdAndVmType(network.getId(), VirtualMachine.Type.DomainRouter); + List vlans = new ArrayList(); + if (podId != null) { + vlans = _vlanDao.listVlansForPod(podId); + } for (NicVO nic : nics) { if (nic.getReserver() == null && (nic.getIPv4Address() != null || nic.getIPv6Address() != null)) { if (podId == null) { return nic; } else { + IpAddress ip = null; + UserIpv6AddressVO ipv6 = null; + + if (nic.getIPv4Address() != null) { + ip = _ipAddressDao.findByIpAndSourceNetworkId(network.getId(), nic.getIPv4Address()); + } else { + ipv6 = _ipv6Dao.findByNetworkIdAndIp(network.getId(), nic.getIPv6Address()); + } //return nic only when its ip address belong to the pod range (for the Basic zone case) - List vlans = _vlanDao.listVlansForPod(podId); for (Vlan vlan : vlans) { - if (nic.getIPv4Address() != null) { - IpAddress ip = _ipAddressDao.findByIpAndSourceNetworkId(network.getId(), nic.getIPv4Address()); - if (ip != null && ip.getVlanId() == vlan.getId()) { - return nic; - } - } else { - UserIpv6AddressVO ipv6 = _ipv6Dao.findByNetworkIdAndIp(network.getId(), nic.getIPv6Address()); - if (ipv6 != null && ipv6.getVlanId() == vlan.getId()) { - return nic; - } + if (ip != null && ip.getVlanId() == vlan.getId()) { + return nic; + } else if (ipv6 != null && ipv6.getVlanId() == vlan.getId()) { + return nic; } } } diff --git a/server/test/com/cloud/vm/FirstFitPlannerTest.java b/server/test/com/cloud/vm/FirstFitPlannerTest.java index 41dd8c0922e..85463de8c8b 100644 --- a/server/test/com/cloud/vm/FirstFitPlannerTest.java +++ 
b/server/test/com/cloud/vm/FirstFitPlannerTest.java @@ -30,6 +30,7 @@ import java.util.Map; import javax.inject.Inject; +import com.cloud.offering.ServiceOffering; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; import org.apache.cloudstack.framework.config.ConfigDepot; @@ -127,6 +128,8 @@ public class FirstFitPlannerTest { ConfigDepotImpl configDepot; @Inject ScopedConfigStorage scopedStorage; + @Inject + HostDao hostDao; private static long domainId = 1L; long dataCenterId = 1L; @@ -192,6 +195,33 @@ public class FirstFitPlannerTest { assertTrue("Reordered cluster list have clusters exceeding threshold", (!clusterList.containsAll(clustersCrossingThreshold))); } + @Test + public void checkClusterListBasedOnHostTag() throws InsufficientServerCapacityException { + VirtualMachineProfileImpl vmProfile = mock(VirtualMachineProfileImpl.class); + DataCenterDeployment plan = mock(DataCenterDeployment.class); + ExcludeList avoids = mock(ExcludeList.class); + initializeForTest(vmProfile, plan, avoids); + List matchingClusters = initializeForClusterListBasedOnHostTag(vmProfile.getServiceOffering()); + + List clusterList = planner.orderClusters(vmProfile, plan, avoids); + + assertTrue("Reordered cluster list have clusters which has hosts with specified host tag on offering", (clusterList.containsAll(matchingClusters))); + assertTrue("Reordered cluster list does not have clusters which dont have hosts with matching host tag on offering", (!clusterList.contains(2L))); + } + + private List initializeForClusterListBasedOnHostTag(ServiceOffering offering) { + + + when(offering.getHostTag()).thenReturn("hosttag1"); + initializeForClusterThresholdDisabled(); + List matchingClusters = new ArrayList<>(); + matchingClusters.add(3L); + matchingClusters.add(5L); + when(hostDao.listClustersByHostTag("hosttag1")).thenReturn(matchingClusters); + + return matchingClusters; + } + @Test public void 
checkClusterReorderingForStartVMWithThresholdCheckDisabled() throws InsufficientServerCapacityException { VirtualMachineProfileImpl vmProfile = mock(VirtualMachineProfileImpl.class);