CLOUDSTACK-9112: deployVM thread holds the global network lock too long, causing delays; also some improvements in the planner

Some VM deployment failures occur when multiple VMs are deployed at a time, mainly due to NetworkModel code that iterates over all the VLANs in the pod. This iteration causes each deployVM thread to hold the global lock on the network longer, introducing delays. The delay in turn causes more threads to choose the same host and fail, since capacity is no longer available on that host.

The following changes reduce the delays during VM deployment that in turn cause some VM deployment failures when multiple VMs are launched at a time:

    In the planner, remove clusters that do not contain a host with a matching service offering tag. This saves iterating over clusters that don't have a matching tagged host.
    In NetworkModel, do not query the VLANs for the pod inside the loop, and optimize the logic that looks up the IP/IPv6 address (see the sketch after this list).
    In DeploymentPlanningManagerImpl, do not process affinity groups if the plan already provides a hostId.
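A minimal, hypothetical sketch of the second item. The names VlanDao, IpDao, findVlanOfIp, and LoopHoistingSketch are simplified stand-ins invented for illustration, not the real CloudStack classes shown in the diffs below; only the query-hoisting pattern mirrors the fix.

// Hypothetical sketch: VlanDao/IpDao/findVlanOfIp are simplified stand-ins,
// not the CloudStack DAOs; only the query-hoisting pattern mirrors the fix.
import java.util.List;

public class LoopHoistingSketch {

    interface VlanDao { List<Long> listVlansForPod(long podId); }
    interface IpDao { Long findVlanOfIp(long networkId, String ip); }

    // Before the fix: the pod's VLANs were fetched inside the per-NIC loop and the
    // address lookup ran inside the per-VLAN loop, i.e. O(nics) VLAN queries and up
    // to O(nics * vlans) address queries, all while the global network lock was held.
    // After the fix (sketched here): one VLAN query, and one address lookup per NIC.
    static String findPlaceholderNic(List<String> nicIps, long podId, long networkId,
                                     VlanDao vlanDao, IpDao ipDao) {
        List<Long> podVlans = vlanDao.listVlansForPod(podId); // hoisted out of the loop
        for (String nicIp : nicIps) {
            Long vlanOfIp = ipDao.findVlanOfIp(networkId, nicIp); // once per NIC
            if (vlanOfIp != null && podVlans.contains(vlanOfIp)) {
                return nicIp; // this NIC's address belongs to the pod's IP range
            }
        }
        return null;
    }
}

The same hoisting shows up in the NetworkModelImpl diff below, where listVlansForPod moves out of the NIC loop and the ip/ipv6 lookup moves out of the VLAN loop.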
Harikrishna Patnala 2017-03-01 14:30:51 +05:30
parent 1decf5366d
commit f34469a41b
6 changed files with 130 additions and 48 deletions


@@ -98,5 +98,7 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
HostVO findByPublicIp(String publicIp);
List<Long> listClustersByHostTag(String hostTagOnOffering);
List<HostVO> listByType(Type type);
}


@@ -77,6 +77,8 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
private static final Logger status_logger = Logger.getLogger(Status.class);
private static final Logger state_logger = Logger.getLogger(ResourceState.class);
private static final String LIST_CLUSTERID_FOR_HOST_TAG = "select distinct cluster_id from host join host_tags on host.id = host_tags.host_id and host_tags.tag = ?";
protected SearchBuilder<HostVO> TypePodDcStatusSearch;
protected SearchBuilder<HostVO> IdStatusSearch;
@@ -1129,6 +1131,29 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
return customSearch(sc, null);
}
@Override
public List<Long> listClustersByHostTag(String hostTagOnOffering) {
TransactionLegacy txn = TransactionLegacy.currentTxn();
PreparedStatement pstmt = null;
List<Long> result = new ArrayList<Long>();
StringBuilder sql = new StringBuilder(LIST_CLUSTERID_FOR_HOST_TAG);
// list the distinct ids of all clusters that contain at least one host tagged with the given offering host tag
try {
pstmt = txn.prepareAutoCloseStatement(sql.toString());
pstmt.setString(1, hostTagOnOffering);
ResultSet rs = pstmt.executeQuery();
while (rs.next()) {
result.add(rs.getLong(1));
}
return result;
} catch (SQLException e) {
throw new CloudRuntimeException("DB Exception on: " + sql, e);
} catch (Throwable e) {
throw new CloudRuntimeException("Caught: " + sql, e);
}
}
@Override
public List<HostVO> listAllHostsByType(Host.Type type) {
SearchCriteria<HostVO> sc = TypeSearch.create();


@@ -249,50 +249,16 @@ StateListener<State, VirtualMachine.Event, VirtualMachine> {
public DeployDestination planDeployment(VirtualMachineProfile vmProfile, DeploymentPlan plan, ExcludeList avoids, DeploymentPlanner planner)
throws InsufficientServerCapacityException, AffinityConflictException {
// call affinitygroup chain
ServiceOffering offering = vmProfile.getServiceOffering();
int cpu_requested = offering.getCpu() * offering.getSpeed();
long ram_requested = offering.getRamSize() * 1024L * 1024L;
VirtualMachine vm = vmProfile.getVirtualMachine();
long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId());
DataCenter dc = _dcDao.findById(vm.getDataCenterId());
if (vmGroupCount > 0) {
for (AffinityGroupProcessor processor : _affinityProcessors) {
processor.process(vmProfile, plan, avoids);
}
}
if (vm.getType() == VirtualMachine.Type.User || vm.getType() == VirtualMachine.Type.DomainRouter) {
checkForNonDedicatedResources(vmProfile, dc, avoids);
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
}
// call planners
//DataCenter dc = _dcDao.findById(vm.getDataCenterId());
// check if datacenter is in avoid set
if (avoids.shouldAvoid(dc)) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("DataCenter id = '" + dc.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning.");
}
return null;
}
ServiceOffering offering = vmProfile.getServiceOffering();
if(planner == null){
String plannerName = offering.getDeploymentPlanner();
if (plannerName == null) {
if (vm.getHypervisorType() == HypervisorType.BareMetal) {
plannerName = "BareMetalPlanner";
} else {
plannerName = _configDao.getValue(Config.VmDeploymentPlanner.key());
}
}
planner = getDeploymentPlannerByName(plannerName);
}
int cpu_requested = offering.getCpu() * offering.getSpeed();
long ram_requested = offering.getRamSize() * 1024L * 1024L;
if (s_logger.isDebugEnabled()) {
s_logger.debug("DeploymentPlanner allocation algorithm: " + planner);
@@ -364,6 +330,44 @@ StateListener<State, VirtualMachine.Event, VirtualMachine> {
return null;
}
// call affinitygroup chain
long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId());
if (vmGroupCount > 0) {
for (AffinityGroupProcessor processor : _affinityProcessors) {
processor.process(vmProfile, plan, avoids);
}
}
if (vm.getType() == VirtualMachine.Type.User) {
checkForNonDedicatedResources(vmProfile, dc, avoids);
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
}
// call planners
// DataCenter dc = _dcDao.findById(vm.getDataCenterId());
// check if datacenter is in avoid set
if (avoids.shouldAvoid(dc)) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("DataCenter id = '" + dc.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning.");
}
return null;
}
if (planner == null) {
String plannerName = offering.getDeploymentPlanner();
if (plannerName == null) {
if (vm.getHypervisorType() == HypervisorType.BareMetal) {
plannerName = "BareMetalPlanner";
} else {
plannerName = _configDao.getValue(Config.VmDeploymentPlanner.key());
}
}
planner = getDeploymentPlannerByName(plannerName);
}
if (vm.getLastHostId() != null && haVmTag == null) {
s_logger.debug("This VM has last host_id specified, trying to choose the same host: " + vm.getLastHostId());


@@ -393,6 +393,10 @@ public class FirstFitPlanner extends AdapterBase implements DeploymentClusterPla
}
removeClustersCrossingThreshold(prioritizedClusterIds, avoid, vmProfile, plan);
String hostTagOnOffering = offering.getHostTag();
if (hostTagOnOffering != null) {
removeClustersWithoutMatchingTag(prioritizedClusterIds, hostTagOnOffering);
}
} else {
if (s_logger.isDebugEnabled()) {
@@ -520,6 +524,18 @@ public class FirstFitPlanner extends AdapterBase implements DeploymentClusterPla
}
private void removeClustersWithoutMatchingTag(List<Long> clusterListForVmAllocation, String hostTagOnOffering) {
List<Long> matchingClusters = hostDao.listClustersByHostTag(hostTagOnOffering);
clusterListForVmAllocation.retainAll(matchingClusters);
if (s_logger.isDebugEnabled()) {
s_logger.debug("The clusterId list for the given offering tag: " + clusterListForVmAllocation);
}
}
private boolean isRootAdmin(VirtualMachineProfile vmProfile) {
if (vmProfile != null) {
if (vmProfile.getOwner() != null) {


@@ -2217,24 +2217,29 @@ public class NetworkModelImpl extends ManagerBase implements NetworkModel {
@Override
public NicVO getPlaceholderNicForRouter(Network network, Long podId) {
List<NicVO> nics = _nicDao.listPlaceholderNicsByNetworkIdAndVmType(network.getId(), VirtualMachine.Type.DomainRouter);
List<? extends Vlan> vlans = new ArrayList<VlanVO>();
if (podId != null) {
vlans = _vlanDao.listVlansForPod(podId);
}
for (NicVO nic : nics) {
if (nic.getReserver() == null && (nic.getIPv4Address() != null || nic.getIPv6Address() != null)) {
if (podId == null) {
return nic;
} else {
IpAddress ip = null;
UserIpv6AddressVO ipv6 = null;
if (nic.getIPv4Address() != null) {
ip = _ipAddressDao.findByIpAndSourceNetworkId(network.getId(), nic.getIPv4Address());
} else {
ipv6 = _ipv6Dao.findByNetworkIdAndIp(network.getId(), nic.getIPv6Address());
}
//return nic only when its ip address belongs to the pod range (for the Basic zone case)
List<? extends Vlan> vlans = _vlanDao.listVlansForPod(podId);
for (Vlan vlan : vlans) {
if (nic.getIPv4Address() != null) {
IpAddress ip = _ipAddressDao.findByIpAndSourceNetworkId(network.getId(), nic.getIPv4Address());
if (ip != null && ip.getVlanId() == vlan.getId()) {
return nic;
}
} else {
UserIpv6AddressVO ipv6 = _ipv6Dao.findByNetworkIdAndIp(network.getId(), nic.getIPv6Address());
if (ipv6 != null && ipv6.getVlanId() == vlan.getId()) {
return nic;
}
if (ip != null && ip.getVlanId() == vlan.getId()) {
return nic;
} else if (ipv6 != null && ipv6.getVlanId() == vlan.getId()) {
return nic;
}
}
}


@@ -30,6 +30,7 @@ import java.util.Map;
import javax.inject.Inject;
import com.cloud.offering.ServiceOffering;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.framework.config.ConfigDepot;
@@ -127,6 +128,8 @@ public class FirstFitPlannerTest {
ConfigDepotImpl configDepot;
@Inject
ScopedConfigStorage scopedStorage;
@Inject
HostDao hostDao;
private static long domainId = 1L;
long dataCenterId = 1L;
@@ -192,6 +195,33 @@ public class FirstFitPlannerTest {
assertTrue("Reordered cluster list have clusters exceeding threshold", (!clusterList.containsAll(clustersCrossingThreshold)));
}
@Test
public void checkClusterListBasedOnHostTag() throws InsufficientServerCapacityException {
VirtualMachineProfileImpl vmProfile = mock(VirtualMachineProfileImpl.class);
DataCenterDeployment plan = mock(DataCenterDeployment.class);
ExcludeList avoids = mock(ExcludeList.class);
initializeForTest(vmProfile, plan, avoids);
List<Long> matchingClusters = initializeForClusterListBasedOnHostTag(vmProfile.getServiceOffering());
List<Long> clusterList = planner.orderClusters(vmProfile, plan, avoids);
assertTrue("Reordered cluster list have clusters which has hosts with specified host tag on offering", (clusterList.containsAll(matchingClusters)));
assertTrue("Reordered cluster list does not have clusters which dont have hosts with matching host tag on offering", (!clusterList.contains(2L)));
}
private List<Long> initializeForClusterListBasedOnHostTag(ServiceOffering offering) {
when(offering.getHostTag()).thenReturn("hosttag1");
initializeForClusterThresholdDisabled();
List<Long> matchingClusters = new ArrayList<>();
matchingClusters.add(3L);
matchingClusters.add(5L);
when(hostDao.listClustersByHostTag("hosttag1")).thenReturn(matchingClusters);
return matchingClusters;
}
@Test
public void checkClusterReorderingForStartVMWithThresholdCheckDisabled() throws InsufficientServerCapacityException {
VirtualMachineProfileImpl vmProfile = mock(VirtualMachineProfileImpl.class);