server: guard vm start inter-cluster migration with config (#7401)

When a stopped VM is started and the current cluster does not have enough capacity, CloudStack can migrate the VM to a different cluster. This can be an expensive operation when cluster-scoped storage is used, because the migration may be carried out through the SSVM and secondary storage.
This PR allows controlling this behaviour with the existing global config, `migrate.vm.across.clusters`.

Signed-off-by: Abhishek Kumar <abhishek.mrt22@gmail.com>
This commit is contained in:
Abhishek Kumar 2023-05-08 12:08:57 +05:30 committed by GitHub
parent b280370a98
commit e234c3ccdc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 171 additions and 49 deletions

View File

@ -16,6 +16,8 @@
// under the License.
package com.cloud.deploy;
import static com.cloud.utils.NumbersUtil.toHumanReadableSize;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@ -23,28 +25,17 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Timer;
import java.util.TreeSet;
import java.util.stream.Collectors;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import org.apache.cloudstack.affinity.AffinityGroupDomainMapVO;
import com.cloud.storage.VMTemplateVO;
import com.cloud.storage.dao.VMTemplateDao;
import com.cloud.user.AccountVO;
import com.cloud.user.dao.AccountDao;
import com.cloud.exception.StorageUnavailableException;
import com.cloud.utils.db.Filter;
import com.cloud.utils.fsm.StateMachine2;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.apache.cloudstack.affinity.AffinityGroupProcessor;
import org.apache.cloudstack.affinity.AffinityGroupService;
import org.apache.cloudstack.affinity.AffinityGroupVMMapVO;
@ -57,6 +48,8 @@ import org.apache.cloudstack.engine.cloud.entity.api.db.dao.VMReservationDao;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStore;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.StoragePoolAllocator;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.framework.messagebus.MessageBus;
import org.apache.cloudstack.framework.messagebus.MessageSubscriber;
@ -64,6 +57,9 @@ import org.apache.cloudstack.managed.context.ManagedContextTimerTask;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
@ -95,6 +91,7 @@ import com.cloud.deploy.dao.PlannerHostReservationDao;
import com.cloud.exception.AffinityConflictException;
import com.cloud.exception.ConnectionException;
import com.cloud.exception.InsufficientServerCapacityException;
import com.cloud.exception.StorageUnavailableException;
import com.cloud.gpu.GPU;
import com.cloud.host.DetailVO;
import com.cloud.host.Host;
@ -115,26 +112,32 @@ import com.cloud.storage.ScopeType;
import com.cloud.storage.StorageManager;
import com.cloud.storage.StoragePool;
import com.cloud.storage.StoragePoolHostVO;
import com.cloud.storage.VMTemplateVO;
import com.cloud.storage.Volume;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.dao.DiskOfferingDao;
import com.cloud.storage.dao.GuestOSCategoryDao;
import com.cloud.storage.dao.GuestOSDao;
import com.cloud.storage.dao.StoragePoolHostDao;
import com.cloud.storage.dao.VMTemplateDao;
import com.cloud.storage.dao.VolumeDao;
import com.cloud.user.AccountManager;
import com.cloud.user.AccountVO;
import com.cloud.user.dao.AccountDao;
import com.cloud.utils.DateUtil;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.Pair;
import com.cloud.utils.component.Manager;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.Filter;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.TransactionCallback;
import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.fsm.StateListener;
import com.cloud.utils.fsm.StateMachine2;
import com.cloud.vm.DiskProfile;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
@ -144,8 +147,6 @@ import com.cloud.vm.VirtualMachineProfile;
import com.cloud.vm.dao.UserVmDao;
import com.cloud.vm.dao.VMInstanceDao;
import static com.cloud.utils.NumbersUtil.toHumanReadableSize;
public class DeploymentPlanningManagerImpl extends ManagerBase implements DeploymentPlanningManager, Manager, Listener,
StateListener<State, VirtualMachine.Event, VirtualMachine>, Configurable {
@ -266,6 +267,35 @@ StateListener<State, VirtualMachine.Event, VirtualMachine>, Configurable {
_affinityProcessors = affinityProcessors;
}
/**
 * Adds every other cluster of the last host's data center to the avoid list when moving the VM to
 * a different cluster is not allowed and would require a storage migration.
 * <p>
 * The method returns without touching {@code avoids} when:
 * <ul>
 *   <li>{@code lastHost} is null or has no cluster ID;</li>
 *   <li>{@code MIGRATE_VM_ACROSS_CLUSTERS} evaluates to true for the VM's data center;</li>
 *   <li>the VM has no usable volumes;</li>
 *   <li>none of the VM's volumes resides on a storage pool with HOST or CLUSTER scope.</li>
 * </ul>
 * Otherwise all clusters listed for the last host's data center, except the last host's own
 * cluster and those already present in the avoid list, are added to {@code avoids}.
 *
 * @param vm       the VM being deployed; its ID and data center ID are read
 * @param lastHost the host the VM last ran on, may be null
 * @param avoids   the exclude list that receives the clusters to avoid
 */
protected void avoidOtherClustersForDeploymentIfMigrationDisabled(VirtualMachine vm, Host lastHost, ExcludeList avoids) {
    if (lastHost == null || lastHost.getClusterId() == null ||
            ConfigurationManagerImpl.MIGRATE_VM_ACROSS_CLUSTERS.valueIn(vm.getDataCenterId())) {
        return;
    }
    List<VolumeVO> volumes = _volsDao.findUsableVolumesForInstance(vm.getId());
    if (CollectionUtils.isEmpty(volumes)) {
        return;
    }
    boolean storageMigrationNeededDuringClusterMigration = false;
    for (Volume volume : volumes) {
        // A volume without a pool (or whose pool cannot be loaded) has nothing to migrate, so it
        // cannot force a storage migration; skipping it also avoids a NullPointerException.
        Long poolId = volume.getPoolId();
        if (poolId == null) {
            continue;
        }
        StoragePoolVO pool = _storagePoolDao.findById(poolId);
        if (pool == null) {
            continue;
        }
        // Enum identity comparison is null-safe, whereas List.of(...).contains(null) throws.
        ScopeType scope = pool.getScope();
        if (scope == ScopeType.HOST || scope == ScopeType.CLUSTER) {
            storageMigrationNeededDuringClusterMigration = true;
            break;
        }
    }
    if (!storageMigrationNeededDuringClusterMigration) {
        return;
    }
    final Long lastHostClusterId = lastHost.getClusterId();
    s_logger.warn(String.format("VM last host ID: %d belongs to zone ID: %s for which config - %s is false and storage migration would be needed for inter-cluster migration, therefore, adding all other clusters except ID: %d from this zone to avoid list",
            lastHost.getId(), vm.getDataCenterId(), ConfigurationManagerImpl.MIGRATE_VM_ACROSS_CLUSTERS.key(), lastHostClusterId));
    List<Long> clusterIds = _clusterDao.listAllClusters(lastHost.getDataCenterId());
    Set<Long> existingAvoidedClusters = avoids.getClustersToAvoid();
    // Keep only clusters that are neither the last host's cluster nor already avoided.
    clusterIds = clusterIds.stream()
            .filter(x -> !Objects.equals(x, lastHostClusterId))
            .filter(x -> existingAvoidedClusters == null || !existingAvoidedClusters.contains(x))
            .collect(Collectors.toList());
    avoids.addClusterList(clusterIds);
}
@Override
public DeployDestination planDeployment(VirtualMachineProfile vmProfile, DeploymentPlan plan, ExcludeList avoids, DeploymentPlanner planner)
throws InsufficientServerCapacityException, AffinityConflictException {
@ -408,6 +438,8 @@ StateListener<State, VirtualMachine.Event, VirtualMachine>, Configurable {
planner = getDeploymentPlannerByName(plannerName);
}
Host lastHost = null;
String considerLastHostStr = (String)vmProfile.getParameter(VirtualMachineProfile.Param.ConsiderLastHost);
boolean considerLastHost = vm.getLastHostId() != null && haVmTag == null &&
(considerLastHostStr == null || Boolean.TRUE.toString().equalsIgnoreCase(considerLastHostStr));
@ -415,6 +447,7 @@ StateListener<State, VirtualMachine.Event, VirtualMachine>, Configurable {
s_logger.debug("This VM has last host_id specified, trying to choose the same host: " + vm.getLastHostId());
HostVO host = _hostDao.findById(vm.getLastHostId());
lastHost = host;
_hostDao.loadHostTags(host);
_hostDao.loadDetails(host);
ServiceOfferingDetailsVO offeringDetails = null;
@ -519,6 +552,8 @@ StateListener<State, VirtualMachine.Event, VirtualMachine>, Configurable {
s_logger.debug("Cannot choose the last host to deploy this VM ");
}
avoidOtherClustersForDeploymentIfMigrationDisabled(vm, lastHost, avoids);
DeployDestination dest = null;
List<Long> clusterList = null;

View File

@ -21,39 +21,31 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.dc.ClusterDetailsVO;
import com.cloud.dc.DataCenter;
import com.cloud.gpu.GPU;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.storage.DiskOfferingVO;
import com.cloud.storage.Storage;
import com.cloud.storage.StoragePool;
import com.cloud.storage.VMTemplateVO;
import com.cloud.storage.Volume;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.dao.VMTemplateDao;
import com.cloud.user.AccountVO;
import com.cloud.user.dao.AccountDao;
import com.cloud.utils.Pair;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.Type;
import com.cloud.vm.VirtualMachineProfile;
import com.cloud.vm.VirtualMachineProfileImpl;
import org.apache.cloudstack.affinity.AffinityGroupProcessor;
import org.apache.cloudstack.affinity.AffinityGroupService;
import org.apache.cloudstack.affinity.dao.AffinityGroupDao;
import org.apache.cloudstack.affinity.dao.AffinityGroupDomainMapDao;
import org.apache.cloudstack.affinity.dao.AffinityGroupVMMapDao;
import org.apache.cloudstack.engine.cloud.entity.api.db.dao.VMReservationDao;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.framework.messagebus.MessageBus;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.test.utils.SpringUtils;
import org.apache.commons.collections.CollectionUtils;
import org.junit.Assert;
import org.junit.Before;
@ -79,22 +71,14 @@ import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.support.AnnotationConfigContextLoader;
import org.apache.cloudstack.affinity.AffinityGroupProcessor;
import org.apache.cloudstack.affinity.AffinityGroupService;
import org.apache.cloudstack.affinity.dao.AffinityGroupDao;
import org.apache.cloudstack.affinity.dao.AffinityGroupVMMapDao;
import org.apache.cloudstack.engine.cloud.entity.api.db.dao.VMReservationDao;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.framework.messagebus.MessageBus;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.test.utils.SpringUtils;
import com.cloud.agent.AgentManager;
import com.cloud.capacity.CapacityManager;
import com.cloud.capacity.dao.CapacityDao;
import com.cloud.configuration.ConfigurationManagerImpl;
import com.cloud.dc.ClusterDetailsDao;
import com.cloud.dc.ClusterDetailsVO;
import com.cloud.dc.ClusterVO;
import com.cloud.dc.DataCenter;
import com.cloud.dc.DataCenterVO;
import com.cloud.dc.dao.ClusterDao;
import com.cloud.dc.dao.DataCenterDao;
@ -105,26 +89,46 @@ import com.cloud.deploy.DeploymentPlanner.PlannerResourceUsage;
import com.cloud.deploy.dao.PlannerHostReservationDao;
import com.cloud.exception.AffinityConflictException;
import com.cloud.exception.InsufficientServerCapacityException;
import com.cloud.gpu.GPU;
import com.cloud.gpu.dao.HostGpuGroupsDao;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.host.dao.HostDetailsDao;
import com.cloud.host.dao.HostTagsDao;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.resource.ResourceManager;
import com.cloud.org.Grouping.AllocationState;
import com.cloud.resource.ResourceManager;
import com.cloud.service.ServiceOfferingVO;
import com.cloud.service.dao.ServiceOfferingDetailsDao;
import com.cloud.storage.DiskOfferingVO;
import com.cloud.storage.ScopeType;
import com.cloud.storage.Storage;
import com.cloud.storage.StorageManager;
import com.cloud.storage.StoragePool;
import com.cloud.storage.VMTemplateVO;
import com.cloud.storage.Volume;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.dao.DiskOfferingDao;
import com.cloud.storage.dao.GuestOSCategoryDao;
import com.cloud.storage.dao.GuestOSDao;
import com.cloud.storage.dao.StoragePoolHostDao;
import com.cloud.storage.dao.VMTemplateDao;
import com.cloud.storage.dao.VolumeDao;
import com.cloud.user.AccountManager;
import com.cloud.user.AccountVO;
import com.cloud.user.dao.AccountDao;
import com.cloud.utils.Pair;
import com.cloud.utils.component.ComponentContext;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.Type;
import com.cloud.vm.VirtualMachineProfile;
import com.cloud.vm.VirtualMachineProfileImpl;
import com.cloud.vm.dao.UserVmDao;
import com.cloud.vm.dao.UserVmDetailsDao;
import com.cloud.vm.dao.VMInstanceDao;
import com.cloud.host.dao.HostDetailsDao;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(loader = AnnotationConfigContextLoader.class)
@ -191,6 +195,9 @@ public class DeploymentPlanningManagerImplTest {
@Inject
ClusterDetailsDao clusterDetailsDao;
@Inject
PrimaryDataStoreDao primaryDataStoreDao;
@Mock
Host host;
@ -1075,4 +1082,84 @@ public class DeploymentPlanningManagerImplTest {
Assert.assertEquals(6, hosts.get(6).getId());
Assert.assertEquals(2, hosts.get(7).getId());
}
/**
 * Sets up the mocks shared by the avoidOtherClustersForDeploymentIfMigrationDisabled tests.
 * <p>
 * Overwrites the {@code _defaultValue} field of {@code MIGRATE_VM_ACROSS_CLUSTERS} through
 * reflection, stubs the cluster DAO to return four cluster IDs for any zone, and stubs the volume
 * DAO for VM ID 1 with either an empty list or two volumes (pool 1 with ZONE scope, pool 2 with
 * CLUSTER or GLOBAL scope).
 * NOTE(review): the overwritten default is not restored here - confirm it cannot leak into other tests.
 *
 * @param configValue            value written as the config key's default
 * @param mockVolumes            whether VM ID 1 should have volumes
 * @param mockClusterStoreVolume whether the second volume's pool has CLUSTER scope (GLOBAL otherwise)
 * @return the cluster IDs the cluster DAO is stubbed to return
 */
private List<Long> prepareMockForAvoidOtherClustersForDeploymentIfMigrationDisabled(boolean configValue, boolean mockVolumes, boolean mockClusterStoreVolume) {
    try {
        Field defaultValueField = ConfigKey.class.getDeclaredField("_defaultValue");
        defaultValueField.setAccessible(true);
        defaultValueField.set(ConfigurationManagerImpl.MIGRATE_VM_ACROSS_CLUSTERS, String.valueOf(configValue));
    } catch (NoSuchFieldException | IllegalAccessException e) {
        throw new RuntimeException(e);
    }

    final List<Long> zoneClusterIds = List.of(101L, 102L, 103L, 104L);
    Mockito.when(_clusterDao.listAllClusters(Mockito.anyLong())).thenReturn(zoneClusterIds);

    if (!mockVolumes) {
        // The VM has no usable volumes at all.
        Mockito.when(volDao.findUsableVolumesForInstance(1L)).thenReturn(new ArrayList<>());
        return zoneClusterIds;
    }

    // First volume: always on a zone-wide pool.
    VolumeVO zoneVolume = Mockito.mock(VolumeVO.class);
    Mockito.when(zoneVolume.getPoolId()).thenReturn(1L);
    // Second volume: pool scope depends on the caller's flag.
    VolumeVO otherVolume = Mockito.mock(VolumeVO.class);
    Mockito.when(otherVolume.getPoolId()).thenReturn(2L);

    StoragePoolVO zonePool = Mockito.mock(StoragePoolVO.class);
    Mockito.when(zonePool.getScope()).thenReturn(ScopeType.ZONE);
    Mockito.when(primaryDataStoreDao.findById(1L)).thenReturn(zonePool);

    final ScopeType otherScope;
    if (mockClusterStoreVolume) {
        otherScope = ScopeType.CLUSTER;
    } else {
        otherScope = ScopeType.GLOBAL;
    }
    StoragePoolVO otherPool = Mockito.mock(StoragePoolVO.class);
    Mockito.when(otherPool.getScope()).thenReturn(otherScope);
    Mockito.when(primaryDataStoreDao.findById(2L)).thenReturn(otherPool);

    Mockito.when(volDao.findUsableVolumesForInstance(1L)).thenReturn(List.of(zoneVolume, otherVolume));
    return zoneClusterIds;
}
/**
 * No cluster must be avoided when the last host is missing or has no cluster ID,
 * even though the config is set to false.
 */
@Test
public void avoidOtherClustersForDeploymentIfMigrationDisabledNonValidHost() {
    prepareMockForAvoidOtherClustersForDeploymentIfMigrationDisabled(false, false, false);
    final VirtualMachine instance = Mockito.mock(VirtualMachine.class);
    final ExcludeList avoidList = new ExcludeList();

    // Case 1: there is no last host.
    _dpm.avoidOtherClustersForDeploymentIfMigrationDisabled(instance, null, avoidList);
    Set<Long> avoidedWithoutHost = avoidList.getClustersToAvoid();
    Assert.assertTrue(CollectionUtils.isEmpty(avoidedWithoutHost));

    // Case 2: the last host exists but reports no cluster.
    final Host hostWithoutCluster = Mockito.mock(Host.class);
    Mockito.when(hostWithoutCluster.getClusterId()).thenReturn(null);
    _dpm.avoidOtherClustersForDeploymentIfMigrationDisabled(instance, hostWithoutCluster, avoidList);
    Set<Long> avoidedWithoutCluster = avoidList.getClustersToAvoid();
    Assert.assertTrue(CollectionUtils.isEmpty(avoidedWithoutCluster));
}
/**
 * Invokes avoidOtherClustersForDeploymentIfMigrationDisabled for a mocked VM with ID 1 whose
 * mocked last host is in cluster 101, starting from an empty exclude list.
 *
 * @return the clusters found in the exclude list after the call
 */
private Set<Long> runAvoidOtherClustersForDeploymentIfMigrationDisabledTest() {
    final Long lastHostClusterId = 101L;
    Host previousHost = Mockito.mock(Host.class);
    Mockito.when(previousHost.getClusterId()).thenReturn(lastHostClusterId);

    VirtualMachine instance = Mockito.mock(VirtualMachine.class);
    Mockito.when(instance.getId()).thenReturn(1L);

    ExcludeList avoidList = new ExcludeList();
    _dpm.avoidOtherClustersForDeploymentIfMigrationDisabled(instance, previousHost, avoidList);
    return avoidList.getClustersToAvoid();
}
/** With the config set to true, no cluster must be added to the avoid list. */
@Test
public void avoidOtherClustersForDeploymentIfMigrationDisabledConfigAllows() {
    prepareMockForAvoidOtherClustersForDeploymentIfMigrationDisabled(true, false, false);
    Set<Long> avoidedClusters = runAvoidOtherClustersForDeploymentIfMigrationDisabledTest();
    Assert.assertTrue(CollectionUtils.isEmpty(avoidedClusters));
}
/** With the config set to false but no usable VM volumes, no cluster must be avoided. */
@Test
public void avoidOtherClustersForDeploymentIfMigrationDisabledNoVmVolumes() {
    prepareMockForAvoidOtherClustersForDeploymentIfMigrationDisabled(false, false, false);
    Set<Long> avoidedClusters = runAvoidOtherClustersForDeploymentIfMigrationDisabledTest();
    Assert.assertTrue(CollectionUtils.isEmpty(avoidedClusters));
}
/**
 * With the config set to false and volumes only on ZONE and GLOBAL scoped pools,
 * no cluster must be avoided.
 */
@Test
public void avoidOtherClustersForDeploymentIfMigrationDisabledVmVolumesNonValidScope() {
    prepareMockForAvoidOtherClustersForDeploymentIfMigrationDisabled(false, true, false);
    Set<Long> avoidedClusters = runAvoidOtherClustersForDeploymentIfMigrationDisabledTest();
    Assert.assertTrue(CollectionUtils.isEmpty(avoidedClusters));
}
/**
 * With the config set to false and one volume on a CLUSTER scoped pool, every cluster except the
 * last host's cluster (the first of the stubbed cluster IDs) must be avoided.
 */
@Test
public void avoidOtherClustersForDeploymentIfMigrationDisabledValid() {
    final List<Long> zoneClusterIds = prepareMockForAvoidOtherClustersForDeploymentIfMigrationDisabled(false, true, true);
    final Set<Long> avoided = runAvoidOtherClustersForDeploymentIfMigrationDisabledTest();
    final Long lastHostClusterId = zoneClusterIds.get(0);

    Assert.assertTrue(CollectionUtils.isNotEmpty(avoided));
    Assert.assertEquals(zoneClusterIds.size() - 1, avoided.size());
    Assert.assertFalse(avoided.contains(lastHostClusterId));
}
}