diff --git a/api/src/main/java/com/cloud/agent/api/Command.java b/api/src/main/java/com/cloud/agent/api/Command.java index 4a698e46bde..b3c6120462a 100644 --- a/api/src/main/java/com/cloud/agent/api/Command.java +++ b/api/src/main/java/com/cloud/agent/api/Command.java @@ -37,6 +37,7 @@ public abstract class Command { @LogLevel(Log4jLevel.Trace) protected Map contextMap = new HashMap(); private int wait; //in second + private boolean bypassHostMaintenance = false; protected Command() { this.wait = 0; @@ -74,6 +75,14 @@ public abstract class Command { return true; } + public boolean isBypassHostMaintenance() { + return bypassHostMaintenance; + } + + public void setBypassHostMaintenance(boolean bypassHostMaintenance) { + this.bypassHostMaintenance = bypassHostMaintenance; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java index 45df2311f3a..8810465d794 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java @@ -31,10 +31,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import com.cloud.agent.api.ModifySshKeysCommand; -import com.cloud.agent.api.ModifyStoragePoolCommand; import org.apache.cloudstack.agent.lb.SetupMSListCommand; -import com.cloud.agent.api.RollingMaintenanceCommand; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.log4j.Logger; @@ -48,10 +45,13 @@ import com.cloud.agent.api.CleanupNetworkRulesCmd; import com.cloud.agent.api.Command; import com.cloud.agent.api.MaintainCommand; import com.cloud.agent.api.MigrateCommand; +import com.cloud.agent.api.ModifySshKeysCommand; +import com.cloud.agent.api.ModifyStoragePoolCommand; import com.cloud.agent.api.ModifyTargetsCommand; import com.cloud.agent.api.PingTestCommand; import com.cloud.agent.api.PvlanSetupCommand; import com.cloud.agent.api.ReadyCommand; +import com.cloud.agent.api.RollingMaintenanceCommand; import com.cloud.agent.api.SetupCommand; import com.cloud.agent.api.ShutdownCommand; import com.cloud.agent.api.StartCommand; @@ -167,7 +167,7 @@ public abstract class AgentAttache { if (_maintenance) { for (final Command cmd : cmds) { - if (Arrays.binarySearch(s_commandsAllowedInMaintenanceMode, cmd.getClass().toString()) < 0) { + if (Arrays.binarySearch(s_commandsAllowedInMaintenanceMode, cmd.getClass().toString()) < 0 && !cmd.isBypassHostMaintenance()) { throw new AgentUnavailableException("Unable to send " + cmd.getClass().toString() + " because agent " + _name + " is in maintenance mode", _id); } } diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index 6967e74f997..2dd1791f0e5 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -519,6 +519,11 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac advanceExpunge(vm); } + private boolean expungeCommandCanBypassHostMaintenance(VirtualMachine vm) { + return VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType()) || + VirtualMachine.Type.ConsoleProxy.equals(vm.getType()); + } + protected void advanceExpunge(VMInstanceVO vm) throws ResourceUnavailableException, OperationTimedoutException, ConcurrentOperationException { if (vm == null || vm.getRemoved() != null) { if (s_logger.isDebugEnabled()) { @@ -565,6 +570,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac final Commands cmds = new Commands(Command.OnError.Stop); for (final Command volumeExpungeCommand : volumeExpungeCommands) { + volumeExpungeCommand.setBypassHostMaintenance(expungeCommandCanBypassHostMaintenance(vm)); cmds.addCommand(volumeExpungeCommand); } @@ -606,10 +612,12 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac if (hostId != null) { final Commands cmds = new Commands(Command.OnError.Stop); for (final Command command : finalizeExpungeCommands) { + command.setBypassHostMaintenance(expungeCommandCanBypassHostMaintenance(vm)); cmds.addCommand(command); } if (nicExpungeCommands != null) { for (final Command command : nicExpungeCommands) { + command.setBypassHostMaintenance(expungeCommandCanBypassHostMaintenance(vm)); cmds.addCommand(command); } } diff --git a/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java b/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java index 6ce874107b3..65b55f72c39 100644 --- a/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java +++ b/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java @@ -18,14 +18,14 @@ */ package org.apache.cloudstack.storage.datastore.driver; +import static com.cloud.utils.NumbersUtil.toHumanReadableSize; + import java.util.HashMap; import java.util.Map; import java.util.UUID; import javax.inject.Inject; -import org.apache.log4j.Logger; - import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo; import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult; import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult; @@ -53,6 +53,7 @@ import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; import org.apache.cloudstack.storage.to.SnapshotObjectTO; import org.apache.cloudstack.storage.to.TemplateObjectTO; import org.apache.cloudstack.storage.volume.VolumeObject; +import org.apache.log4j.Logger; import com.cloud.agent.api.Answer; import com.cloud.agent.api.storage.ResizeVolumeAnswer; @@ -70,16 +71,17 @@ import com.cloud.storage.ResizeVolumePayload; import com.cloud.storage.Storage; import com.cloud.storage.StorageManager; import com.cloud.storage.StoragePool; +import com.cloud.storage.Volume; import com.cloud.storage.dao.DiskOfferingDao; import com.cloud.storage.dao.SnapshotDao; import com.cloud.storage.dao.VMTemplateDao; import com.cloud.storage.dao.VolumeDao; import com.cloud.storage.snapshot.SnapshotManager; import com.cloud.template.TemplateManager; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine; import com.cloud.vm.dao.VMInstanceDao; -import static com.cloud.utils.NumbersUtil.toHumanReadableSize; - public class CloudStackPrimaryDataStoreDriverImpl implements PrimaryDataStoreDriver { @Override public Map getCapabilities() { @@ -211,10 +213,22 @@ public class CloudStackPrimaryDataStoreDriverImpl implements PrimaryDataStoreDri } } + private boolean commandCanBypassHostMaintenance(DataObject data) { + if (DataObjectType.VOLUME.equals(data.getType())) { + Volume volume = (Volume)data; + if (volume.getInstanceId() != null) { + VMInstanceVO vm = vmDao.findById(volume.getInstanceId()); + return vm != null && (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType()) || + VirtualMachine.Type.ConsoleProxy.equals(vm.getType())); + } + } + return false; + } + @Override public void deleteAsync(DataStore dataStore, DataObject data, AsyncCompletionCallback callback) { DeleteCommand cmd = new DeleteCommand(data.getTO()); - + cmd.setBypassHostMaintenance(commandCanBypassHostMaintenance(data)); CommandResult result = new CommandResult(); try { EndPoint ep = null; diff --git a/server/src/main/java/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java b/server/src/main/java/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java index 8dfa1f67db0..3a89d9641c5 100644 --- a/server/src/main/java/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java +++ b/server/src/main/java/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java @@ -1006,6 +1006,13 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy } public boolean isZoneReady(Map zoneHostInfoMap, long dataCenterId) { + List hosts = _hostDao.listByDataCenterId(dataCenterId); + if (CollectionUtils.isEmpty(hosts)) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Zone " + dataCenterId + " has no host available which is enabled and in Up state"); + } + return false; + } ZoneHostInfo zoneHostInfo = zoneHostInfoMap.get(dataCenterId); if (zoneHostInfo != null && isZoneHostReady(zoneHostInfo)) { VMTemplateVO template = _templateDao.findSystemVMReadyTemplate(dataCenterId, HypervisorType.Any); diff --git a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java index b05e008546b..6991ab4039d 100644 --- a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -16,9 +16,32 @@ // under the License. package com.cloud.ha; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import javax.inject.Inject; +import javax.naming.ConfigurationException; + +import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService; +import org.apache.cloudstack.framework.config.ConfigKey; +import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.cloudstack.managed.context.ManagedContext; +import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.management.ManagementServerHost; +import org.apache.log4j.Logger; +import org.apache.log4j.NDC; + import com.cloud.agent.AgentManager; import com.cloud.alert.AlertManager; import com.cloud.cluster.ClusterManagerListener; +import com.cloud.consoleproxy.ConsoleProxyManager; import com.cloud.dc.ClusterDetailsDao; import com.cloud.dc.DataCenterVO; import com.cloud.dc.HostPodVO; @@ -46,37 +69,16 @@ import com.cloud.service.dao.ServiceOfferingDao; import com.cloud.storage.StorageManager; import com.cloud.storage.dao.GuestOSCategoryDao; import com.cloud.storage.dao.GuestOSDao; +import com.cloud.storage.secondary.SecondaryStorageVmManager; import com.cloud.user.AccountManager; import com.cloud.utils.component.ManagerBase; import com.cloud.utils.concurrency.NamedThreadFactory; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine; -import com.cloud.vm.VirtualMachine.State; import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.VirtualMachineProfile; import com.cloud.vm.dao.VMInstanceDao; -import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService; -import org.apache.cloudstack.framework.config.ConfigKey; -import org.apache.cloudstack.framework.config.Configurable; -import org.apache.cloudstack.framework.config.dao.ConfigurationDao; -import org.apache.cloudstack.managed.context.ManagedContext; -import org.apache.cloudstack.managed.context.ManagedContextRunnable; -import org.apache.cloudstack.management.ManagementServerHost; -import org.apache.log4j.Logger; -import org.apache.log4j.NDC; - -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; - -import javax.inject.Inject; -import javax.naming.ConfigurationException; /** * HighAvailabilityManagerImpl coordinates the HA process. VMs are registered with the HA Manager for HA. The request is stored @@ -125,9 +127,12 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur HostPodDao _podDao; @Inject ClusterDetailsDao _clusterDetailsDao; - @Inject ServiceOfferingDao _serviceOfferingDao; + @Inject + private ConsoleProxyManager consoleProxyManager; + @Inject + private SecondaryStorageVmManager secondaryStorageVmManager; long _serverId; @@ -680,31 +685,51 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur _haDao.delete(vm.getId(), WorkType.Destroy); } + private void stopVMWithCleanup(VirtualMachine vm, VirtualMachine.State state) throws OperationTimedoutException, ResourceUnavailableException { + if (VirtualMachine.State.Running.equals(state)) { + _itMgr.advanceStop(vm.getUuid(), true); + } + } + + private void destroyVM(VirtualMachine vm, boolean expunge) throws OperationTimedoutException, AgentUnavailableException { + s_logger.info("Destroying " + vm.toString()); + if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { + consoleProxyManager.destroyProxy(vm.getId()); + } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) { + secondaryStorageVmManager.destroySecStorageVm(vm.getId()); + } else { + _itMgr.destroy(vm.getUuid(), expunge); + } + } + protected Long destroyVM(final HaWorkVO work) { final VirtualMachine vm = _itMgr.findById(work.getInstanceId()); - s_logger.info("Destroying " + vm.toString()); + if (vm == null) { + s_logger.info("No longer can find VM " + work.getInstanceId() + ". Throwing away " + work); + return null; + } + boolean expunge = VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType()) + || VirtualMachine.Type.ConsoleProxy.equals(vm.getType()); + if (!expunge && VirtualMachine.State.Destroyed.equals(work.getPreviousState())) { + s_logger.info("VM " + vm.getUuid() + " already in " + vm.getState() + " state. Throwing away " + work); + return null; + } try { - if (vm.getState() != State.Destroyed) { - s_logger.info("VM is no longer in Destroyed state " + vm.toString()); - return null; - } - - if (vm.getHostId() != null) { - _itMgr.destroy(vm.getUuid(), false); - s_logger.info("Successfully destroy " + vm); + stopVMWithCleanup(vm, work.getPreviousState()); + if (!VirtualMachine.State.Expunging.equals(work.getPreviousState())) { + destroyVM(vm, expunge); return null; } else { - if (s_logger.isDebugEnabled()) { - s_logger.debug(vm + " has already been stopped"); - } - return null; + s_logger.info("VM " + vm.getUuid() + " still in " + vm.getState() + " state."); } } catch (final AgentUnavailableException e) { - s_logger.debug("Agnet is not available" + e.getMessage()); + s_logger.debug("Agent is not available" + e.getMessage()); } catch (OperationTimedoutException e) { s_logger.debug("operation timed out: " + e.getMessage()); } catch (ConcurrentOperationException e) { s_logger.debug("concurrent operation: " + e.getMessage()); + } catch (ResourceUnavailableException e) { + s_logger.debug("Resource unavailable: " + e.getMessage()); } return (System.currentTimeMillis() >> 10) + _stopRetryInterval; @@ -793,9 +818,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur case Stop: case CheckStop: case ForceStop: - return ((System.currentTimeMillis() >> 10) + _stopRetryInterval); case Destroy: - return ((System.currentTimeMillis() >> 10) + _restartRetryInterval); + return ((System.currentTimeMillis() >> 10) + _stopRetryInterval); } return 0; } diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index f2a3caa6000..17c84c71d23 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -16,6 +16,8 @@ // under the License. package com.cloud.resource; +import static com.cloud.configuration.ConfigurationManagerImpl.SET_HOST_DOWN_TO_MAINTENANCE; + import java.net.URI; import java.net.URISyntaxException; import java.net.URLDecoder; @@ -180,9 +182,6 @@ import com.cloud.vm.dao.UserVmDetailsDao; import com.cloud.vm.dao.VMInstanceDao; import com.google.gson.Gson; - -import static com.cloud.configuration.ConfigurationManagerImpl.SET_HOST_DOWN_TO_MAINTENANCE; - @Component public class ResourceManagerImpl extends ManagerBase implements ResourceManager, ResourceService, Manager { private static final Logger s_logger = Logger.getLogger(ResourceManagerImpl.class); @@ -1229,6 +1228,19 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, return _hostDao.updateResourceState(currentState, event, nextState, host); } + private void handleVmForLastHostOrWithVGpu(final HostVO host, final VMInstanceVO vm) { + // Migration is not supported for VGPU Vms so stop them. + // for the last host in this cluster, destroy SSVM/CPVM and stop all other VMs + if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType()) + || VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { + s_logger.error(String.format("Maintenance: VM is of type %s. Destroying VM %s (ID: %s) immediately instead of migration.", vm.getType().toString(), vm.getInstanceName(), vm.getUuid())); + _haMgr.scheduleDestroy(vm, host.getId()); + return; + } + s_logger.error(String.format("Maintenance: No hosts available for migrations. Scheduling shutdown for VM %s instead of migration.", vm.getUuid())); + _haMgr.scheduleStop(vm, host.getId(), WorkType.ForceStop); + } + private boolean doMaintain(final long hostId) { final HostVO host = _hostDao.findById(hostId); s_logger.info("Maintenance: attempting maintenance of host " + host.getUuid()); @@ -1266,10 +1278,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, for (final VMInstanceVO vm : vms) { if (hosts == null || hosts.isEmpty() || !answer.getMigrate() || _serviceOfferingDetailsDao.findDetail(vm.getServiceOfferingId(), GPU.Keys.vgpuType.toString()) != null) { - // Migration is not supported for VGPU Vms so stop them. - // for the last host in this cluster, stop all the VMs - s_logger.error("Maintenance: No hosts available for migrations. Scheduling shutdown instead of migrations."); - _haMgr.scheduleStop(vm, hostId, WorkType.ForceStop); + handleVmForLastHostOrWithVGpu(host, vm); } else if (HypervisorType.LXC.equals(host.getHypervisorType()) && VirtualMachine.Type.User.equals(vm.getType())){ //Migration is not supported for LXC Vms. Schedule restart instead. _haMgr.scheduleRestart(vm, false); @@ -1417,7 +1426,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, * on a host. We need to track the various VM states on each run and accordingly transit to the * appropriate state. * - * We change states as follws - + * We change states as follows - * 1. If there are no VMs in running, migrating, starting, stopping, error, unknown states we can move * to maintenance state. Note that there cannot be incoming migrations as the API Call prepare for * maintenance checks incoming migrations before starting. diff --git a/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java b/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java index 7410f1e6e03..39fa6bb2d67 100644 --- a/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java +++ b/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java @@ -44,6 +44,7 @@ import org.mockito.runners.MockitoJUnitRunner; import com.cloud.agent.AgentManager; import com.cloud.alert.AlertManager; +import com.cloud.consoleproxy.ConsoleProxyManager; import com.cloud.dc.ClusterDetailsDao; import com.cloud.dc.DataCenterVO; import com.cloud.dc.HostPodVO; @@ -64,6 +65,7 @@ import com.cloud.service.dao.ServiceOfferingDao; import com.cloud.storage.StorageManager; import com.cloud.storage.dao.GuestOSCategoryDao; import com.cloud.storage.dao.GuestOSDao; +import com.cloud.storage.secondary.SecondaryStorageVmManager; import com.cloud.user.AccountManager; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine; @@ -112,6 +114,10 @@ public class HighAvailabilityManagerImplTest { @Mock VolumeOrchestrationService volumeMgr; @Mock + ConsoleProxyManager consoleProxyManager; + @Mock + SecondaryStorageVmManager secondaryStorageVmManager; + @Mock HostVO hostVO; HighAvailabilityManagerImpl highAvailabilityManager; diff --git a/services/secondary-storage/controller/src/main/java/org/apache/cloudstack/secondarystorage/SecondaryStorageManagerImpl.java b/services/secondary-storage/controller/src/main/java/org/apache/cloudstack/secondarystorage/SecondaryStorageManagerImpl.java index 2fc1eeda53c..f0b3c9172f9 100644 --- a/services/secondary-storage/controller/src/main/java/org/apache/cloudstack/secondarystorage/SecondaryStorageManagerImpl.java +++ b/services/secondary-storage/controller/src/main/java/org/apache/cloudstack/secondarystorage/SecondaryStorageManagerImpl.java @@ -811,6 +811,13 @@ public class SecondaryStorageManagerImpl extends ManagerBase implements Secondar } public boolean isZoneReady(Map zoneHostInfoMap, long dataCenterId) { + List hosts = _hostDao.listByDataCenterId(dataCenterId); + if (CollectionUtils.isEmpty(hosts)) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Zone " + dataCenterId + " has no host available which is enabled and in Up state"); + } + return false; + } ZoneHostInfo zoneHostInfo = zoneHostInfoMap.get(dataCenterId); if (zoneHostInfo != null && (zoneHostInfo.getFlags() & RunningHostInfoAgregator.ZoneHostInfo.ROUTING_HOST_MASK) != 0) { VMTemplateVO template = _templateDao.findSystemVMReadyTemplate(dataCenterId, HypervisorType.Any);