From 796bd4f72cc021b91441b6c34a9d8f7425c53374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernardo=20De=20Marco=20Gon=C3=A7alves?= Date: Wed, 15 Jan 2025 13:38:20 -0300 Subject: [PATCH 01/50] Clean up network permissions on account deletion (#10176) --- .../network/dao/NetworkPermissionDao.java | 7 +++++ .../network/dao/NetworkPermissionDaoImpl.java | 15 ++++++++++ .../com/cloud/user/AccountManagerImpl.java | 29 ++++++++++++------- .../cloud/user/AccountManagerImplTest.java | 18 ++++++++++++ .../user/AccountManagetImplTestBase.java | 3 ++ 5 files changed, 61 insertions(+), 11 deletions(-) diff --git a/engine/schema/src/main/java/org/apache/cloudstack/network/dao/NetworkPermissionDao.java b/engine/schema/src/main/java/org/apache/cloudstack/network/dao/NetworkPermissionDao.java index 1c8d1cf48ff..e8b6322baee 100644 --- a/engine/schema/src/main/java/org/apache/cloudstack/network/dao/NetworkPermissionDao.java +++ b/engine/schema/src/main/java/org/apache/cloudstack/network/dao/NetworkPermissionDao.java @@ -40,6 +40,13 @@ public interface NetworkPermissionDao extends GenericDao NetworkAndAccountSearch; private SearchBuilder NetworkIdSearch; + private SearchBuilder accountSearch; private GenericSearchBuilder FindNetworkIdsByAccount; protected NetworkPermissionDaoImpl() { @@ -47,6 +48,10 @@ public class NetworkPermissionDaoImpl extends GenericDaoBase sc = accountSearch.create(); + sc.setParameters("accountId", accountId); + int networkPermissionRemoved = expunge(sc); + if (networkPermissionRemoved > 0) { + s_logger.debug(String.format("Removed [%s] network permission(s) for the account with Id [%s]", networkPermissionRemoved, accountId)); + } + } + @Override public NetworkPermissionVO findByNetworkAndAccount(long networkId, long accountId) { SearchCriteria sc = NetworkAndAccountSearch.create(); diff --git a/server/src/main/java/com/cloud/user/AccountManagerImpl.java b/server/src/main/java/com/cloud/user/AccountManagerImpl.java index 2d7ebf595fd..1e727036d56 100644 --- 
a/server/src/main/java/com/cloud/user/AccountManagerImpl.java +++ b/server/src/main/java/com/cloud/user/AccountManagerImpl.java @@ -74,6 +74,7 @@ import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.framework.messagebus.MessageBus; import org.apache.cloudstack.framework.messagebus.PublishScope; import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.network.dao.NetworkPermissionDao; import org.apache.cloudstack.region.gslb.GlobalLoadBalancerRuleDao; import org.apache.cloudstack.resourcedetail.UserDetailVO; import org.apache.cloudstack.resourcedetail.dao.UserDetailsDao; @@ -298,6 +299,8 @@ public class AccountManagerImpl extends ManagerBase implements AccountManager, M private SSHKeyPairDao _sshKeyPairDao; @Inject private UserDataDao userDataDao; + @Inject + private NetworkPermissionDao networkPermissionDao; private List _querySelectors; @@ -870,6 +873,9 @@ public class AccountManagerImpl extends ManagerBase implements AccountManager, M // delete the account from project accounts _projectAccountDao.removeAccountFromProjects(accountId); + // Delete account's network permissions + networkPermissionDao.removeAccountPermissions(accountId); + if (account.getType() != Account.Type.PROJECT) { // delete the account from group _messageBus.publish(_name, MESSAGE_REMOVE_ACCOUNT_EVENT, PublishScope.LOCAL, accountId); @@ -1857,26 +1863,27 @@ public class AccountManagerImpl extends ManagerBase implements AccountManager, M // If the user is a System user, return an error. We do not allow this AccountVO account = _accountDao.findById(accountId); - if (! 
isDeleteNeeded(account, accountId, caller)) { + if (!isDeleteNeeded(account, accountId, caller)) { return true; } - // Account that manages project(s) can't be removed - List managedProjectIds = _projectAccountDao.listAdministratedProjectIds(accountId); - if (!managedProjectIds.isEmpty()) { - StringBuilder projectIds = new StringBuilder(); - for (Long projectId : managedProjectIds) { - projectIds.append(projectId).append(", "); - } - - throw new InvalidParameterValueException("The account id=" + accountId + " manages project(s) with ids " + projectIds + "and can't be removed"); - } + checkIfAccountManagesProjects(accountId); CallContext.current().putContextParameter(Account.class, account.getUuid()); return deleteAccount(account, callerUserId, caller); } + protected void checkIfAccountManagesProjects(long accountId) { + List managedProjectIds = _projectAccountDao.listAdministratedProjectIds(accountId); + if (!CollectionUtils.isEmpty(managedProjectIds)) { + throw new InvalidParameterValueException(String.format( + "Unable to delete account [%s], because it manages the following project(s): %s. 
Please, remove the account from these projects or demote it to a regular project role first.", + accountId, managedProjectIds + )); + } + } + private boolean isDeleteNeeded(AccountVO account, long accountId, Account caller) { if (account == null) { s_logger.info(String.format("The account, identified by id %d, doesn't exist", accountId )); diff --git a/server/src/test/java/com/cloud/user/AccountManagerImplTest.java b/server/src/test/java/com/cloud/user/AccountManagerImplTest.java index ed0a123d4a3..0e8e1df0f3a 100644 --- a/server/src/test/java/com/cloud/user/AccountManagerImplTest.java +++ b/server/src/test/java/com/cloud/user/AccountManagerImplTest.java @@ -1200,4 +1200,22 @@ public class AccountManagerImplTest extends AccountManagetImplTestBase { Mockito.when(roleService.findRole(2L)).thenReturn(callerRole); accountManagerImpl.validateRoleChange(account, newRole, caller); } + + @Test + public void checkIfAccountManagesProjectsTestNotThrowExceptionWhenTheAccountIsNotAProjectAdministrator() { + long accountId = 1L; + List managedProjectIds = new ArrayList<>(); + + Mockito.when(_projectAccountDao.listAdministratedProjectIds(accountId)).thenReturn(managedProjectIds); + accountManagerImpl.checkIfAccountManagesProjects(accountId); + } + + @Test(expected = InvalidParameterValueException.class) + public void checkIfAccountManagesProjectsTestThrowExceptionWhenTheAccountIsAProjectAdministrator() { + long accountId = 1L; + List managedProjectIds = List.of(1L); + + Mockito.when(_projectAccountDao.listAdministratedProjectIds(accountId)).thenReturn(managedProjectIds); + accountManagerImpl.checkIfAccountManagesProjects(accountId); + } } diff --git a/server/src/test/java/com/cloud/user/AccountManagetImplTestBase.java b/server/src/test/java/com/cloud/user/AccountManagetImplTestBase.java index 7f9fa488471..2fa18221a94 100644 --- a/server/src/test/java/com/cloud/user/AccountManagetImplTestBase.java +++ b/server/src/test/java/com/cloud/user/AccountManagetImplTestBase.java @@ -65,6 
+65,7 @@ import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationSe import org.apache.cloudstack.engine.service.api.OrchestrationService; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.framework.messagebus.MessageBus; +import org.apache.cloudstack.network.dao.NetworkPermissionDao; import org.apache.cloudstack.region.gslb.GlobalLoadBalancerRuleDao; import org.apache.cloudstack.resourcedetail.dao.UserDetailsDao; import org.junit.After; @@ -195,6 +196,8 @@ public class AccountManagetImplTestBase { SSHKeyPairDao _sshKeyPairDao; @Mock UserDataDao userDataDao; + @Mock + NetworkPermissionDao networkPermissionDaoMock; @Spy @InjectMocks From 9967bb3fe899f82ce5fc83da5eba0d278a2df70a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernardo=20De=20Marco=20Gon=C3=A7alves?= Date: Thu, 16 Jan 2025 11:48:21 -0300 Subject: [PATCH 02/50] fix slider component for global settings of the range type (#10187) --- ui/src/views/setting/ConfigurationValue.vue | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ui/src/views/setting/ConfigurationValue.vue b/ui/src/views/setting/ConfigurationValue.vue index 836aed69dd3..109931a6664 100644 --- a/ui/src/views/setting/ConfigurationValue.vue +++ b/ui/src/views/setting/ConfigurationValue.vue @@ -55,9 +55,9 @@ /> - - - + + + - + Date: Thu, 16 Jan 2025 12:18:30 -0300 Subject: [PATCH 03/50] Add project-user association normalization script to 4.20.1 upgrade (#10116) --- .../main/resources/META-INF/db/schema-42000to42010-cleanup.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/engine/schema/src/main/resources/META-INF/db/schema-42000to42010-cleanup.sql b/engine/schema/src/main/resources/META-INF/db/schema-42000to42010-cleanup.sql index d187b6fa043..a00d50a7e10 100644 --- a/engine/schema/src/main/resources/META-INF/db/schema-42000to42010-cleanup.sql +++ b/engine/schema/src/main/resources/META-INF/db/schema-42000to42010-cleanup.sql @@ -18,3 +18,6 @@ --; -- 
Schema upgrade cleanup from 4.20.0.0 to 4.20.1.0 --; + +-- Delete `project_account` entries for users that were removed +DELETE FROM `cloud`.`project_account` WHERE `user_id` IN (SELECT `id` FROM `cloud`.`user` WHERE `removed`); From a163831b7e884b11eb8b41b2aff592edc2d33061 Mon Sep 17 00:00:00 2001 From: BartJM Date: Mon, 20 Jan 2025 11:54:30 +0100 Subject: [PATCH 04/50] Maintenance mode: Add host to deployment planner avoid list to fix local storage vm migration (#9892) --- .../src/main/java/com/cloud/resource/ResourceManagerImpl.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 4b26c7d3f38..50116905bfe 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -1473,8 +1473,10 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, offeringVO, null, null); plan.setMigrationPlan(true); DeployDestination dest = null; + DeploymentPlanner.ExcludeList avoids = new DeploymentPlanner.ExcludeList(); + avoids.addHost(host.getId()); try { - dest = deploymentManager.planDeployment(profile, plan, new DeploymentPlanner.ExcludeList(), null); + dest = deploymentManager.planDeployment(profile, plan, avoids, null); } catch (InsufficientServerCapacityException e) { throw new CloudRuntimeException(String.format("Maintenance failed, could not find deployment destination for VM: %s.", vm), e); } From c5e8f63452ac61431b1722400ce5c19289cbe187 Mon Sep 17 00:00:00 2001 From: Suresh Kumar Anaparti Date: Mon, 20 Jan 2025 17:50:17 +0530 Subject: [PATCH 05/50] Fix NPE issues during host rolling maintenance, due to host tags and custom constrained/unconstrained service offering (#9844) --- .../RollingMaintenanceManagerImpl.java | 59 
++++++++++++++++-- .../RollingMaintenanceManagerImplTest.java | 60 +++++++++++++++++++ 2 files changed, 113 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java index 25b2ad53bf2..4ada43308ee 100644 --- a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java @@ -37,6 +37,7 @@ import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceC import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.ObjectUtils; import org.apache.log4j.Logger; import com.cloud.agent.AgentManager; @@ -65,12 +66,16 @@ import com.cloud.org.Grouping; import com.cloud.service.ServiceOfferingVO; import com.cloud.service.dao.ServiceOfferingDao; import com.cloud.utils.Pair; +import com.cloud.utils.StringUtils; import com.cloud.utils.Ternary; import com.cloud.utils.component.ManagerBase; import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.vm.UserVmDetailVO; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine.State; import com.cloud.vm.VirtualMachineProfileImpl; +import com.cloud.vm.VmDetailConstants; +import com.cloud.vm.dao.UserVmDetailsDao; import com.cloud.vm.dao.VMInstanceDao; public class RollingMaintenanceManagerImpl extends ManagerBase implements RollingMaintenanceManager { @@ -86,6 +91,8 @@ public class RollingMaintenanceManagerImpl extends ManagerBase implements Rollin @Inject private VMInstanceDao vmInstanceDao; @Inject + protected UserVmDetailsDao userVmDetailsDao; + @Inject private ServiceOfferingDao serviceOfferingDao; @Inject private ClusterDetailsDao clusterDetailsDao; @@ -621,10 +628,19 @@ public class RollingMaintenanceManagerImpl extends 
ManagerBase implements Rollin int successfullyCheckedVmMigrations = 0; for (VMInstanceVO runningVM : vmsRunning) { boolean canMigrateVm = false; + Ternary cpuSpeedAndRamSize = getComputeResourcesCpuSpeedAndRamSize(runningVM); + Integer cpu = cpuSpeedAndRamSize.first(); + Integer speed = cpuSpeedAndRamSize.second(); + Integer ramSize = cpuSpeedAndRamSize.third(); + if (ObjectUtils.anyNull(cpu, speed, ramSize)) { + s_logger.warn(String.format("Cannot fetch compute resources for the VM %s, skipping it from the capacity check", runningVM)); + continue; + } + ServiceOfferingVO serviceOffering = serviceOfferingDao.findById(runningVM.getServiceOfferingId()); for (Host hostInCluster : hostsInCluster) { if (!checkHostTags(hostTags, hostTagsDao.getHostTags(hostInCluster.getId()), serviceOffering.getHostTag())) { - s_logger.debug(String.format("Host tags mismatch between %s and %s Skipping it from the capacity check", host, hostInCluster)); + s_logger.warn(String.format("Host tags mismatch between %s and %s, skipping it from the capacity check", host, hostInCluster)); continue; } DeployDestination deployDestination = new DeployDestination(null, null, null, host); @@ -634,13 +650,13 @@ public class RollingMaintenanceManagerImpl extends ManagerBase implements Rollin affinityChecks = affinityChecks && affinityProcessor.check(vmProfile, deployDestination); } if (!affinityChecks) { - s_logger.debug(String.format("Affinity check failed between %s and %s Skipping it from the capacity check", host, hostInCluster)); + s_logger.warn(String.format("Affinity check failed between %s and %s, skipping it from the capacity check", host, hostInCluster)); continue; } boolean maxGuestLimit = capacityManager.checkIfHostReachMaxGuestLimit(host); - boolean hostHasCPUCapacity = capacityManager.checkIfHostHasCpuCapability(hostInCluster.getId(), serviceOffering.getCpu(), serviceOffering.getSpeed()); - int cpuRequested = serviceOffering.getCpu() * serviceOffering.getSpeed(); - long ramRequested = 
serviceOffering.getRamSize() * 1024L * 1024L; + boolean hostHasCPUCapacity = capacityManager.checkIfHostHasCpuCapability(hostInCluster.getId(), cpu, speed); + int cpuRequested = cpu * speed; + long ramRequested = ramSize * 1024L * 1024L; ClusterDetailsVO clusterDetailsCpuOvercommit = clusterDetailsDao.findDetail(cluster.getId(), "cpuOvercommitRatio"); ClusterDetailsVO clusterDetailsRamOvercommmt = clusterDetailsDao.findDetail(cluster.getId(), "memoryOvercommitRatio"); Float cpuOvercommitRatio = Float.parseFloat(clusterDetailsCpuOvercommit.getValue()); @@ -666,11 +682,42 @@ public class RollingMaintenanceManagerImpl extends ManagerBase implements Rollin return new Pair<>(true, "OK"); } + protected Ternary getComputeResourcesCpuSpeedAndRamSize(VMInstanceVO runningVM) { + ServiceOfferingVO serviceOffering = serviceOfferingDao.findById(runningVM.getServiceOfferingId()); + Integer cpu = serviceOffering.getCpu(); + Integer speed = serviceOffering.getSpeed(); + Integer ramSize = serviceOffering.getRamSize(); + if (!serviceOffering.isDynamic()) { + return new Ternary<>(cpu, speed, ramSize); + } + + List vmDetails = userVmDetailsDao.listDetails(runningVM.getId()); + if (CollectionUtils.isEmpty(vmDetails)) { + return new Ternary<>(cpu, speed, ramSize); + } + + for (UserVmDetailVO vmDetail : vmDetails) { + if (StringUtils.isBlank(vmDetail.getName()) || StringUtils.isBlank(vmDetail.getValue())) { + continue; + } + + if (cpu == null && VmDetailConstants.CPU_NUMBER.equals(vmDetail.getName())) { + cpu = Integer.valueOf(vmDetail.getValue()); + } else if (speed == null && VmDetailConstants.CPU_SPEED.equals(vmDetail.getName())) { + speed = Integer.valueOf(vmDetail.getValue()); + } else if (ramSize == null && VmDetailConstants.MEMORY.equals(vmDetail.getName())) { + ramSize = Integer.valueOf(vmDetail.getValue()); + } + } + + return new Ternary<>(cpu, speed, ramSize); + } + /** * Check hosts tags */ private boolean checkHostTags(List hostTags, List hostInClusterTags, String 
offeringTag) { - if (CollectionUtils.isEmpty(hostTags) && CollectionUtils.isEmpty(hostInClusterTags)) { + if ((CollectionUtils.isEmpty(hostTags) && CollectionUtils.isEmpty(hostInClusterTags)) || StringUtils.isBlank(offeringTag)) { return true; } else if ((CollectionUtils.isNotEmpty(hostTags) && CollectionUtils.isEmpty(hostInClusterTags)) || (CollectionUtils.isEmpty(hostTags) && CollectionUtils.isNotEmpty(hostInClusterTags))) { diff --git a/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java b/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java index ef0277fd372..d8363964f05 100644 --- a/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java +++ b/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java @@ -23,7 +23,15 @@ import com.cloud.host.Status; import com.cloud.host.dao.HostDao; import com.cloud.hypervisor.Hypervisor; import com.cloud.org.Cluster; +import com.cloud.service.ServiceOfferingVO; +import com.cloud.service.dao.ServiceOfferingDao; +import com.cloud.utils.Ternary; import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.vm.UserVmDetailVO; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VmDetailConstants; +import com.cloud.vm.dao.UserVmDetailsDao; + import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -53,6 +61,12 @@ public class RollingMaintenanceManagerImplTest { HostVO host4; @Mock Cluster cluster; + @Mock + VMInstanceVO vm; + @Mock + ServiceOfferingDao serviceOfferingDao; + @Mock + UserVmDetailsDao userVmDetailsDao; @Spy @InjectMocks @@ -164,4 +178,50 @@ public class RollingMaintenanceManagerImplTest { Assert.assertEquals(1, hosts.size()); } + + @Test + public void testGetComputeResourcesCpuSpeedAndRamSize_ForNormalOffering() { + ServiceOfferingVO serviceOffering = Mockito.mock(ServiceOfferingVO.class); + Mockito.when(serviceOffering.isDynamic()).thenReturn(false); + 
Mockito.when(serviceOffering.getCpu()).thenReturn(1); + Mockito.when(serviceOffering.getSpeed()).thenReturn(500); + Mockito.when(serviceOffering.getRamSize()).thenReturn(512); + + Mockito.when(vm.getServiceOfferingId()).thenReturn(1L); + Mockito.when(serviceOfferingDao.findById(1L)).thenReturn(serviceOffering); + + Ternary cpuSpeedAndRamSize = manager.getComputeResourcesCpuSpeedAndRamSize(vm); + + Assert.assertEquals(1, cpuSpeedAndRamSize.first().intValue()); + Assert.assertEquals(500, cpuSpeedAndRamSize.second().intValue()); + Assert.assertEquals(512, cpuSpeedAndRamSize.third().intValue()); + } + + @Test + public void testGetComputeResourcesCpuSpeedAndRamSize_ForCustomOffering() { + ServiceOfferingVO serviceOffering = Mockito.mock(ServiceOfferingVO.class); + Mockito.when(serviceOffering.isDynamic()).thenReturn(true); + Mockito.when(serviceOffering.getCpu()).thenReturn(null); + Mockito.when(serviceOffering.getSpeed()).thenReturn(null); + Mockito.when(serviceOffering.getRamSize()).thenReturn(null); + + List vmDetails = new ArrayList<>(); + UserVmDetailVO cpuDetail = new UserVmDetailVO(1L, VmDetailConstants.CPU_NUMBER, "2", false); + vmDetails.add(cpuDetail); + UserVmDetailVO speedDetail = new UserVmDetailVO(1L, VmDetailConstants.CPU_SPEED, "1000", false); + vmDetails.add(speedDetail); + UserVmDetailVO ramSizeDetail = new UserVmDetailVO(1L, VmDetailConstants.MEMORY, "1024", false); + vmDetails.add(ramSizeDetail); + + Mockito.when(vm.getId()).thenReturn(1L); + Mockito.when(vm.getServiceOfferingId()).thenReturn(1L); + Mockito.when(serviceOfferingDao.findById(1L)).thenReturn(serviceOffering); + Mockito.when(userVmDetailsDao.listDetails(1L)).thenReturn(vmDetails); + + Ternary cpuSpeedAndRamSize = manager.getComputeResourcesCpuSpeedAndRamSize(vm); + + Assert.assertEquals(2, cpuSpeedAndRamSize.first().intValue()); + Assert.assertEquals(1000, cpuSpeedAndRamSize.second().intValue()); + Assert.assertEquals(1024, cpuSpeedAndRamSize.third().intValue()); + } } From 
00c659b7a76d0f93d7467a70b5673e1ec31dc844 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Mon, 20 Jan 2025 18:59:27 +0530 Subject: [PATCH 06/50] api: fix access for listSystemVmUsageHistory (#10032) Signed-off-by: Abhishek Kumar --- .../org/apache/cloudstack/api/ListSystemVMsUsageHistoryCmd.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/api/ListSystemVMsUsageHistoryCmd.java b/plugins/metrics/src/main/java/org/apache/cloudstack/api/ListSystemVMsUsageHistoryCmd.java index e2d3af24aef..5b279eb8bcb 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/api/ListSystemVMsUsageHistoryCmd.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/api/ListSystemVMsUsageHistoryCmd.java @@ -26,7 +26,7 @@ import org.apache.cloudstack.response.VmMetricsStatsResponse; @APICommand(name = "listSystemVmsUsageHistory", description = "Lists System VM stats", responseObject = VmMetricsStatsResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, since = "4.18.0", - authorized = {RoleType.Admin, RoleType.ResourceAdmin, RoleType.DomainAdmin}) + authorized = {RoleType.Admin}) public class ListSystemVMsUsageHistoryCmd extends BaseResourceUsageHistoryCmd { ///////////////////////////////////////////////////// From 1c626c884c6e4de988f3a42c5d38b0ca41dc91dd Mon Sep 17 00:00:00 2001 From: dahn Date: Mon, 20 Jan 2025 17:06:59 +0100 Subject: [PATCH 07/50] Update bug.yml --- .github/ISSUE_TEMPLATE/bug.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index 8044cc041df..10f05a1adfd 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -26,6 +26,13 @@ body: attributes: label: problem value: The long description of your problem +- type: markdown + attributes: + value: "## What versions of cloudstack and any infra components are you using" +- type: textarea + attributes: + label: 
versions + value: The versions of ACS, hypervisors, storage, network etc.. - type: textarea attributes: label: The steps to reproduce the bug From 0b8076c38cf7e5f4236d3035f0294dedd4eb1921 Mon Sep 17 00:00:00 2001 From: Abhisar Sinha <63767682+abh1sar@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:58:51 +0530 Subject: [PATCH 08/50] Configure org.eclipse.jetty.server.Request.maxFormKeys from server.properties and increase the default value (#10214) --- client/conf/server.properties.in | 3 +++ .../main/java/org/apache/cloudstack/ServerDaemon.java | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/client/conf/server.properties.in b/client/conf/server.properties.in index 57d81c81217..0a6078048d3 100644 --- a/client/conf/server.properties.in +++ b/client/conf/server.properties.in @@ -32,6 +32,9 @@ session.timeout=30 # Max allowed API request payload/content size in bytes request.content.size=1048576 +# Max allowed API request form keys +request.max.form.keys=5000 + # Options to configure and enable HTTPS on the management server # # For the management server to pick up these configuration settings, the configured diff --git a/client/src/main/java/org/apache/cloudstack/ServerDaemon.java b/client/src/main/java/org/apache/cloudstack/ServerDaemon.java index fb84e1297e6..e33a4084e4e 100644 --- a/client/src/main/java/org/apache/cloudstack/ServerDaemon.java +++ b/client/src/main/java/org/apache/cloudstack/ServerDaemon.java @@ -81,6 +81,8 @@ public class ServerDaemon implements Daemon { private static final String ACCESS_LOG = "access.log"; private static final String REQUEST_CONTENT_SIZE_KEY = "request.content.size"; private static final int DEFAULT_REQUEST_CONTENT_SIZE = 1048576; + private static final String REQUEST_MAX_FORM_KEYS_KEY = "request.max.form.keys"; + private static final int DEFAULT_REQUEST_MAX_FORM_KEYS = 5000; //////////////////////////////////////////////////////// /////////////// Server Configuration /////////////////// @@ -93,6 +95,7 @@ 
public class ServerDaemon implements Daemon { private int httpsPort = 8443; private int sessionTimeout = 30; private int maxFormContentSize = DEFAULT_REQUEST_CONTENT_SIZE; + private int maxFormKeys = DEFAULT_REQUEST_MAX_FORM_KEYS; private boolean httpsEnable = false; private String accessLogFile = "access.log"; private String bindInterface = null; @@ -140,6 +143,7 @@ public class ServerDaemon implements Daemon { setAccessLogFile(properties.getProperty(ACCESS_LOG, "access.log")); setSessionTimeout(Integer.valueOf(properties.getProperty(SESSION_TIMEOUT, "30"))); setMaxFormContentSize(Integer.valueOf(properties.getProperty(REQUEST_CONTENT_SIZE_KEY, String.valueOf(DEFAULT_REQUEST_CONTENT_SIZE)))); + setMaxFormKeys(Integer.valueOf(properties.getProperty(REQUEST_MAX_FORM_KEYS_KEY, String.valueOf(DEFAULT_REQUEST_MAX_FORM_KEYS)))); } catch (final IOException e) { LOG.warn("Failed to read configuration from server.properties file", e); } finally { @@ -191,6 +195,7 @@ public class ServerDaemon implements Daemon { // Extra config options server.setStopAtShutdown(true); server.setAttribute(ContextHandler.MAX_FORM_CONTENT_SIZE_KEY, maxFormContentSize); + server.setAttribute(ContextHandler.MAX_FORM_KEYS_KEY, maxFormKeys); // HTTPS Connector createHttpsConnector(httpConfig); @@ -263,6 +268,7 @@ public class ServerDaemon implements Daemon { webApp.setContextPath(contextPath); webApp.setInitParameter("org.eclipse.jetty.servlet.Default.dirAllowed", "false"); webApp.setMaxFormContentSize(maxFormContentSize); + webApp.setMaxFormKeys(maxFormKeys); // GZIP handler final GzipHandler gzipHandler = new GzipHandler(); @@ -365,4 +371,8 @@ public class ServerDaemon implements Daemon { public void setMaxFormContentSize(int maxFormContentSize) { this.maxFormContentSize = maxFormContentSize; } + + public void setMaxFormKeys(int maxFormKeys) { + this.maxFormKeys = maxFormKeys; + } } From 70776b067a4420e60ded8e0f825fc1b6c88721cd Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Bernardo=20De=20Marco=20Gon=C3=A7alves?= Date: Tue, 21 Jan 2025 06:33:55 -0300 Subject: [PATCH 09/50] fix listing of VMs by network (#10204) --- server/src/main/java/com/cloud/api/query/QueryManagerImpl.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java index 389a16a5542..42128525782 100644 --- a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java +++ b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java @@ -1377,6 +1377,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q if (networkId != null || vpcId != null) { SearchBuilder nicSearch = nicDao.createSearchBuilder(); nicSearch.and("networkId", nicSearch.entity().getNetworkId(), Op.EQ); + nicSearch.and("removed", nicSearch.entity().getRemoved(), Op.NULL); if (vpcId != null) { SearchBuilder networkSearch = networkDao.createSearchBuilder(); networkSearch.and("vpcId", networkSearch.entity().getVpcId(), Op.EQ); From 1ff68cf9b10d9064dbdb10b840000ad161f734bd Mon Sep 17 00:00:00 2001 From: Rene Peinthor Date: Tue, 21 Jan 2025 11:10:17 +0100 Subject: [PATCH 10/50] linstor: Fix ZFS snapshot backup (#10219) Linstor plugin used the wrong zfs dataset path to hide/unhide the snapshot device. Also don't use the full path to the zfs binary. 
--- plugins/storage/volume/linstor/CHANGELOG.md | 6 ++++++ .../LinstorBackupSnapshotCommandWrapper.java | 14 ++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/plugins/storage/volume/linstor/CHANGELOG.md b/plugins/storage/volume/linstor/CHANGELOG.md index 957377e2978..419a7f983ee 100644 --- a/plugins/storage/volume/linstor/CHANGELOG.md +++ b/plugins/storage/volume/linstor/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to Linstor CloudStack plugin will be documented in this file The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2025-01-20] + +### Fixed + +- Volume snapshots on zfs used the wrong dataset path to hide/unhide snapdev + ## [2024-12-13] ### Fixed diff --git a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java index a210d53d7e7..a572759c35a 100644 --- a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java +++ b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LinstorBackupSnapshotCommandWrapper.java @@ -45,11 +45,17 @@ public final class LinstorBackupSnapshotCommandWrapper { private static final Logger s_logger = Logger.getLogger(LinstorBackupSnapshotCommandWrapper.class); + private static String zfsDatasetName(String zfsFullSnapshotUrl) { + String zfsFullPath = zfsFullSnapshotUrl.substring(6); + int atPos = zfsFullPath.indexOf('@'); + return atPos >= 0 ? 
zfsFullPath.substring(0, atPos) : zfsFullPath; + } + private String zfsSnapdev(boolean hide, String zfsUrl) { - Script script = new Script("/usr/bin/zfs", Duration.millis(5000)); + Script script = new Script("zfs", Duration.millis(5000)); script.add("set"); script.add("snapdev=" + (hide ? "hidden" : "visible")); - script.add(zfsUrl.substring(6)); // cutting zfs:// + script.add(zfsDatasetName(zfsUrl)); // cutting zfs:// and @snapshotname return script.execute(); } @@ -133,10 +139,10 @@ public final class LinstorBackupSnapshotCommandWrapper s_logger.info("Src: " + srcPath + " | " + src.getName()); if (srcPath.startsWith("zfs://")) { zfsHidden = true; - if (zfsSnapdev(false, srcPath) != null) { + if (zfsSnapdev(false, src.getPath()) != null) { return new CopyCmdAnswer("Unable to unhide zfs snapshot device."); } - srcPath = "/dev/" + srcPath.substring(6); + srcPath = "/dev/zvol/" + srcPath.substring(6); } secondaryPool = storagePoolMgr.getStoragePoolByURI(dstDataStore.getUrl()); From 91f1adab862e3be5ee9931ca97b9eac920550757 Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Tue, 21 Jan 2025 14:29:37 +0100 Subject: [PATCH 11/50] UI: set redundant state as N/A for non-redundant routers (#10227) --- ui/src/components/view/ListView.vue | 4 ++++ ui/src/components/widgets/Status.vue | 2 ++ 2 files changed, 6 insertions(+) diff --git a/ui/src/components/view/ListView.vue b/ui/src/components/view/ListView.vue index 61b688dc75a..eaa54939d07 100644 --- a/ui/src/components/view/ListView.vue +++ b/ui/src/components/view/ListView.vue @@ -220,6 +220,10 @@ + diff --git a/ui/src/components/widgets/Status.vue b/ui/src/components/widgets/Status.vue index 22b7849aa61..a8b7327e3c7 100644 --- a/ui/src/components/widgets/Status.vue +++ b/ui/src/components/widgets/Status.vue @@ -113,6 +113,7 @@ export default { case 'up': case 'success': case 'poweron': + case 'primary': status = 'success' break case 'alert': @@ -149,6 +150,7 @@ export default { case 'pending': case 'unsecure': case 'warning': + 
case 'backup': status = 'warning' break } From 96b757c35bc7d1f0da1d03d7df7cbf094b5bb385 Mon Sep 17 00:00:00 2001 From: Rohit Yadav Date: Tue, 21 Jan 2025 21:18:53 +0530 Subject: [PATCH 12/50] packaging: have noarch defined for rpms (#10057) Signed-off-by: Rohit Yadav --- packaging/el8/cloud.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packaging/el8/cloud.spec b/packaging/el8/cloud.spec index e34778820cb..fbbb7abe350 100644 --- a/packaging/el8/cloud.spec +++ b/packaging/el8/cloud.spec @@ -17,6 +17,8 @@ %define __os_install_post %{nil} %global debug_package %{nil} +%global __requires_exclude libc\\.so\\..* +%define _binaries_in_noarch_packages_terminate_build 0 # DISABLE the post-percentinstall java repacking and line number stripping # we need to find a way to just disable the java repacking and line number stripping, but not the autodeps @@ -35,6 +37,7 @@ Group: System Environment/Libraries # FIXME do groups for every single one of the subpackages Source0: %{name}-%{_maventag}.tgz BuildRoot: %{_tmppath}/%{name}-%{_maventag}-%{release}-build +BuildArch: noarch BuildRequires: (java-11-openjdk-devel or java-17-openjdk-devel) #BuildRequires: ws-commons-util @@ -117,7 +120,7 @@ Requires: qemu-kvm Requires: cryptsetup Requires: rng-tools Requires: (libgcrypt > 1.8.3 or libgcrypt20) -Requires: (selinux-tools if qemu-tools) +Requires: (selinux-tools if selinux-tools) Requires: sysstat Provides: cloud-agent Group: System Environment/Libraries From d053bb97ecf14ad85f9af2fe6dd521e80117165a Mon Sep 17 00:00:00 2001 From: Harikrishna Date: Wed, 21 Aug 2024 12:07:31 +0530 Subject: [PATCH 13/50] Fix to allow actions on the network if it belongs to a project (#9550) --- ui/src/config/section/network.js | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ui/src/config/section/network.js b/ui/src/config/section/network.js index 5a3221810be..72eb17e0ae9 100644 --- a/ui/src/config/section/network.js +++ 
b/ui/src/config/section/network.js @@ -140,7 +140,9 @@ export default { icon: 'edit-outlined', label: 'label.update.network', dataView: true, - disabled: (record, user) => { return (record.account !== user.userInfo.account && !['Admin', 'DomainAdmin'].includes(user.userInfo.roletype)) }, + disabled: (record, user) => { + return !record.projectid && (record.account !== user.userInfo.account && !['Admin', 'DomainAdmin'].includes(user.userInfo.roletype)) + }, popup: true, component: shallowRef(defineAsyncComponent(() => import('@/views/network/UpdateNetwork.vue'))) }, @@ -150,7 +152,9 @@ export default { label: 'label.restart.network', message: 'message.restart.network', dataView: true, - disabled: (record, user) => { return (record.account !== user.userInfo.account && !['Admin', 'DomainAdmin'].includes(user.userInfo.roletype)) }, + disabled: (record, user) => { + return !record.projectid && (record.account !== user.userInfo.account && !['Admin', 'DomainAdmin'].includes(user.userInfo.roletype)) + }, args: (record, store, isGroupAction) => { var fields = [] if (isGroupAction || record.vpcid == null) { @@ -189,7 +193,9 @@ export default { label: 'label.action.delete.network', message: 'message.action.delete.network', dataView: true, - disabled: (record, user) => { return (record.account !== user.userInfo.account && !['Admin', 'DomainAdmin'].includes(user.userInfo.roletype)) }, + disabled: (record, user) => { + return !record.projectid && (record.account !== user.userInfo.account && !['Admin', 'DomainAdmin'].includes(user.userInfo.roletype)) + }, groupAction: true, popup: true, groupMap: (selection) => { return selection.map(x => { return { id: x } }) } From b186272f6878edcfd7145a3b9a9d0c61311ad94b Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Wed, 22 Jan 2025 14:00:02 +0100 Subject: [PATCH 14/50] kvm: add SCSI controllers based on the number of virtio-SCSI disks (#9823) According to libvirt code, the units per scsi controller is set to 7 therefore, we need to create 
scsi controller every 7 disks (including CDROM). https://github.com/libvirt/libvirt/blob/50cc7a0d9d2b9df085ec073a6d60820a9642158a/src/conf/domain_conf.h#L3007-L3008 https://github.com/libvirt/libvirt/blob/50cc7a0d9d2b9df085ec073a6d60820a9642158a/src/conf/domain_conf.c#L6701-L6704 --- .../kvm/resource/LibvirtComputingResource.java | 17 ++++++++++++++--- .../resource/LibvirtComputingResourceTest.java | 5 ++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index f9d56f8301d..12212924244 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -2646,7 +2646,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv Map details = vmTO.getDetails(); boolean isIothreadsEnabled = details != null && details.containsKey(VmDetailConstants.IOTHREADS); - devices.addDevice(createSCSIDef(vcpus, isIothreadsEnabled)); + addSCSIControllers(devices, vcpus, vmTO.getDisks().length, isIothreadsEnabled); } return devices; } @@ -2684,8 +2684,19 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv * Creates Virtio SCSI controller.
* The respective Virtio SCSI XML definition is generated only if the VM's Disk Bus is of ISCSI. */ - protected SCSIDef createSCSIDef(int vcpus, boolean isIothreadsEnabled) { - return new SCSIDef((short)0, 0, 0, 9, 0, vcpus, isIothreadsEnabled); + protected SCSIDef createSCSIDef(short index, int vcpus, boolean isIothreadsEnabled) { + return new SCSIDef(index, 0, 0, 9 + index, 0, vcpus, isIothreadsEnabled); + } + + + private void addSCSIControllers(DevicesDef devices, int vcpus, int diskCount, boolean isIothreadsEnabled) { + int controllers = diskCount / 7; + if (diskCount % 7 != 0) { + controllers++; + } + for (int i = 0; i < controllers; i++) { + devices.addDevice(createSCSIDef((short)i, vcpus, isIothreadsEnabled)); + } } protected ConsoleDef createConsoleDef() { diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java index bbd1f8a73f2..9b1da988c29 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java @@ -453,6 +453,9 @@ public class LibvirtComputingResourceTest { to.setDetails(new HashMap<>()); to.setPlatformEmulator("Other PV Virtio-SCSI"); + final DiskTO diskTO = Mockito.mock(DiskTO.class); + to.setDisks(new DiskTO[]{diskTO}); + GuestDef guest = new GuestDef(); guest.setGuestType(GuestType.KVM); @@ -640,7 +643,7 @@ public class LibvirtComputingResourceTest { public void testCreateSCSIDef() { VirtualMachineTO to = createDefaultVM(false); - SCSIDef scsiDef = libvirtComputingResourceSpy.createSCSIDef(to.getCpus(), false); + SCSIDef scsiDef = libvirtComputingResourceSpy.createSCSIDef((short)0, to.getCpus(), false); Document domainDoc = parse(scsiDef.toString()); verifyScsi(to, domainDoc, ""); } From 
09f154796a2a10f1c74d6f2087d1a3fe86694fb4 Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Wed, 22 Jan 2025 15:44:54 +0100 Subject: [PATCH 15/50] server: Fix host CPU number (#10218) --- .../response/HostForMigrationResponse.java | 431 +----------------- .../cloud/api/query/dao/HostJoinDaoImpl.java | 121 +---- 2 files changed, 8 insertions(+), 544 deletions(-) diff --git a/api/src/main/java/org/apache/cloudstack/api/response/HostForMigrationResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/HostForMigrationResponse.java index 41a0fdc4567..b4de48baec4 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/HostForMigrationResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/HostForMigrationResponse.java @@ -16,449 +16,20 @@ // under the License. package org.apache.cloudstack.api.response; -import java.util.Date; - -import org.apache.cloudstack.api.ApiConstants; -import org.apache.cloudstack.api.BaseResponse; import org.apache.cloudstack.api.EntityReference; import com.cloud.host.Host; -import com.cloud.host.Status; import com.cloud.serializer.Param; import com.google.gson.annotations.SerializedName; @EntityReference(value = Host.class) -public class HostForMigrationResponse extends BaseResponse { - @SerializedName(ApiConstants.ID) - @Param(description = "the ID of the host") - private String id; - - @SerializedName(ApiConstants.NAME) - @Param(description = "the name of the host") - private String name; - - @SerializedName(ApiConstants.STATE) - @Param(description = "the state of the host") - private Status state; - - @SerializedName("disconnected") - @Param(description = "true if the host is disconnected. 
False otherwise.") - private Date disconnectedOn; - - @SerializedName(ApiConstants.TYPE) - @Param(description = "the host type") - private Host.Type hostType; - - @SerializedName("oscategoryid") - @Param(description = "the OS category ID of the host") - private String osCategoryId; - - @SerializedName("oscategoryname") - @Param(description = "the OS category name of the host") - private String osCategoryName; - - @SerializedName(ApiConstants.IP_ADDRESS) - @Param(description = "the IP address of the host") - private String ipAddress; - - @SerializedName(ApiConstants.ZONE_ID) - @Param(description = "the Zone ID of the host") - private String zoneId; - - @SerializedName(ApiConstants.ZONE_NAME) - @Param(description = "the Zone name of the host") - private String zoneName; - - @SerializedName(ApiConstants.POD_ID) - @Param(description = "the Pod ID of the host") - private String podId; - - @SerializedName("podname") - @Param(description = "the Pod name of the host") - private String podName; - - @SerializedName("version") - @Param(description = "the host version") - private String version; - - @SerializedName(ApiConstants.HYPERVISOR) - @Param(description = "the host hypervisor") - private String hypervisor; - - @SerializedName("cpunumber") - @Param(description = "the CPU number of the host") - private Integer cpuNumber; - - @SerializedName("cpuspeed") - @Param(description = "the CPU speed of the host") - private Long cpuSpeed; - - @Deprecated - @SerializedName("cpuallocated") - @Param(description = "the amount of the host's CPU currently allocated") - private String cpuAllocated; - - @SerializedName("cpuallocatedvalue") - @Param(description = "the amount of the host's CPU currently allocated in MHz") - private Long cpuAllocatedValue; - - @SerializedName("cpuallocatedpercentage") - @Param(description = "the amount of the host's CPU currently allocated in percentage") - private String cpuAllocatedPercentage; - - @SerializedName("cpuallocatedwithoverprovisioning") - 
@Param(description = "the amount of the host's CPU currently allocated after applying the cpu.overprovisioning.factor") - private String cpuAllocatedWithOverprovisioning; - - @SerializedName("cpuused") - @Param(description = "the amount of the host's CPU currently used") - private String cpuUsed; - - @SerializedName("cpuwithoverprovisioning") - @Param(description = "the amount of the host's CPU after applying the cpu.overprovisioning.factor ") - private String cpuWithOverprovisioning; - - @Deprecated - @SerializedName("memorytotal") - @Param(description = "the memory total of the host, this parameter is deprecated use memorywithoverprovisioning") - private Long memoryTotal; - - @SerializedName("memorywithoverprovisioning") - @Param(description = "the amount of the host's memory after applying the mem.overprovisioning.factor ") - private String memWithOverprovisioning; - - @SerializedName("averageload") - @Param(description = "the cpu average load on the host") - private Long averageLoad; - - @SerializedName("networkkbsread") - @Param(description = "the incoming network traffic on the host") - private Long networkKbsRead; - - @SerializedName("networkkbswrite") - @Param(description = "the outgoing network traffic on the host") - private Long networkKbsWrite; - - @Deprecated - @SerializedName("memoryallocated") - @Param(description = "the amount of the host's memory currently allocated") - private String memoryAllocated; - - @SerializedName("memoryallocatedpercentage") - @Param(description = "the amount of the host's memory currently allocated in percentage") - private String memoryAllocatedPercentage; - - @SerializedName("memoryallocatedbytes") - @Param(description = "the amount of the host's memory currently allocated in bytes") - private Long memoryAllocatedBytes; - - @SerializedName("memoryused") - @Param(description = "the amount of the host's memory currently used") - private Long memoryUsed; - - @SerializedName("disksizetotal") - @Param(description = "the total 
disk size of the host") - private Long diskSizeTotal; - - @SerializedName("disksizeallocated") - @Param(description = "the host's currently allocated disk size") - private Long diskSizeAllocated; - - @SerializedName("capabilities") - @Param(description = "capabilities of the host") - private String capabilities; - - @SerializedName("lastpinged") - @Param(description = "the date and time the host was last pinged") - private Date lastPinged; - - @SerializedName("managementserverid") - @Param(description = "the management server ID of the host") - private Long managementServerId; - - @SerializedName("clusterid") - @Param(description = "the cluster ID of the host") - private String clusterId; - - @SerializedName("clustername") - @Param(description = "the cluster name of the host") - private String clusterName; - - @SerializedName("clustertype") - @Param(description = "the cluster type of the cluster that host belongs to") - private String clusterType; - - @SerializedName("islocalstorageactive") - @Param(description = "true if local storage is active, false otherwise") - private Boolean localStorageActive; - - @SerializedName(ApiConstants.CREATED) - @Param(description = "the date and time the host was created") - private Date created; - - @SerializedName("removed") - @Param(description = "the date and time the host was removed") - private Date removed; - - @SerializedName("events") - @Param(description = "events available for the host") - private String events; - - @SerializedName("hosttags") - @Param(description = "comma-separated list of tags for the host") - private String hostTags; - - @SerializedName("hasenoughcapacity") - @Param(description = "true if this host has enough CPU and RAM capacity to migrate a VM to it, false otherwise") - private Boolean hasEnoughCapacity; - - @SerializedName("suitableformigration") - @Param(description = "true if this host is suitable(has enough capacity and satisfies all conditions like hosttags, " + - "max guests vm limit etc) to 
migrate a VM to it , false otherwise") - private Boolean suitableForMigration; +public class HostForMigrationResponse extends HostResponse { @SerializedName("requiresStorageMotion") @Param(description = "true if migrating a vm to this host requires storage motion, false otherwise") private Boolean requiresStorageMotion; - @SerializedName("resourcestate") - @Param(description = "the resource state of the host") - private String resourceState; - - @SerializedName(ApiConstants.HYPERVISOR_VERSION) - @Param(description = "the hypervisor version") - private String hypervisorVersion; - - @SerializedName(ApiConstants.HA_HOST) - @Param(description = "true if the host is Ha host (dedicated to vms started by HA process; false otherwise") - private Boolean haHost; - - @Override - public String getObjectId() { - return getId(); - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public void setName(String name) { - this.name = name; - } - - public void setState(Status state) { - this.state = state; - } - - public void setDisconnectedOn(Date disconnectedOn) { - this.disconnectedOn = disconnectedOn; - } - - public void setHostType(Host.Type hostType) { - this.hostType = hostType; - } - - public void setOsCategoryId(String osCategoryId) { - this.osCategoryId = osCategoryId; - } - - public void setOsCategoryName(String osCategoryName) { - this.osCategoryName = osCategoryName; - } - - public void setIpAddress(String ipAddress) { - this.ipAddress = ipAddress; - } - - public void setZoneId(String zoneId) { - this.zoneId = zoneId; - } - - public void setZoneName(String zoneName) { - this.zoneName = zoneName; - } - - public void setPodId(String podId) { - this.podId = podId; - } - - public void setPodName(String podName) { - this.podName = podName; - } - - public void setVersion(String version) { - this.version = version; - } - - public void setHypervisor(String hypervisor) { - this.hypervisor = hypervisor; - } - - public void 
setCpuNumber(Integer cpuNumber) { - this.cpuNumber = cpuNumber; - } - - public void setCpuSpeed(Long cpuSpeed) { - this.cpuSpeed = cpuSpeed; - } - - public String getCpuAllocated() { - return cpuAllocated; - } - - public void setCpuAllocated(String cpuAllocated) { - this.cpuAllocated = cpuAllocated; - } - - public void setCpuAllocatedValue(Long cpuAllocatedValue) { - this.cpuAllocatedValue = cpuAllocatedValue; - } - - public void setCpuAllocatedPercentage(String cpuAllocatedPercentage) { - this.cpuAllocatedPercentage = cpuAllocatedPercentage; - } - - public void setCpuAllocatedWithOverprovisioning(String cpuAllocatedWithOverprovisioning) { - this.cpuAllocatedWithOverprovisioning = cpuAllocatedWithOverprovisioning; - } - - public void setCpuUsed(String cpuUsed) { - this.cpuUsed = cpuUsed; - } - - public void setAverageLoad(Long averageLoad) { - this.averageLoad = averageLoad; - } - - public void setNetworkKbsRead(Long networkKbsRead) { - this.networkKbsRead = networkKbsRead; - } - - public void setNetworkKbsWrite(Long networkKbsWrite) { - this.networkKbsWrite = networkKbsWrite; - } - - public void setMemoryAllocated(String memoryAllocated) { - this.memoryAllocated = memoryAllocated; - } - - public void setMemoryAllocatedPercentage(String memoryAllocatedPercentage) { - this.memoryAllocatedPercentage = memoryAllocatedPercentage; - } - - public void setMemoryAllocatedBytes(Long memoryAllocatedBytes) { - this.memoryAllocatedBytes = memoryAllocatedBytes; - } - - public void setMemoryUsed(Long memoryUsed) { - this.memoryUsed = memoryUsed; - } - - public void setDiskSizeTotal(Long diskSizeTotal) { - this.diskSizeTotal = diskSizeTotal; - } - - public void setDiskSizeAllocated(Long diskSizeAllocated) { - this.diskSizeAllocated = diskSizeAllocated; - } - - public void setCapabilities(String capabilities) { - this.capabilities = capabilities; - } - - public void setLastPinged(Date lastPinged) { - this.lastPinged = lastPinged; - } - - public void setManagementServerId(Long 
managementServerId) { - this.managementServerId = managementServerId; - } - - public void setClusterId(String clusterId) { - this.clusterId = clusterId; - } - - public void setClusterName(String clusterName) { - this.clusterName = clusterName; - } - - public void setClusterType(String clusterType) { - this.clusterType = clusterType; - } - - public void setLocalStorageActive(Boolean localStorageActive) { - this.localStorageActive = localStorageActive; - } - - public void setCreated(Date created) { - this.created = created; - } - - public void setRemoved(Date removed) { - this.removed = removed; - } - - public void setEvents(String events) { - this.events = events; - } - - public String getHostTags() { - return hostTags; - } - - public void setHostTags(String hostTags) { - this.hostTags = hostTags; - } - - public void setHasEnoughCapacity(Boolean hasEnoughCapacity) { - this.hasEnoughCapacity = hasEnoughCapacity; - } - - public void setSuitableForMigration(Boolean suitableForMigration) { - this.suitableForMigration = suitableForMigration; - } - public void setRequiresStorageMotion(Boolean requiresStorageMotion) { this.requiresStorageMotion = requiresStorageMotion; } - - public String getResourceState() { - return resourceState; - } - - public void setResourceState(String resourceState) { - this.resourceState = resourceState; - } - - public String getCpuWithOverprovisioning() { - return cpuWithOverprovisioning; - } - - public void setCpuWithOverprovisioning(String cpuWithOverprovisioning) { - this.cpuWithOverprovisioning = cpuWithOverprovisioning; - } - - public void setMemWithOverprovisioning(String memWithOverprovisioning){ - this.memWithOverprovisioning=memWithOverprovisioning; - } - - public void setHypervisorVersion(String hypervisorVersion) { - this.hypervisorVersion = hypervisorVersion; - } - - public Boolean getHaHost() { - return haHost; - } - - public void setHaHost(Boolean haHost) { - this.haHost = haHost; - } - - public void setMemoryTotal(Long memoryTotal) 
{ - this.memoryTotal = memoryTotal; - } } diff --git a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java index b0b061d78c5..9d361b093e0 100644 --- a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java @@ -115,9 +115,7 @@ public class HostJoinDaoImpl extends GenericDaoBase implements return result; } - @Override - public HostResponse newHostResponse(HostJoinVO host, EnumSet details) { - HostResponse hostResponse = new HostResponse(); + private void setNewHostResponseBase(HostJoinVO host, EnumSet details, HostResponse hostResponse) { hostResponse.setId(host.getUuid()); hostResponse.setCapabilities(host.getCapabilities()); hostResponse.setClusterId(host.getClusterUuid()); @@ -189,7 +187,6 @@ public class HostJoinDaoImpl extends GenericDaoBase implements DecimalFormat decimalFormat = new DecimalFormat("#.##"); if (host.getType() == Host.Type.Routing) { float cpuOverprovisioningFactor = ApiDBUtils.getCpuOverprovisioningFactor(host.getClusterId()); - hostResponse.setCpuNumber((int)(host.getCpus() * cpuOverprovisioningFactor)); if (details.contains(HostDetails.all) || details.contains(HostDetails.capacity)) { // set allocated capacities Long mem = host.getMemReservedCapacity() + host.getMemUsedCapacity(); @@ -295,123 +292,19 @@ public class HostJoinDaoImpl extends GenericDaoBase implements hostResponse.setUsername(host.getUsername()); hostResponse.setObjectName("host"); + } + @Override + public HostResponse newHostResponse(HostJoinVO host, EnumSet details) { + HostResponse hostResponse = new HostResponse(); + setNewHostResponseBase(host, details, hostResponse); return hostResponse; } @Override public HostForMigrationResponse newHostForMigrationResponse(HostJoinVO host, EnumSet details) { HostForMigrationResponse hostResponse = new HostForMigrationResponse(); - hostResponse.setId(host.getUuid()); - 
hostResponse.setCapabilities(host.getCapabilities()); - hostResponse.setClusterId(host.getClusterUuid()); - hostResponse.setCpuNumber(host.getCpus()); - hostResponse.setZoneId(host.getZoneUuid()); - hostResponse.setDisconnectedOn(host.getDisconnectedOn()); - hostResponse.setHypervisor(host.getHypervisorType().getHypervisorDisplayName()); - hostResponse.setHostType(host.getType()); - hostResponse.setLastPinged(new Date(host.getLastPinged())); - hostResponse.setManagementServerId(host.getManagementServerId()); - hostResponse.setName(host.getName()); - hostResponse.setPodId(host.getPodUuid()); - hostResponse.setRemoved(host.getRemoved()); - hostResponse.setCpuSpeed(host.getSpeed()); - hostResponse.setState(host.getStatus()); - hostResponse.setIpAddress(host.getPrivateIpAddress()); - hostResponse.setVersion(host.getVersion()); - hostResponse.setCreated(host.getCreated()); - - if (details.contains(HostDetails.all) || details.contains(HostDetails.capacity) || details.contains(HostDetails.stats) || details.contains(HostDetails.events)) { - - hostResponse.setOsCategoryId(host.getOsCategoryUuid()); - hostResponse.setOsCategoryName(host.getOsCategoryName()); - hostResponse.setZoneName(host.getZoneName()); - hostResponse.setPodName(host.getPodName()); - if (host.getClusterId() > 0) { - hostResponse.setClusterName(host.getClusterName()); - hostResponse.setClusterType(host.getClusterType().toString()); - } - } - - DecimalFormat decimalFormat = new DecimalFormat("#.##"); - if (host.getType() == Host.Type.Routing) { - if (details.contains(HostDetails.all) || details.contains(HostDetails.capacity)) { - // set allocated capacities - Long mem = host.getMemReservedCapacity() + host.getMemUsedCapacity(); - Long cpu = host.getCpuReservedCapacity() + host.getCpuUsedCapacity(); - - hostResponse.setMemoryTotal(host.getTotalMemory()); - Float memWithOverprovisioning = host.getTotalMemory() * ApiDBUtils.getMemOverprovisioningFactor(host.getClusterId()); - 
hostResponse.setMemWithOverprovisioning(decimalFormat.format(memWithOverprovisioning)); - String memoryAllocatedPercentage = decimalFormat.format((float) mem / memWithOverprovisioning * 100.0f) +"%"; - hostResponse.setMemoryAllocated(memoryAllocatedPercentage); - hostResponse.setMemoryAllocatedPercentage(memoryAllocatedPercentage); - hostResponse.setMemoryAllocatedBytes(mem); - - String hostTags = host.getTag(); - hostResponse.setHostTags(hostTags); - hostResponse.setHaHost(containsHostHATag(hostTags)); - - hostResponse.setHypervisorVersion(host.getHypervisorVersion()); - - hostResponse.setCpuAllocatedValue(cpu); - String cpuAlloc = decimalFormat.format(((float)cpu / (float)(host.getCpus() * host.getSpeed())) * 100f) + "%"; - hostResponse.setCpuAllocated(cpuAlloc); - hostResponse.setCpuAllocatedPercentage(cpuAlloc); - float cpuWithOverprovisioning = host.getCpus() * host.getSpeed() * ApiDBUtils.getCpuOverprovisioningFactor(host.getClusterId()); - hostResponse.setCpuAllocatedWithOverprovisioning(calculateResourceAllocatedPercentage(cpu, cpuWithOverprovisioning)); - hostResponse.setCpuWithOverprovisioning(decimalFormat.format(cpuWithOverprovisioning)); - } - - if (details.contains(HostDetails.all) || details.contains(HostDetails.stats)) { - // set CPU/RAM/Network stats - String cpuUsed = null; - HostStats hostStats = ApiDBUtils.getHostStatistics(host.getId()); - if (hostStats != null) { - float cpuUtil = (float)hostStats.getCpuUtilization(); - cpuUsed = decimalFormat.format(cpuUtil) + "%"; - hostResponse.setCpuUsed(cpuUsed); - hostResponse.setMemoryUsed((new Double(hostStats.getUsedMemory())).longValue()); - hostResponse.setNetworkKbsRead((new Double(hostStats.getNetworkReadKBs())).longValue()); - hostResponse.setNetworkKbsWrite((new Double(hostStats.getNetworkWriteKBs())).longValue()); - - } - } - - } else if (host.getType() == Host.Type.SecondaryStorage) { - StorageStats secStorageStats = ApiDBUtils.getSecondaryStorageStatistics(host.getId()); - if (secStorageStats 
!= null) { - hostResponse.setDiskSizeTotal(secStorageStats.getCapacityBytes()); - hostResponse.setDiskSizeAllocated(secStorageStats.getByteUsed()); - } - } - - hostResponse.setLocalStorageActive(ApiDBUtils.isLocalStorageActiveOnHost(host.getId())); - - if (details.contains(HostDetails.all) || details.contains(HostDetails.events)) { - Set possibleEvents = host.getStatus().getPossibleEvents(); - if ((possibleEvents != null) && !possibleEvents.isEmpty()) { - String events = ""; - Iterator iter = possibleEvents.iterator(); - while (iter.hasNext()) { - com.cloud.host.Status.Event event = iter.next(); - events += event.toString(); - if (iter.hasNext()) { - events += "; "; - } - } - hostResponse.setEvents(events); - } - } - - hostResponse.setResourceState(host.getResourceState().toString()); - - // set async job - hostResponse.setJobId(host.getJobUuid()); - hostResponse.setJobStatus(host.getJobStatus()); - - hostResponse.setObjectName("host"); - + setNewHostResponseBase(host, details, hostResponse); return hostResponse; } From 1e59f5cd0cd5e6622bee06000512059a656456ae Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Wed, 22 Jan 2025 20:44:04 +0530 Subject: [PATCH 16/50] ui: fix passing vlan while creating vpc tier (#10239) Signed-off-by: Abhishek Kumar --- ui/src/views/network/VpcTiersTab.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/src/views/network/VpcTiersTab.vue b/ui/src/views/network/VpcTiersTab.vue index 402f9f06115..ad06e3bdf0b 100644 --- a/ui/src/views/network/VpcTiersTab.vue +++ b/ui/src/views/network/VpcTiersTab.vue @@ -213,7 +213,7 @@ @change="updateMtu()"/>
- + From 20759187b3153b6a04070a037bf8a7c63c630bb0 Mon Sep 17 00:00:00 2001 From: Harikrishna Date: Thu, 23 Jan 2025 12:46:33 +0530 Subject: [PATCH 17/50] Fix local storage deletion cases (#10231) * Delete local storage properties in agent.properties during delete pool * Fix stale entry when add local storage failed * Smaller methods * Comment added --- .../resource/LibvirtComputingResource.java | 2 +- ...ibvirtDeleteStoragePoolCommandWrapper.java | 60 +++++++++++++++++-- .../com/cloud/storage/StorageManagerImpl.java | 14 ++++- 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 12212924244..7df170cd361 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -431,7 +431,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv protected static final String LOCAL_STORAGE_PATH = "local.storage.path"; protected static final String LOCAL_STORAGE_UUID = "local.storage.uuid"; - protected static final String DEFAULT_LOCAL_STORAGE_PATH = "/var/lib/libvirt/images/"; + public static final String DEFAULT_LOCAL_STORAGE_PATH = "/var/lib/libvirt/images"; protected List localStoragePaths = new ArrayList<>(); protected List localStorageUUIDs = new ArrayList<>(); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtDeleteStoragePoolCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtDeleteStoragePoolCommandWrapper.java index 716df4789f8..ad3ba80253e 100644 --- 
a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtDeleteStoragePoolCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtDeleteStoragePoolCommandWrapper.java @@ -22,12 +22,20 @@ package com.cloud.hypervisor.kvm.resource.wrapper; import com.cloud.agent.api.Answer; import com.cloud.agent.api.DeleteStoragePoolCommand; import com.cloud.agent.api.to.StorageFilerTO; +import com.cloud.agent.dao.impl.PropertiesStorage; +import com.cloud.agent.properties.AgentProperties; +import com.cloud.agent.properties.AgentPropertiesFileHandler; import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource; import com.cloud.hypervisor.kvm.storage.KVMStoragePoolManager; import com.cloud.resource.CommandWrapper; import com.cloud.resource.ResourceWrapper; +import com.cloud.storage.Storage; import com.cloud.utils.exception.CloudRuntimeException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.stream.Collectors; + @ResourceWrapper(handles = DeleteStoragePoolCommand.class) public final class LibvirtDeleteStoragePoolCommandWrapper extends CommandWrapper { @Override @@ -35,15 +43,57 @@ public final class LibvirtDeleteStoragePoolCommandWrapper extends CommandWrapper try { // if getRemoveDatastore() is true, then we are dealing with managed storage and can skip the delete logic here if (!command.getRemoveDatastore()) { - final StorageFilerTO pool = command.getPool(); - final KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr(); - - storagePoolMgr.deleteStoragePool(pool.getType(), pool.getUuid()); + handleStoragePoolDeletion(command, libvirtComputingResource); } - return new Answer(command); } catch (final CloudRuntimeException e) { return new Answer(command, false, e.toString()); } } + + private void handleStoragePoolDeletion(final DeleteStoragePoolCommand command, final LibvirtComputingResource libvirtComputingResource) { + final 
StorageFilerTO pool = command.getPool(); + final KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr(); + storagePoolMgr.deleteStoragePool(pool.getType(), pool.getUuid()); + + if (isLocalStorageAndNotHavingDefaultPath(pool, libvirtComputingResource)) { + updateLocalStorageProperties(pool); + } + } + + private boolean isLocalStorageAndNotHavingDefaultPath(final StorageFilerTO pool, final LibvirtComputingResource libvirtComputingResource) { + return Storage.StoragePoolType.Filesystem.equals(pool.getType()) + && !libvirtComputingResource.DEFAULT_LOCAL_STORAGE_PATH.equals(pool.getPath()); + } + + private void updateLocalStorageProperties(final StorageFilerTO pool) { + String localStoragePath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.LOCAL_STORAGE_PATH); + String localStorageUuid = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.LOCAL_STORAGE_UUID); + + String uuidToRemove = pool.getUuid(); + String pathToRemove = pool.getPath(); + + if (localStorageUuid != null && uuidToRemove != null) { + localStorageUuid = Arrays.stream(localStorageUuid.split(",")) + .filter(uuid -> !uuid.equals(uuidToRemove)) + .collect(Collectors.joining(",")); + } + + if (localStoragePath != null && pathToRemove != null) { + localStoragePath = Arrays.stream(localStoragePath.split(",")) + .filter(path -> !path.equals(pathToRemove)) + .collect(Collectors.joining(",")); + } + + PropertiesStorage agentProperties = new PropertiesStorage(); + agentProperties.configure("AgentProperties", new HashMap()); + + if (localStorageUuid != null) { + agentProperties.persist(AgentProperties.LOCAL_STORAGE_UUID.getName(), localStorageUuid); + } + + if (localStoragePath != null) { + agentProperties.persist(AgentProperties.LOCAL_STORAGE_PATH.getName(), localStoragePath); + } + } } diff --git a/server/src/main/java/com/cloud/storage/StorageManagerImpl.java b/server/src/main/java/com/cloud/storage/StorageManagerImpl.java index 36e0f582df8..0a45fd448ad 100644 
--- a/server/src/main/java/com/cloud/storage/StorageManagerImpl.java +++ b/server/src/main/java/com/cloud/storage/StorageManagerImpl.java @@ -803,7 +803,9 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C if (!(dc.isLocalStorageEnabled() || useLocalStorageForSystemVM)) { return null; } - DataStore store; + DataStore store = null; + DataStoreProvider provider = _dataStoreProviderMgr.getDefaultPrimaryDataStoreProvider(); + DataStoreLifeCycle lifeCycle = provider.getDataStoreLifeCycle(); try { String hostAddress = pInfo.getHost(); if (host.getHypervisorType() == Hypervisor.HypervisorType.VMware) { @@ -829,8 +831,6 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C } } - DataStoreProvider provider = _dataStoreProviderMgr.getDefaultPrimaryDataStoreProvider(); - DataStoreLifeCycle lifeCycle = provider.getDataStoreLifeCycle(); if (pool == null) { Map params = new HashMap(); String name = pInfo.getName() != null ? pInfo.getName() : createLocalStoragePoolName(host, pInfo); @@ -860,6 +860,14 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C } catch (Exception e) { s_logger.warn("Unable to setup the local storage pool for " + host, e); + try { + if (store != null) { + s_logger.debug(String.format("Trying to delete storage pool entry if exists %s", store)); + lifeCycle.deleteDataStore(store); + } + } catch (Exception ex) { + s_logger.debug(String.format("Failed to clean up local storage pool: %s", ex.getMessage())); + } throw new ConnectionException(true, "Unable to setup the local storage pool for " + host, e); } From 4d9fd1b73fa8429881782c2df7d3ffecef1701f4 Mon Sep 17 00:00:00 2001 From: Harikrishna Date: Thu, 23 Jan 2025 14:06:30 +0530 Subject: [PATCH 18/50] Added displaynetwork option in filters for listnetwork only for admin (#10209) --- ui/public/locales/en.json | 2 ++ ui/src/components/view/ListView.vue | 3 +++ ui/src/components/view/SearchView.vue | 23 
++++++++++++++++++++++- ui/src/config/section/network.js | 2 +- ui/src/views/AutogenView.vue | 3 +++ 5 files changed, 31 insertions(+), 2 deletions(-) diff --git a/ui/public/locales/en.json b/ui/public/locales/en.json index 8cc17bbb128..cb113d41b2f 100644 --- a/ui/public/locales/en.json +++ b/ui/public/locales/en.json @@ -2143,6 +2143,8 @@ "label.traffictype": "Traffic type", "label.transportzoneuuid": "Transport zone UUID", "label.trigger.shutdown": "Trigger Safe Shutdown", +"label.true": "True", +"label.false": "False", "label.try.again": "Try again", "label.tuesday": "Tuesday", "label.two.factor.authentication.secret.key": "Your Two factor authentication secret key", diff --git a/ui/src/components/view/ListView.vue b/ui/src/components/view/ListView.vue index eaa54939d07..e67f1c4cc78 100644 --- a/ui/src/components/view/ListView.vue +++ b/ui/src/components/view/ListView.vue @@ -86,6 +86,9 @@ {{ $t(text.toLowerCase()) }} {{ $t(text.toLowerCase()) }}
+ + {{ $t(text.toLowerCase()) }} + {{ text }} {{ text }} diff --git a/ui/src/components/view/SearchView.vue b/ui/src/components/view/SearchView.vue index a43dfabf868..9cbe1ef0fe6 100644 --- a/ui/src/components/view/SearchView.vue +++ b/ui/src/components/view/SearchView.vue @@ -289,9 +289,12 @@ export default { if (item === 'groupid' && !('listInstanceGroups' in this.$store.getters.apis)) { return true } + if (item === 'displaynetwork' && this.$store.getters.userInfo.roletype !== 'Admin') { + return true + } if (['zoneid', 'domainid', 'imagestoreid', 'storageid', 'state', 'account', 'hypervisor', 'level', 'clusterid', 'podid', 'groupid', 'entitytype', 'accounttype', 'systemvmtype', 'scope', 'provider', - 'type', 'serviceofferingid', 'diskofferingid'].includes(item) + 'type', 'serviceofferingid', 'diskofferingid', 'displaynetwork'].includes(item) ) { type = 'list' } else if (item === 'tags') { @@ -311,6 +314,12 @@ export default { return arrayField }, fetchStaticFieldData (arrayField) { + if (arrayField.includes('displaynetwork')) { + const typeIndex = this.fields.findIndex(item => item.name === 'displaynetwork') + this.fields[typeIndex].loading = true + this.fields[typeIndex].opts = this.fetchBoolean() + this.fields[typeIndex].loading = false + } if (arrayField.includes('type')) { if (this.$route.path === '/guestnetwork' || this.$route.path.includes('/guestnetwork/')) { const typeIndex = this.fields.findIndex(item => item.name === 'type') @@ -856,6 +865,18 @@ export default { } return types }, + fetchBoolean () { + const types = [] + types.push({ + id: 'true', + name: 'label.true' + }) + types.push({ + id: 'false', + name: 'label.false' + }) + return types + }, fetchAccountTypes () { const types = [] if (this.apiName.indexOf('listAccounts') > -1) { diff --git a/ui/src/config/section/network.js b/ui/src/config/section/network.js index 72eb17e0ae9..986a2c206c7 100644 --- a/ui/src/config/section/network.js +++ b/ui/src/config/section/network.js @@ -54,7 +54,7 @@ export 
default { return fields }, filters: ['all', 'account', 'domainpath', 'shared'], - searchFilters: ['keyword', 'zoneid', 'domainid', 'account', 'type', 'tags'], + searchFilters: ['keyword', 'zoneid', 'domainid', 'account', 'type', 'displaynetwork', 'tags'], related: [{ name: 'vm', title: 'label.instances', diff --git a/ui/src/views/AutogenView.vue b/ui/src/views/AutogenView.vue index 3471c02d2e4..74fae8cada4 100644 --- a/ui/src/views/AutogenView.vue +++ b/ui/src/views/AutogenView.vue @@ -915,6 +915,9 @@ export default { this.loading = true if (this.$route.params && this.$route.params.id) { params.id = this.$route.params.id + if (['listNetworks'].includes(this.apiName) && 'displaynetwork' in this.$route.query) { + params.displaynetwork = this.$route.query.displaynetwork + } if (['listSSHKeyPairs'].includes(this.apiName)) { if (!this.$isValidUuid(params.id)) { delete params.id From 7e295ec4e191bc2cd254ec6381c5876dfbffd280 Mon Sep 17 00:00:00 2001 From: Nicolas Vazquez Date: Thu, 23 Jan 2025 09:15:02 -0300 Subject: [PATCH 19/50] [KVM] Add watchdog model none to disable use of watchdogs on KVM agent (#10203) --- agent/conf/agent.properties | 1 + .../com/cloud/agent/properties/AgentProperties.java | 1 + .../cloud/hypervisor/kvm/resource/LibvirtVMDef.java | 6 +++++- .../hypervisor/kvm/resource/LibvirtVMDefTest.java | 11 +++++++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties index 3b6a7b7de29..515614fff16 100644 --- a/agent/conf/agent.properties +++ b/agent/conf/agent.properties @@ -286,6 +286,7 @@ hypervisor.type=kvm # The model of Watchdog timer to present to the Guest. # For all models refer to the libvirt documentation. +# PLEASE NOTE: to disable the watchdogs definitions, use value: none #vm.watchdog.model=i6300esb # Action to take when the Guest/Instance is no longer notifying the Watchdog timer. 
diff --git a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java index 8f97edc3935..52679811f7c 100644 --- a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java +++ b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java @@ -516,6 +516,7 @@ public class AgentProperties{ /** * The model of Watchdog timer to present to the Guest.
* For all models refer to the libvirt documentation.
+ * PLEASE NOTE: to disable the watchdogs definitions, use value: none * Data type: String.
* Default value: i6300esb */ diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtVMDef.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtVMDef.java index a67294ecadb..39373ab6e3b 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtVMDef.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtVMDef.java @@ -2293,7 +2293,7 @@ public class LibvirtVMDef { public static class WatchDogDef { enum WatchDogModel { - I6300ESB("i6300esb"), IB700("ib700"), DIAG288("diag288"), ITCO("itco"); + I6300ESB("i6300esb"), IB700("ib700"), DIAG288("diag288"), ITCO("itco"), NONE("none"); String model; WatchDogModel(String model) { @@ -2346,6 +2346,10 @@ public class LibvirtVMDef { @Override public String toString() { + if (WatchDogModel.NONE == model) { + // Do not add watchdogs when the model is set to none + return ""; + } StringBuilder wacthDogBuilder = new StringBuilder(); wacthDogBuilder.append("\n"); return wacthDogBuilder.toString(); diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtVMDefTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtVMDefTest.java index c41d487b63c..bcbf6a2238b 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtVMDefTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtVMDefTest.java @@ -31,6 +31,7 @@ import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.DiskDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.MemBalloonDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.SCSIDef; import org.apache.cloudstack.utils.qemu.QemuObject; +import org.apache.commons.lang3.StringUtils; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.junit.MockitoJUnitRunner; @@ -537,6 +538,16 @@ public class LibvirtVMDefTest extends 
TestCase { assertEquals(action, def.getAction()); } + @Test + public void testWatchDofDefNone() { + LibvirtVMDef.WatchDogDef.WatchDogModel model = LibvirtVMDef.WatchDogDef.WatchDogModel.NONE; + LibvirtVMDef.WatchDogDef.WatchDogAction action = LibvirtVMDef.WatchDogDef.WatchDogAction.RESET; + LibvirtVMDef.WatchDogDef def = new LibvirtVMDef.WatchDogDef(action, model); + String result = def.toString(); + assertNotNull(result); + assertTrue(StringUtils.isBlank(result)); + } + @Test public void testSCSIDef() { SCSIDef def = new SCSIDef((short)0, 0, 0, 9, 0, 4); From c9fea62276ca1d4d0886a3513702732c502cae6d Mon Sep 17 00:00:00 2001 From: Nicolas Vazquez Date: Thu, 23 Jan 2025 11:06:38 -0300 Subject: [PATCH 20/50] [UI] Switch between allocated and used capacity on dashboard (#10215) * [UI] Switch between allocated and used capacity on dashboard * Fix text * Refactor * Internationalize capacity text --- ui/src/views/dashboard/CapacityDashboard.vue | 29 ++++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/ui/src/views/dashboard/CapacityDashboard.vue b/ui/src/views/dashboard/CapacityDashboard.vue index dae53cf0015..c2461b5adce 100644 --- a/ui/src/views/dashboard/CapacityDashboard.vue +++ b/ui/src/views/dashboard/CapacityDashboard.vue @@ -174,6 +174,12 @@
@@ -184,15 +190,19 @@

- {{ displayData(ctype, statsMap[ctype]?.capacityused) }} {{ $t('label.allocated') }} | {{ displayData(ctype, statsMap[ctype]?.capacitytotal) }} {{ $t('label.total') }} + {{ displayDataUsedOrAllocated(ctype, statsMap[ctype]?.capacityused, statsMap[ctype]?.capacityallocated) }} {{ this.displayAllocatedCompute ? $t('label.allocated') : $t('label.used') }} | {{ displayData(ctype, statsMap[ctype]?.capacitytotal) }} {{ $t('label.total') }}
@@ -346,6 +356,7 @@ export default { zones: [], zoneSelected: {}, statsMap: {}, + displayAllocatedCompute: false, data: { pods: 0, clusters: 0, @@ -402,6 +413,18 @@ export default { } return 'normal' }, + displayPercentUsedOrAllocated (used, allocated, total) { + var value = this.displayAllocatedCompute ? allocated : used + return parseFloat(100.0 * value / total) + }, + displayPercentFormatUsedOrAllocated (used, allocated, total) { + var value = this.displayAllocatedCompute ? allocated : used + return parseFloat(100.0 * value / total).toFixed(2) + '%' + }, + displayDataUsedOrAllocated (dataType, used, allocated) { + var value = this.displayAllocatedCompute ? allocated : used + return this.displayData(dataType, value) + }, displayData (dataType, value) { if (!value) { value = 0 From 2aa2e92dff4c4ad95200bd80b58745aa1d4b9b41 Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Thu, 23 Jan 2025 14:10:09 -0500 Subject: [PATCH 21/50] Handle special characters when exporting ACLs (#10259) --- ui/src/views/network/AclListRulesTab.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/src/views/network/AclListRulesTab.vue b/ui/src/views/network/AclListRulesTab.vue index 1bcce08e335..6c4a0541d4e 100644 --- a/ui/src/views/network/AclListRulesTab.vue +++ b/ui/src/views/network/AclListRulesTab.vue @@ -694,7 +694,7 @@ export default { const csvData = this.csv({ data: this.acls }) const hiddenElement = document.createElement('a') - hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURI(csvData) + hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURIComponent(csvData) hiddenElement.target = '_blank' hiddenElement.download = 'AclRules-' + this.resource.name + '-' + this.resource.id + '.csv' hiddenElement.click() From 4787885fc0911d2198e3dcf5166f20b1ebe9b393 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Fri, 24 Jan 2025 14:17:52 +0530 Subject: [PATCH 22/50] cks: prevent npe on cluster listing with removed offering (#10075) Signed-off-by: 
Abhishek Kumar --- .../cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java index bf5a3ef60c3..9c402f83b03 100644 --- a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java @@ -560,7 +560,7 @@ public class KubernetesClusterManagerImpl extends ManagerBase implements Kuberne if (template != null) { response.setTemplateId(template.getUuid()); } - ServiceOfferingVO offering = serviceOfferingDao.findById(kubernetesCluster.getServiceOfferingId()); + ServiceOfferingVO offering = serviceOfferingDao.findByIdIncludingRemoved(kubernetesCluster.getServiceOfferingId()); if (offering != null) { response.setServiceOfferingId(offering.getUuid()); response.setServiceOfferingName(offering.getName()); From 0a77eb7f85fde580ce6d1e28e19c98ff39ff3f9e Mon Sep 17 00:00:00 2001 From: dahn Date: Fri, 24 Jan 2025 11:09:56 +0100 Subject: [PATCH 23/50] deal with NPE during host reconnect (#10158) * log to see what command is being processed * exception names --- .../cloud/agent/manager/AgentManagerImpl.java | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 606a902dce7..92d18dcf4e4 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -52,6 +52,7 @@ import 
org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.cloudstack.utils.reflectiontostringbuilderutils.ReflectionToStringBuilderUtils; import org.apache.commons.lang3.BooleanUtils; import org.apache.log4j.Logger; import org.apache.log4j.MDC; @@ -569,27 +570,27 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } for (int i = 0; i < cmd.length; i++) { try { - monitor.second().processConnect(host, cmd[i], forRebalance); - } catch (final Exception e) { - if (e instanceof ConnectionException) { - final ConnectionException ce = (ConnectionException)e; - if (ce.isSetupError()) { - s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage()); - handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); - throw ce; - } else { - s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + e.getMessage()); - handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true); - return attache; - } - } else if (e instanceof HypervisorVersionChangedException) { - handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true); - throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); - } else { - s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e); - handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); - throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); + if 
(s_logger.isDebugEnabled()) { + s_logger.debug("process connection to issue " + ReflectionToStringBuilderUtils.reflectCollection(cmd[i]) + " forRebalance == " + forRebalance); } + monitor.second().processConnect(host, cmd[i], forRebalance); + } catch (final ConnectionException ce) { + if (ce.isSetupError()) { + s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + ce.getMessage()); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); + throw ce; + } else { + s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + ce.getMessage()); + handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true); + return attache; + } + } catch (final HypervisorVersionChangedException hvce) { + handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true); + throw new CloudRuntimeException("Unable to connect " + attache.getId(), hvce); + } catch (final Exception e) { + s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); + throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); } } } From f652ad0d98ce3dcda57214938ada63d599799b24 Mon Sep 17 00:00:00 2001 From: dahn Date: Mon, 27 Jan 2025 14:14:31 +0100 Subject: [PATCH 24/50] extra null guard (#10264) --- .../main/java/com/cloud/agent/manager/AgentManagerImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 92d18dcf4e4..d2e86fbc4b9 100644 --- 
a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -586,11 +586,11 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } } catch (final HypervisorVersionChangedException hvce) { handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true); - throw new CloudRuntimeException("Unable to connect " + attache.getId(), hvce); + throw new CloudRuntimeException("Unable to connect " + (attache == null ? "" : attache.getId()), hvce); } catch (final Exception e) { s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e); handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true); - throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); + throw new CloudRuntimeException("Unable to connect " + (attache == null ? "" : attache.getId()), e); } } } From 789e2699406d8930cfe98fa0c632e51191c5c908 Mon Sep 17 00:00:00 2001 From: Lucas Martins <56271185+lucas-a-martins@users.noreply.github.com> Date: Tue, 28 Jan 2025 04:14:22 -0300 Subject: [PATCH 25/50] Add lucas-a-martins as a project collaborator (#10283) Co-authored-by: Lucas Martins --- .asf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.asf.yaml b/.asf.yaml index 4d979a18833..c052077c753 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -60,6 +60,7 @@ github: - bernardodemarco - abh1sar - FelipeM525 + - lucas-a-martins protected_branches: ~ From 33a37da9ec7ef75fa42b9c6f2ca3a8f0ebf072bc Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Tue, 28 Jan 2025 14:39:31 +0530 Subject: [PATCH 26/50] server: investigate pending HA work when executing in new MS session (#10167) For HA work items that are created for host state change, checks must be done when execution is called in a new management server session. 
A new column, reason, has been added in cloud.op_ha_work table to track the reason for HA work. When HighAvailabilityManager starts it finds and puts all pending HA work items in Investigating state. During execution of the HA work if it is found in investigating state, checks are done to verify if the work is still valid. If the job is found to be invalid, it is cancelled. Signed-off-by: Abhishek Kumar --- .../com/cloud/ha/HighAvailabilityManager.java | 21 ++++-- .../cloud/agent/manager/AgentManagerImpl.java | 2 +- .../META-INF/db/schema-42000to42010.sql | 3 + .../src/main/java/com/cloud/ha/HaWorkVO.java | 15 +++- .../cloud/ha/HighAvailabilityManagerImpl.java | 74 ++++++++++++++++--- .../com/cloud/ha/dao/HighAvailabilityDao.java | 2 + .../cloud/ha/dao/HighAvailabilityDaoImpl.java | 30 +++++++- .../cloud/resource/ResourceManagerImpl.java | 30 ++++---- .../provider/host/HAAbstractHostProvider.java | 2 +- .../ha/HighAvailabilityManagerImplTest.java | 66 +++++++++++++++-- .../ha/dao/HighAvailabilityDaoImplTest.java | 59 ++++++++++++++- 11 files changed, 260 insertions(+), 44 deletions(-) diff --git a/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java b/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java index 728f5a2b180..ddc8153d739 100644 --- a/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java +++ b/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java @@ -84,6 +84,13 @@ public interface HighAvailabilityManager extends Manager { HA; // Restart a VM. } + enum ReasonType { + Unknown, + HostMaintenance, + HostDown, + HostDegraded; + } + enum Step { Scheduled, Investigating, Fencing, Stopping, Restarting, Migrating, Cancelled, Done, Error, } @@ -92,7 +99,7 @@ public interface HighAvailabilityManager extends Manager { * Investigate why a host has disconnected and migrate the VMs on it * if necessary. * - * @param host - the host that has disconnected. 
+ * @param hostId - the id of the host that has disconnected. */ Status investigate(long hostId); @@ -109,17 +116,19 @@ public interface HighAvailabilityManager extends Manager { * @param investigate must be investigated before we do anything with this vm. */ void scheduleRestart(VMInstanceVO vm, boolean investigate); + void scheduleRestart(VMInstanceVO vm, boolean investigate, ReasonType reasonType); void cancelDestroy(VMInstanceVO vm, Long hostId); - boolean scheduleDestroy(VMInstanceVO vm, long hostId); + boolean scheduleDestroy(VMInstanceVO vm, long hostId, ReasonType reasonType); /** * Schedule restarts for all vms running on the host. * @param host host. - * @param investigate TODO + * @param investigate whether to investigate + * @param reasonType reason for HA work */ - void scheduleRestartForVmsOnHost(HostVO host, boolean investigate); + void scheduleRestartForVmsOnHost(HostVO host, boolean investigate, ReasonType reasonType); /** * Schedule the vm for migration. @@ -128,6 +137,7 @@ public interface HighAvailabilityManager extends Manager { * @return true if schedule worked. */ boolean scheduleMigration(VMInstanceVO vm); + boolean scheduleMigration(VMInstanceVO vm, ReasonType reasonType); List findTakenMigrationWork(); @@ -140,10 +150,11 @@ public interface HighAvailabilityManager extends Manager { * 3. Check if a VM has been stopped: WorkType.CheckStop * * @param vm virtual machine to stop. - * @param host host the virtual machine is on. + * @param hostId the id of the host the virtual machine is on. * @param type which type of stop is requested. 
*/ boolean scheduleStop(VMInstanceVO vm, long hostId, WorkType type); + boolean scheduleStop(VMInstanceVO vm, long hostId, WorkType type, ReasonType reasonType); void cancelScheduledMigrations(HostVO host); diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 09fb211fedf..32180a91909 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -989,7 +989,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl handleDisconnectWithoutInvestigation(attache, event, true, true); host = _hostDao.findById(hostId); // Maybe the host magically reappeared? if (host != null && host.getStatus() == Status.Down) { - _haMgr.scheduleRestartForVmsOnHost(host, true); + _haMgr.scheduleRestartForVmsOnHost(host, true, HighAvailabilityManager.ReasonType.HostDown); } return true; } diff --git a/engine/schema/src/main/resources/META-INF/db/schema-42000to42010.sql b/engine/schema/src/main/resources/META-INF/db/schema-42000to42010.sql index 8b70cce3404..976ef217832 100644 --- a/engine/schema/src/main/resources/META-INF/db/schema-42000to42010.sql +++ b/engine/schema/src/main/resources/META-INF/db/schema-42000to42010.sql @@ -35,3 +35,6 @@ CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.volumes', 'last_id', 'bigint(20) uns -- Add used_iops column to support IOPS data in storage stats CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.storage_pool', 'used_iops', 'bigint unsigned DEFAULT NULL COMMENT "IOPS currently in use for this storage pool" '); + +-- Add reason column for op_ha_work +CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.op_ha_work', 'reason', 'varchar(32) DEFAULT NULL COMMENT "Reason for the HA work"'); diff --git a/server/src/main/java/com/cloud/ha/HaWorkVO.java b/server/src/main/java/com/cloud/ha/HaWorkVO.java 
index f5a36e3bb1a..8307f4d9317 100644 --- a/server/src/main/java/com/cloud/ha/HaWorkVO.java +++ b/server/src/main/java/com/cloud/ha/HaWorkVO.java @@ -86,6 +86,10 @@ public class HaWorkVO implements InternalIdentity { @Column(name = "tried") int timesTried; + @Column(name = "reason") + @Enumerated(value = EnumType.STRING) + private HighAvailabilityManager.ReasonType reasonType; + protected HaWorkVO() { } @@ -179,7 +183,7 @@ public class HaWorkVO implements InternalIdentity { } public HaWorkVO(final long instanceId, final VirtualMachine.Type type, final WorkType workType, final Step step, final long hostId, final State previousState, - final int timesTried, final long updated) { + final int timesTried, final long updated, HighAvailabilityManager.ReasonType reasonType) { this.workType = workType; this.type = type; this.instanceId = instanceId; @@ -191,6 +195,7 @@ public class HaWorkVO implements InternalIdentity { this.step = step; this.timeToTry = System.currentTimeMillis() >> 10; this.updateTime = updated; + this.reasonType = reasonType; } @Override @@ -207,4 +212,12 @@ public class HaWorkVO implements InternalIdentity { .append("]") .toString(); } + + public HighAvailabilityManager.ReasonType getReasonType() { + return reasonType; + } + + public void setReasonType(HighAvailabilityManager.ReasonType reasonType) { + this.reasonType = reasonType; + } } diff --git a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java index e10bd47a067..2ce803756fe 100644 --- a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -19,6 +19,7 @@ package com.cloud.ha; import static org.apache.cloudstack.framework.config.ConfigKey.Scope.Zone; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -43,6 +44,7 @@ import 
org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.managed.context.ManagedContext; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.management.ManagementServerHost; +import org.apache.logging.log4j.ThreadContext; import com.cloud.agent.AgentManager; import com.cloud.alert.AlertManager; @@ -90,7 +92,6 @@ import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.VirtualMachineProfile; import com.cloud.vm.dao.VMInstanceDao; -import org.apache.logging.log4j.ThreadContext; /** * HighAvailabilityManagerImpl coordinates the HA process. VMs are registered with the HA Manager for HA. The request is stored @@ -133,6 +134,9 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur protected static ConfigKey VmHaAlertsEnabled = new ConfigKey<>("Advanced", Boolean.class, "vm.ha.alerts.enabled", "true", "Enable/Disable alerts for the VM HA operations, it is enabled by default.", true, Zone); + protected static final List CancellableWorkReasonTypes = + Arrays.asList(ReasonType.HostMaintenance, ReasonType.HostDown, ReasonType.HostDegraded); + WorkerThread[] _workers; boolean _stopped; long _timeToSleep; @@ -269,8 +273,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur } @Override - public void scheduleRestartForVmsOnHost(final HostVO host, boolean investigate) { - + public void scheduleRestartForVmsOnHost(final HostVO host, boolean investigate, ReasonType reasonType) { if (host.getType() != Host.Type.Routing) { return; } @@ -337,12 +340,12 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur logger.debug("VM {} is not on down host {} it is on other host {} VM HA is done", vm, host, hostId); continue; } - scheduleRestart(vm, investigate); + scheduleRestart(vm, investigate, reasonType); } } @Override - public boolean scheduleStop(VMInstanceVO vm, long hostId, 
WorkType type) { + public boolean scheduleStop(VMInstanceVO vm, long hostId, WorkType type, ReasonType reasonType) { assert (type == WorkType.CheckStop || type == WorkType.ForceStop || type == WorkType.Stop); if (_haDao.hasBeenScheduled(vm.getId(), type)) { @@ -359,7 +362,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur return false; } - HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), type, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); + HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), type, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated(), reasonType); _haDao.persist(work); if (logger.isDebugEnabled()) { logger.debug("Scheduled " + work); @@ -368,6 +371,11 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur return true; } + @Override + public boolean scheduleStop(VMInstanceVO vm, long hostId, WorkType type) { + return scheduleStop(vm, hostId, type, null); + } + protected void wakeupWorkers() { logger.debug("Wakeup workers HA"); for (WorkerThread worker : _workers) { @@ -376,7 +384,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur } @Override - public boolean scheduleMigration(final VMInstanceVO vm) { + public boolean scheduleMigration(final VMInstanceVO vm, ReasonType reasonType) { if (vm.getHostId() == null) { return false; } @@ -390,7 +398,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur return false; } - final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Migration, Step.Scheduled, vm.getHostId(), vm.getState(), 0, vm.getUpdated()); + final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Migration, Step.Scheduled, vm.getHostId(), vm.getState(), 0, vm.getUpdated(), reasonType); _haDao.persist(work); logger.info("Scheduled migration work of VM {} from host {} with HAWork {}", vm, _hostDao.findById(vm.getHostId()), work); wakeupWorkers(); @@ -398,7 +406,12 
@@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur } @Override - public void scheduleRestart(VMInstanceVO vm, boolean investigate) { + public boolean scheduleMigration(final VMInstanceVO vm) { + return scheduleMigration(vm, null); + } + + @Override + public void scheduleRestart(VMInstanceVO vm, boolean investigate, ReasonType reasonType) { if (!VmHaEnabled.valueIn(vm.getDataCenterId())) { String message = String.format("Unable to schedule restart for the VM %s (%d), VM high availability manager is disabled.", vm.getName(), vm.getId()); if (logger.isDebugEnabled()) { @@ -490,7 +503,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur } HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, - hostId != null ? hostId : 0L, vm.getState(), timesTried, vm.getUpdated()); + hostId != null ? hostId : 0L, vm.getState(), timesTried, vm.getUpdated(), reasonType); _haDao.persist(work); if (logger.isInfoEnabled()) { @@ -500,6 +513,11 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur wakeupWorkers(); } + @Override + public void scheduleRestart(VMInstanceVO vm, boolean investigate) { + scheduleRestart(vm, investigate, null); + } + private void startVm(VirtualMachine vm, Map params, DeploymentPlanner planner) throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException, OperationTimedoutException { @@ -561,6 +579,9 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur logger.info("Unable to find vm: " + vmId); return null; } + if (checkAndCancelWorkIfNeeded(work)) { + return null; + } logger.info("HA on " + vm); if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) { @@ -762,6 +783,22 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur return (System.currentTimeMillis() >> 10) + 
_restartRetryInterval; } + protected boolean checkAndCancelWorkIfNeeded(final HaWorkVO work) { + if (!Step.Investigating.equals(work.getStep())) { + return false; + } + if (!CancellableWorkReasonTypes.contains(work.getReasonType())) { + return false; + } + Status hostStatus = investigate(work.getHostId()); + if (!Status.Up.equals(hostStatus)) { + return false; + } + logger.debug("Cancelling {} as it is not needed anymore", () -> work); + work.setStep(Step.Cancelled); + return true; + } + public Long migrate(final HaWorkVO work) { long vmId = work.getInstanceId(); long srcHostId = work.getHostId(); @@ -772,6 +809,9 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur logger.info("Unable to find vm: " + vmId + ", skipping migrate."); return null; } + if (checkAndCancelWorkIfNeeded(work)) { + return null; + } logger.info("Migration attempt: for VM {}from host {}. Starting attempt: {}/{} times.", vm, srcHost, 1 + work.getTimesTried(), _maxRetries); try { work.setStep(Step.Migrating); @@ -791,7 +831,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur } @Override - public boolean scheduleDestroy(VMInstanceVO vm, long hostId) { + public boolean scheduleDestroy(VMInstanceVO vm, long hostId, ReasonType reasonType) { if (!VmHaEnabled.valueIn(vm.getDataCenterId())) { String message = String.format("Unable to schedule destroy for the VM %s (%d) on host %d, VM high availability manager is disabled.", vm.getName(), vm.getId(), hostId); if (logger.isDebugEnabled()) { @@ -801,7 +841,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur return false; } - final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Destroy, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); + final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Destroy, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated(), reasonType); _haDao.persist(work); if 
(logger.isDebugEnabled()) { logger.debug("Scheduled " + work.toString()); @@ -838,6 +878,9 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur logger.info("No longer can find VM " + work.getInstanceId() + ". Throwing away " + work); return null; } + if (checkAndCancelWorkIfNeeded(work)) { + return null; + } boolean expunge = VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType()) || VirtualMachine.Type.ConsoleProxy.equals(vm.getType()); if (!expunge && VirtualMachine.State.Destroyed.equals(work.getPreviousState())) { @@ -872,6 +915,9 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur work.setStep(Step.Done); return null; } + if (checkAndCancelWorkIfNeeded(work)) { + return null; + } logger.info("Stopping " + vm); try { if (work.getWorkType() == WorkType.Stop) { @@ -1057,6 +1103,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur public boolean start() { _stopped = false; + _haDao.markPendingWorksAsInvestigating(); + for (final WorkerThread thread : _workers) { thread.start(); } @@ -1074,6 +1122,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur _executor.shutdown(); + _haDao.markServerPendingWorksAsInvestigating(_msServer.getId()); + return true; } diff --git a/server/src/main/java/com/cloud/ha/dao/HighAvailabilityDao.java b/server/src/main/java/com/cloud/ha/dao/HighAvailabilityDao.java index f6539105d78..42c8aabe41a 100644 --- a/server/src/main/java/com/cloud/ha/dao/HighAvailabilityDao.java +++ b/server/src/main/java/com/cloud/ha/dao/HighAvailabilityDao.java @@ -86,4 +86,6 @@ public interface HighAvailabilityDao extends GenericDao { List listPendingMigrationsForVm(long vmId); int expungeByVmList(List vmIds, Long batchSize); + void markPendingWorksAsInvestigating(); + void markServerPendingWorksAsInvestigating(long managementServerId); } diff --git a/server/src/main/java/com/cloud/ha/dao/HighAvailabilityDaoImpl.java 
b/server/src/main/java/com/cloud/ha/dao/HighAvailabilityDaoImpl.java index c722c6376c1..00b62e0d601 100644 --- a/server/src/main/java/com/cloud/ha/dao/HighAvailabilityDaoImpl.java +++ b/server/src/main/java/com/cloud/ha/dao/HighAvailabilityDaoImpl.java @@ -31,12 +31,13 @@ import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.SearchCriteria.Op; import com.cloud.utils.db.TransactionLegacy; +import com.cloud.utils.db.UpdateBuilder; import com.cloud.utils.exception.CloudRuntimeException; @Component public class HighAvailabilityDaoImpl extends GenericDaoBase implements HighAvailabilityDao { - private final SearchBuilder TBASearch; + protected SearchBuilder TBASearch; private final SearchBuilder PreviousInstanceSearch; private final SearchBuilder UntakenMigrationSearch; private final SearchBuilder CleanupSearch; @@ -270,4 +271,31 @@ public class HighAvailabilityDaoImpl extends GenericDaoBase impl sc.setParameters("vmIds", vmIds.toArray()); return batchExpunge(sc, batchSize); } + + protected void updatePendingWorkToInvestigating(SearchCriteria sc) { + HaWorkVO haWorkVO = createForUpdate(); + haWorkVO.setStep(Step.Investigating); + UpdateBuilder updateBuilder = getUpdateBuilder(haWorkVO); + update(updateBuilder, sc, null); + } + + @Override + public void markPendingWorksAsInvestigating() { + final SearchCriteria sc = TBASearch.create(); + sc.setParameters("time", System.currentTimeMillis() >> 10); + sc.setParameters("step", Step.Done, Step.Cancelled); + updatePendingWorkToInvestigating(sc); + } + + @Override + public void markServerPendingWorksAsInvestigating(long managementServerId) { + SearchBuilder sb = createSearchBuilder(); + sb.and("server", sb.entity().getServerId(), Op.EQ); + sb.and("step", sb.entity().getStep(), Op.NIN); + sb.done(); + SearchCriteria sc = sb.create(); + sc.setParameters("server", managementServerId); + sc.setParameters("step", Step.Done, Step.Cancelled); + updatePendingWorkToInvestigating(sc); 
+ } } diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 50116905bfe..1349e03f205 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -37,16 +37,6 @@ import java.util.stream.Collectors; import javax.inject.Inject; import javax.naming.ConfigurationException; -import com.cloud.alert.AlertManager; -import com.cloud.cpu.CPU; -import com.cloud.exception.StorageConflictException; -import com.cloud.exception.StorageUnavailableException; -import com.cloud.ha.HighAvailabilityManagerImpl; -import com.cloud.host.HostTagVO; -import com.cloud.storage.Volume; -import com.cloud.storage.VolumeVO; -import com.cloud.storage.dao.VolumeDao; -import com.cloud.hypervisor.HypervisorGuru; import org.apache.cloudstack.alert.AlertService; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.dao.AnnotationDao; @@ -93,6 +83,7 @@ import com.cloud.agent.api.UpdateHostPasswordCommand; import com.cloud.agent.api.VgpuTypesInfo; import com.cloud.agent.api.to.GPUDeviceTO; import com.cloud.agent.transport.Request; +import com.cloud.alert.AlertManager; import com.cloud.capacity.Capacity; import com.cloud.capacity.CapacityManager; import com.cloud.capacity.CapacityState; @@ -101,6 +92,7 @@ import com.cloud.capacity.dao.CapacityDao; import com.cloud.cluster.ClusterManager; import com.cloud.configuration.Config; import com.cloud.configuration.ConfigurationManager; +import com.cloud.cpu.CPU; import com.cloud.dc.ClusterDetailsDao; import com.cloud.dc.ClusterDetailsVO; import com.cloud.dc.ClusterVO; @@ -134,6 +126,8 @@ import com.cloud.exception.InvalidParameterValueException; import com.cloud.exception.PermissionDeniedException; import com.cloud.exception.ResourceInUseException; import com.cloud.exception.ResourceUnavailableException; +import 
com.cloud.exception.StorageConflictException; +import com.cloud.exception.StorageUnavailableException; import com.cloud.gpu.GPU; import com.cloud.gpu.HostGpuGroupsVO; import com.cloud.gpu.VGPUTypesVO; @@ -141,10 +135,12 @@ import com.cloud.gpu.dao.HostGpuGroupsDao; import com.cloud.gpu.dao.VGPUTypesDao; import com.cloud.ha.HighAvailabilityManager; import com.cloud.ha.HighAvailabilityManager.WorkType; +import com.cloud.ha.HighAvailabilityManagerImpl; import com.cloud.host.DetailVO; import com.cloud.host.Host; import com.cloud.host.Host.Type; import com.cloud.host.HostStats; +import com.cloud.host.HostTagVO; import com.cloud.host.HostVO; import com.cloud.host.Status; import com.cloud.host.Status.Event; @@ -153,6 +149,7 @@ import com.cloud.host.dao.HostDetailsDao; import com.cloud.host.dao.HostTagsDao; import com.cloud.hypervisor.Hypervisor; import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.hypervisor.HypervisorGuru; import com.cloud.hypervisor.kvm.discoverer.KvmDummyResourceBase; import com.cloud.network.dao.IPAddressDao; import com.cloud.network.dao.IPAddressVO; @@ -170,10 +167,13 @@ import com.cloud.storage.StoragePoolHostVO; import com.cloud.storage.StoragePoolStatus; import com.cloud.storage.StorageService; import com.cloud.storage.VMTemplateVO; +import com.cloud.storage.Volume; +import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.DiskOfferingDao; import com.cloud.storage.dao.GuestOSCategoryDao; import com.cloud.storage.dao.StoragePoolHostDao; import com.cloud.storage.dao.VMTemplateDao; +import com.cloud.storage.dao.VolumeDao; import com.cloud.user.Account; import com.cloud.user.AccountManager; import com.cloud.utils.Ternary; @@ -1348,7 +1348,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType()) || VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { logger.error("Maintenance: VM is of type {}. 
Destroying VM {} immediately instead of migration.", vm.getType(), vm); - _haMgr.scheduleDestroy(vm, host.getId()); + _haMgr.scheduleDestroy(vm, host.getId(), HighAvailabilityManager.ReasonType.HostMaintenance); return; } logger.error("Maintenance: No hosts available for migrations. Scheduling shutdown for VM {} instead of migration.", vm); @@ -1405,10 +1405,10 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, handleVmForLastHostOrWithVGpu(host, vm); } else if (HypervisorType.LXC.equals(host.getHypervisorType()) && VirtualMachine.Type.User.equals(vm.getType())){ //Migration is not supported for LXC Vms. Schedule restart instead. - _haMgr.scheduleRestart(vm, false); + _haMgr.scheduleRestart(vm, false, HighAvailabilityManager.ReasonType.HostMaintenance); } else if (userVmManager.isVMUsingLocalStorage(vm)) { if (isMaintenanceLocalStrategyForceStop()) { - _haMgr.scheduleStop(vm, hostId, WorkType.ForceStop); + _haMgr.scheduleStop(vm, hostId, WorkType.ForceStop, HighAvailabilityManager.ReasonType.HostMaintenance); } else if (isMaintenanceLocalStrategyMigrate()) { migrateAwayVmWithVolumes(host, vm); } else if (!isMaintenanceLocalStrategyDefault()){ @@ -1421,7 +1421,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, } } else { logger.info("Maintenance: scheduling migration of VM {} from host {}", vm, host); - _haMgr.scheduleMigration(vm); + _haMgr.scheduleMigration(vm, HighAvailabilityManager.ReasonType.HostMaintenance); } } } @@ -1637,7 +1637,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, for (VMInstanceVO vm : allVmsOnHost) { State vmState = vm.getState(); if (vmState == State.Starting || vmState == State.Running || vmState == State.Stopping) { - _haMgr.scheduleRestart(vm, false); + _haMgr.scheduleRestart(vm, false, HighAvailabilityManager.ReasonType.HostDegraded); } } } diff --git 
a/server/src/main/java/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java b/server/src/main/java/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java index 5907c1864ad..2d77e6f9d20 100644 --- a/server/src/main/java/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java +++ b/server/src/main/java/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java @@ -74,7 +74,7 @@ public abstract class HAAbstractHostProvider extends AdapterBase implements HAPr try { logger.debug("Trying to disconnect the host without investigation and scheduling HA for the VMs on host {}", host); agentManager.disconnectWithoutInvestigation(host.getId(), Event.HostDown); - oldHighAvailabilityManager.scheduleRestartForVmsOnHost((HostVO)host, true); + oldHighAvailabilityManager.scheduleRestartForVmsOnHost((HostVO)host, true, HighAvailabilityManager.ReasonType.HostDown); } catch (Exception e) { logger.error("Failed to disconnect host and schedule HA restart of VMs after fencing the host: ", e); } diff --git a/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java b/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java index 53ae5d2279e..24714b72388 100644 --- a/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java +++ b/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java @@ -135,6 +135,9 @@ public class HighAvailabilityManagerImplTest { @Mock UserVmManager userVmManager; + @Mock + private HaWorkVO mockWork; + HighAvailabilityManagerImpl highAvailabilityManager; HighAvailabilityManagerImpl highAvailabilityManagerSpy; static Method processWorkMethod = null; @@ -185,7 +188,7 @@ public class HighAvailabilityManagerImplTest { highAvailabilityManager.VmHaEnabled = haEnabled; Mockito.when(highAvailabilityManager.VmHaEnabled.valueIn(1L)).thenReturn(true); - highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, true); + highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, 
true, HighAvailabilityManager.ReasonType.HostDown); } @Test @@ -193,7 +196,7 @@ public class HighAvailabilityManagerImplTest { Mockito.when(hostVO.getType()).thenReturn(Host.Type.Routing); Mockito.when(hostVO.getHypervisorType()).thenReturn(HypervisorType.VMware); - highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, true); + highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, true, HighAvailabilityManager.ReasonType.HostDown); } @Test @@ -206,7 +209,7 @@ public class HighAvailabilityManagerImplTest { highAvailabilityManager.VmHaEnabled = haEnabled; Mockito.when(highAvailabilityManager.VmHaEnabled.valueIn(1L)).thenReturn(false); - highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, true); + highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, true, HighAvailabilityManager.ReasonType.HostDown); } @Test @@ -240,7 +243,7 @@ public class HighAvailabilityManagerImplTest { highAvailabilityManager.VmHaEnabled = haEnabled; Mockito.when(highAvailabilityManager.VmHaEnabled.valueIn(1L)).thenReturn(true); - highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, true); + highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, true, HighAvailabilityManager.ReasonType.HostDown); } @Test @@ -336,7 +339,7 @@ public class HighAvailabilityManagerImplTest { Mockito.when(vm.getState()).thenReturn(VirtualMachine.State.Running); Mockito.when(_haDao.persist((HaWorkVO)Mockito.any())).thenReturn(Mockito.mock(HaWorkVO.class)); - assertTrue(highAvailabilityManager.scheduleDestroy(vm, 1L)); + assertTrue(highAvailabilityManager.scheduleDestroy(vm, 1L, HighAvailabilityManager.ReasonType.HostMaintenance)); } @Test @@ -348,7 +351,7 @@ public class HighAvailabilityManagerImplTest { highAvailabilityManager.VmHaEnabled = haEnabled; Mockito.when(highAvailabilityManager.VmHaEnabled.valueIn(1L)).thenReturn(false); - assertFalse(highAvailabilityManager.scheduleDestroy(vm, 1L)); + assertFalse(highAvailabilityManager.scheduleDestroy(vm, 1L, 
HighAvailabilityManager.ReasonType.HostMaintenance)); } @Test @@ -402,7 +405,7 @@ public class HighAvailabilityManagerImplTest { private void processWorkWithRetryCount(int count, Step expectedStep) { assertNotNull(processWorkMethod); - HaWorkVO work = new HaWorkVO(1l, VirtualMachine.Type.User, WorkType.Migration, Step.Scheduled, 1l, VirtualMachine.State.Running, count, 12345678l); + HaWorkVO work = new HaWorkVO(1l, VirtualMachine.Type.User, WorkType.Migration, Step.Scheduled, 1l, VirtualMachine.State.Running, count, 12345678l, null); Mockito.doReturn(12345678l).when(highAvailabilityManagerSpy).migrate(work); try { processWorkMethod.invoke(highAvailabilityManagerSpy, work); @@ -425,4 +428,53 @@ public class HighAvailabilityManagerImplTest { public void processWorkWithRetryCountNotExceeded() { processWorkWithRetryCount(3, Step.Scheduled); } + + @Test + public void testCheckAndCancelWorkIfNeeded_Success() { + Mockito.when(mockWork.getStep()).thenReturn(Step.Investigating); + Mockito.when(mockWork.getReasonType()).thenReturn(HighAvailabilityManager.ReasonType.HostMaintenance); + Mockito.when(mockWork.getHostId()).thenReturn(1L); + Mockito.doReturn(Status.Up).when(highAvailabilityManagerSpy).investigate(1L); + Mockito.doNothing().when(mockWork).setStep(Step.Cancelled); + boolean result = highAvailabilityManagerSpy.checkAndCancelWorkIfNeeded(mockWork); + assertTrue(result); + Mockito.verify(mockWork).setStep(Step.Cancelled); + } + + @Test + public void testCheckAndCancelWorkIfNeeded_StepNotInvestigating() { + Mockito.when(mockWork.getStep()).thenReturn(Step.Cancelled); + boolean result = highAvailabilityManagerSpy.checkAndCancelWorkIfNeeded(mockWork); + assertFalse(result); + Mockito.verify(mockWork, Mockito.never()).setStep(Mockito.any()); + } + + private void runInvalidReasonCheckAndCancelWorkIfNeeded(HighAvailabilityManager.ReasonType reasonType) { + Mockito.when(mockWork.getStep()).thenReturn(Step.Investigating); + 
Mockito.when(mockWork.getReasonType()).thenReturn(reasonType); + boolean result = highAvailabilityManagerSpy.checkAndCancelWorkIfNeeded(mockWork); + assertFalse(result); + Mockito.verify(mockWork, Mockito.never()).setStep(Mockito.any()); + } + + @Test + public void testCheckAndCancelWorkIfNeeded_InvalidReasonType() { + runInvalidReasonCheckAndCancelWorkIfNeeded(HighAvailabilityManager.ReasonType.Unknown); + } + + @Test + public void testCheckAndCancelWorkIfNeeded_NullReasonType() { + runInvalidReasonCheckAndCancelWorkIfNeeded(null); + } + + @Test + public void testCheckAndCancelWorkIfNeeded_HostStatusNotUp() { + Mockito.when(mockWork.getStep()).thenReturn(Step.Investigating); + Mockito.when(mockWork.getReasonType()).thenReturn(HighAvailabilityManager.ReasonType.HostDown); + Mockito.when(mockWork.getHostId()).thenReturn(1L); + Mockito.doReturn(Status.Down).when(highAvailabilityManagerSpy).investigate(1L); + boolean result = highAvailabilityManagerSpy.checkAndCancelWorkIfNeeded(mockWork); + assertFalse(result); + Mockito.verify(mockWork, Mockito.never()).setStep(Mockito.any()); + } } diff --git a/server/src/test/java/com/cloud/ha/dao/HighAvailabilityDaoImplTest.java b/server/src/test/java/com/cloud/ha/dao/HighAvailabilityDaoImplTest.java index 783497740fd..c2b95474d0b 100644 --- a/server/src/test/java/com/cloud/ha/dao/HighAvailabilityDaoImplTest.java +++ b/server/src/test/java/com/cloud/ha/dao/HighAvailabilityDaoImplTest.java @@ -22,20 +22,25 @@ import java.util.List; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; +import org.mockito.MockedStatic; import org.mockito.Mockito; import org.mockito.Spy; import org.mockito.junit.MockitoJUnitRunner; import org.mockito.stubbing.Answer; import com.cloud.ha.HaWorkVO; +import com.cloud.ha.HighAvailabilityManager; +import com.cloud.utils.db.GenericDaoBase; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; +import com.cloud.utils.db.UpdateBuilder; +import 
com.cloud.vm.VirtualMachine; @RunWith(MockitoJUnitRunner.class) public class HighAvailabilityDaoImplTest { @Spy - HighAvailabilityDaoImpl highAvailabilityDaoImpl; + HighAvailabilityDaoImpl highAvailabilityDaoImpl = new HighAvailabilityDaoImpl(); @Test public void testExpungeByVmListNoVms() { @@ -65,4 +70,56 @@ public class HighAvailabilityDaoImplTest { Mockito.verify(highAvailabilityDaoImpl, Mockito.times(1)) .batchExpunge(sc, batchSize); } + + @Test + public void testMarkPendingWorksAsInvestigating() throws Exception { + SearchBuilder mockTBASearch = Mockito.mock(SearchBuilder.class); + highAvailabilityDaoImpl.TBASearch = mockTBASearch; + SearchCriteria mockSearchCriteria = Mockito.mock(SearchCriteria.class); + UpdateBuilder mockUpdateBuilder = Mockito.mock(UpdateBuilder.class); + Mockito.when(mockTBASearch.create()).thenReturn(mockSearchCriteria); + Mockito.doNothing().when(mockSearchCriteria).setParameters(Mockito.eq("time"), Mockito.anyLong()); + Mockito.doNothing().when(mockSearchCriteria).setParameters(Mockito.eq("step"), Mockito.eq(HighAvailabilityManager.Step.Done), Mockito.eq(HighAvailabilityManager.Step.Cancelled)); + HaWorkVO haWorkVO = new HaWorkVO(1L, VirtualMachine.Type.User, null, + null, 1L, null, 0, 0, + HighAvailabilityManager.ReasonType.HostMaintenance); + Mockito.when(highAvailabilityDaoImpl.createForUpdate()).thenReturn(haWorkVO); + try(MockedStatic genericDaoBaseMockedStatic = Mockito.mockStatic(GenericDaoBase.class)) { + genericDaoBaseMockedStatic.when(() -> GenericDaoBase.getUpdateBuilder(Mockito.any())).thenReturn(mockUpdateBuilder); + Mockito.doReturn(5).when(highAvailabilityDaoImpl).update(Mockito.any(UpdateBuilder.class), Mockito.any(), Mockito.nullable(Integer.class)); + highAvailabilityDaoImpl.markPendingWorksAsInvestigating(); + Mockito.verify(mockTBASearch).create(); + Mockito.verify(mockSearchCriteria).setParameters(Mockito.eq("time"), Mockito.anyLong()); + Mockito.verify(mockSearchCriteria).setParameters(Mockito.eq("step"), 
Mockito.eq(HighAvailabilityManager.Step.Done), Mockito.eq(HighAvailabilityManager.Step.Cancelled)); + Assert.assertEquals(HighAvailabilityManager.Step.Investigating, haWorkVO.getStep()); // Ensure the step is set correctly + Mockito.verify(highAvailabilityDaoImpl).update(Mockito.eq(mockUpdateBuilder), Mockito.eq(mockSearchCriteria), Mockito.isNull()); + } + } + + @Test + public void testMarkServerPendingWorksAsInvestigating() { + SearchBuilder mockSearch = Mockito.mock(SearchBuilder.class); + Mockito.doReturn(Mockito.mock(HaWorkVO.class)).when(mockSearch).entity(); + Mockito.doReturn(mockSearch).when(highAvailabilityDaoImpl).createSearchBuilder(); + SearchCriteria mockSearchCriteria = Mockito.mock(SearchCriteria.class); + UpdateBuilder mockUpdateBuilder = Mockito.mock(UpdateBuilder.class); + Mockito.when(mockSearch.create()).thenReturn(mockSearchCriteria); + Mockito.doNothing().when(mockSearchCriteria).setParameters(Mockito.eq("server"), Mockito.eq(1L)); + Mockito.doNothing().when(mockSearchCriteria).setParameters(Mockito.eq("step"), Mockito.eq(HighAvailabilityManager.Step.Done), Mockito.eq(HighAvailabilityManager.Step.Cancelled)); + HaWorkVO haWorkVO = new HaWorkVO(1L, VirtualMachine.Type.User, null, + null, 1L, null, 0, 0, + HighAvailabilityManager.ReasonType.HostMaintenance); + Mockito.when(highAvailabilityDaoImpl.createForUpdate()).thenReturn(haWorkVO); + Mockito.when(highAvailabilityDaoImpl.createForUpdate()).thenReturn(haWorkVO); + try(MockedStatic genericDaoBaseMockedStatic = Mockito.mockStatic(GenericDaoBase.class)) { + genericDaoBaseMockedStatic.when(() -> GenericDaoBase.getUpdateBuilder(Mockito.any())).thenReturn(mockUpdateBuilder); + Mockito.doReturn(5).when(highAvailabilityDaoImpl).update(Mockito.any(UpdateBuilder.class), Mockito.any(), Mockito.nullable(Integer.class)); + highAvailabilityDaoImpl.markServerPendingWorksAsInvestigating(1L); + Mockito.verify(mockSearch).create(); + Mockito.verify(mockSearchCriteria).setParameters(Mockito.eq("server"), 
Mockito.eq(1L)); + Mockito.verify(mockSearchCriteria).setParameters(Mockito.eq("step"), Mockito.eq(HighAvailabilityManager.Step.Done), Mockito.eq(HighAvailabilityManager.Step.Cancelled)); + Assert.assertEquals(HighAvailabilityManager.Step.Investigating, haWorkVO.getStep()); // Ensure the step is set correctly + Mockito.verify(highAvailabilityDaoImpl).update(Mockito.eq(mockUpdateBuilder), Mockito.eq(mockSearchCriteria), Mockito.isNull()); + } + } } From 3b108b968f0f52c0e1ee88041b7b24a14762d0dd Mon Sep 17 00:00:00 2001 From: Suresh Kumar Anaparti Date: Wed, 29 Jan 2025 13:31:15 +0530 Subject: [PATCH 27/50] Support for Management Server Maintenance Mode (#9854) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Support for Management Server Maintenance - New APIs: prepareForMaintenance and cancelMaintenance, with required parameter - managementserverid. - New management server states for maintenance: PreparingForMaintenance, Maintenance. - listHosts API with optional parameter – managementserverid, to list the hosts connected to the management server. - Support management server maintenance when more than one active management servers available. - Triggers transfer agents to other available management servers for maintenance, new agent command MigrateAgentConnectionCommand to initiate transfer of indirect agents. - New global config 'management.server.maintenance.timeout', to set the timeout (in mins) for the management server maintenance window, default: 60 mins. - UI changes: Prepare and Cancel Maintenance in Management Server section, Connected Agents tab, New fields for hosts and management servers. 
* Updated pending jobs check timer task with ScheduledExecutorService * keep maintenance state on trigger shutdown call when ms is in maintenance * add pending jobs count to ms response * during ms heartbeat, update state to up only when it's down * allow vm work jobs of async job created before prepare for maintenance * Revert "keep maintenance state on trigger shutdown call when ms is in maintenance" This reverts commit 607e13364679eac897f4d146bb3325ea7a61ba17. * skip maintenance test when multiple management servers are not available, and not configured in host setting for kvm --- .../src/main/java/com/cloud/agent/Agent.java | 98 ++- .../main/java/com/cloud/agent/AgentShell.java | 9 + .../java/com/cloud/agent/IAgentShell.java | 4 + api/src/main/java/com/cloud/host/Host.java | 2 + api/src/main/java/com/cloud/host/Status.java | 1 + .../com/cloud/resource/ResourceService.java | 8 +- .../server/ManagementServerHostStats.java | 5 + .../apache/cloudstack/api/ApiConstants.java | 5 +- ...Cmd.java => CancelHostMaintenanceCmd.java} | 2 +- .../api/command/admin/host/ListHostsCmd.java | 8 + ...java => PrepareForHostMaintenanceCmd.java} | 2 +- .../api/response/AsyncJobResponse.java | 16 +- .../cloudstack/api/response/HostResponse.java | 26 +- .../api/response/LoginCmdResponse.java | 12 + .../response/ManagementServerResponse.java | 24 + .../management/ManagementServerHost.java | 2 +- client/pom.xml | 2 +- .../api/MigrateAgentConnectionAnswer.java | 38 ++ .../api/MigrateAgentConnectionCommand.java | 61 ++ .../com/cloud/agent/api/StartupCommand.java | 9 + .../cloud/agent/api/TransferAgentCommand.java | 10 + .../com/cloud/resource/ServerResource.java | 4 + .../agent/test/CheckOnHostCommandTest.java | 5 + .../java/com/cloud/agent/AgentManager.java | 7 + .../com/cloud/resource/ResourceManager.java | 2 + engine/orchestration/pom.xml | 2 +- .../cloud/agent/manager/AgentManagerImpl.java | 110 +++- .../manager/ClusteredAgentManagerImpl.java | 220 ++++++- 
.../ClusteredAgentRebalanceService.java | 1 + .../entity/api/db/EngineHostVO.java | 12 + .../orchestration/NetworkOrchestrator.java | 2 +- .../src/main/java/com/cloud/host/HostVO.java | 12 + .../main/java/com/cloud/host/dao/HostDao.java | 15 +- .../java/com/cloud/host/dao/HostDaoImpl.java | 48 +- .../META-INF/db/schema-42010to42100.sql | 3 + .../cloudstack/agent/lb/IndirectAgentLB.java | 22 + .../com/cloud/cluster/ClusterManagerImpl.java | 24 +- .../cluster/dao/ManagementServerHostDao.java | 6 +- .../dao/ManagementServerHostDaoImpl.java | 42 +- .../dao/ManagementServerHostPeerDao.java | 3 + .../dao/ManagementServerHostPeerDaoImpl.java | 33 + .../jobs/impl/AsyncJobManagerImpl.java | 55 +- .../vmware/resource/VmwareResource.java | 6 + .../resource/CitrixResourceBase.java | 6 + .../resource/XenServer56Resource.java | 7 +- .../xenserver/XenServerResourceNewBase.java | 7 +- plugins/{shutdown => maintenance}/pom.xml | 4 +- .../command/BaseMSMaintenanceActionCmd.java} | 8 +- .../api/command/CancelMaintenanceCmd.java | 60 ++ .../api/command/CancelShutdownCmd.java | 10 +- .../api/command/PrepareForMaintenanceCmd.java | 72 +++ .../api/command/PrepareForShutdownCmd.java | 9 +- .../api/command/ReadyForShutdownCmd.java | 50 +- .../api/command/TriggerShutdownCmd.java | 10 +- .../ManagementServerMaintenanceResponse.java} | 80 ++- .../ManagementServerMaintenanceListener.java | 24 + .../ManagementServerMaintenanceManager.java | 108 ++++ ...anagementServerMaintenanceManagerImpl.java | 598 ++++++++++++++++++ ...seShutdownManagementServerHostCommand.java | 2 +- ...aintenanceManagementServerHostCommand.java | 26 + ...elShutdownManagementServerHostCommand.java | 2 +- ...aintenanceManagementServerHostCommand.java | 36 ++ ...orShutdownManagementServerHostCommand.java | 2 +- ...erShutdownManagementServerHostCommand.java | 2 +- .../cloudstack/maintenance}/module.properties | 2 +- .../spring-maintenance-context.xml} | 4 +- ...mentServerMaintenanceManagerImplTest.java} | 21 +- 
.../cloudstack/api/MetricConstants.java | 2 + .../metrics/MetricsServiceImpl.java | 2 + .../ManagementServerMetricsResponse.java | 17 + plugins/pom.xml | 2 +- .../cloudstack/shutdown/ShutdownManager.java | 60 -- .../shutdown/ShutdownManagerImpl.java | 265 -------- .../java/com/cloud/api/ApiDispatcher.java | 2 +- .../main/java/com/cloud/api/ApiServer.java | 16 +- .../com/cloud/api/query/QueryManagerImpl.java | 14 + .../api/query/dao/AsyncJobJoinDaoImpl.java | 15 +- .../cloud/api/query/dao/HostJoinDaoImpl.java | 11 + .../cloud/network/SshKeysDistriMonitor.java | 26 +- .../security/SecurityGroupListener.java | 33 +- .../cloud/resource/ResourceManagerImpl.java | 21 +- .../RollingMaintenanceManagerImpl.java | 4 +- .../ManagementServerHostStatsEntry.java | 21 + .../cloud/server/ManagementServerImpl.java | 8 +- .../java/com/cloud/server/StatsCollector.java | 3 + .../storage/listener/StoragePoolMonitor.java | 72 ++- .../agent/lb/IndirectAgentLBServiceImpl.java | 151 ++++- .../spring-server-core-managers-context.xml | 4 +- .../resource/MockResourceManagerImpl.java | 17 +- .../test/resources/createNetworkOffering.xml | 159 ++--- ... 
test_ms_maintenance_and_safe_shutdown.py} | 65 +- tools/apidoc/gen_toc.py | 3 +- ui/public/locales/en.json | 20 +- ui/src/components/page/GlobalLayout.vue | 22 +- ui/src/components/view/ListView.vue | 2 +- ui/src/config/section/infra/hosts.js | 3 +- .../config/section/infra/managementServers.js | 41 +- ui/src/store/getters.js | 2 + ui/src/store/modules/app.js | 6 + ui/src/store/modules/user.js | 19 + ui/src/store/mutation-types.js | 1 + ui/src/views/AutogenView.vue | 4 +- ui/src/views/infra/Confirmation.vue | 40 +- ui/src/views/infra/ConnectedAgentsTab.vue | 88 +++ .../com/cloud/utils/nio/NioConnection.java | 18 + 105 files changed, 2673 insertions(+), 714 deletions(-) rename api/src/main/java/org/apache/cloudstack/api/command/admin/host/{CancelMaintenanceCmd.java => CancelHostMaintenanceCmd.java} (98%) rename api/src/main/java/org/apache/cloudstack/api/command/admin/host/{PrepareForMaintenanceCmd.java => PrepareForHostMaintenanceCmd.java} (98%) create mode 100644 core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java create mode 100644 core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java rename plugins/{shutdown => maintenance}/pom.xml (92%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java => maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java} (85%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java (83%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java (85%) rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java (66%) rename 
plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java (85%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java => maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java} (52%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/BaseShutdownManagementServerHostCommand.java (95%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/CancelShutdownManagementServerHostCommand.java (95%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/PrepareForShutdownManagementServerHostCommand.java (95%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/TriggerShutdownManagementServerHostCommand.java (95%) rename plugins/{shutdown/src/main/resources/META-INF/cloudstack/shutdown => maintenance/src/main/resources/META-INF/cloudstack/maintenance}/module.properties (97%) rename 
plugins/{shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml => maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml} (83%) rename plugins/{shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java => maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java} (84%) delete mode 100644 plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java delete mode 100644 plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java rename test/integration/smoke/{test_safe_shutdown.py => test_ms_maintenance_and_safe_shutdown.py} (58%) create mode 100644 ui/src/views/infra/ConnectedAgentsTab.vue diff --git a/agent/src/main/java/com/cloud/agent/Agent.java b/agent/src/main/java/com/cloud/agent/Agent.java index c84179d6660..97803477115 100644 --- a/agent/src/main/java/com/cloud/agent/Agent.java +++ b/agent/src/main/java/com/cloud/agent/Agent.java @@ -27,6 +27,7 @@ import java.net.UnknownHostException; import java.nio.channels.ClosedChannelException; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -40,6 +41,8 @@ import java.util.concurrent.atomic.AtomicInteger; import javax.naming.ConfigurationException; +import com.cloud.agent.api.MigrateAgentConnectionAnswer; +import com.cloud.agent.api.MigrateAgentConnectionCommand; import com.cloud.resource.AgentStatusUpdater; import com.cloud.resource.ResourceStatusUpdater; import com.cloud.agent.api.PingAnswer; @@ -313,7 +316,6 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater } _shell.updateConnectedHost(); scavengeOldAgentObjects(); - } public void stop(final String reason, final String detail) { @@ -477,6 +479,10 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater } 
public void sendStartup(final Link link) { + sendStartup(link, false); + } + + public void sendStartup(final Link link, boolean transfer) { final StartupCommand[] startup = _resource.initialize(); if (startup != null) { final String msHostList = _shell.getPersistentProperty(null, "host"); @@ -484,6 +490,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater for (int i = 0; i < startup.length; i++) { setupStartupCommand(startup[i]); startup[i].setMSHostList(msHostList); + startup[i].setConnectionTransferred(transfer); commands[i] = startup[i]; } final Request request = new Request(_id != null ? _id : -1, -1, commands, false, false); @@ -541,9 +548,14 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater } protected void reconnect(final Link link) { - if (!_reconnectAllowed) { + reconnect(link, null, null, false); + } + + protected void reconnect(final Link link, String preferredHost, List avoidHostList, boolean forTransfer) { + if (!(forTransfer || _reconnectAllowed)) { return; } + synchronized (this) { if (_startup != null) { _startup.cancel(); @@ -575,22 +587,29 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater _shell.getBackoffAlgorithm().waitBeforeRetry(); } + String host = preferredHost; + if (StringUtils.isEmpty(host)) { + host = _shell.getNextHost(); + } + do { - final String host = _shell.getNextHost(); - _connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this); - logger.info("Reconnecting to host:{}", host); - try { - _connection.start(); - } catch (final NioConnectionException e) { - logger.info("Attempted to re-connect to the server, but received an unexpected exception, trying again...", e); - _connection.stop(); + if (CollectionUtils.isEmpty(avoidHostList) || !avoidHostList.contains(host)) { + _connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this); + logger.info("Reconnecting to host:{}", host); try 
{ - _connection.cleanUp(); - } catch (final IOException ex) { - logger.warn("Fail to clean up old connection. {}", ex); + _connection.start(); + } catch (final NioConnectionException e) { + logger.info("Attempted to re-connect to the server, but received an unexpected exception, trying again...", e); + _connection.stop(); + try { + _connection.cleanUp(); + } catch (final IOException ex) { + logger.warn("Fail to clean up old connection. {}", ex); + } } } _shell.getBackoffAlgorithm().waitBeforeRetry(); + host = _shell.getNextHost(); } while (!_connection.isStartup()); _shell.updateConnectedHost(); logger.info("Connected to the host: {}", _shell.getConnectedHost()); @@ -703,6 +722,8 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater } } else if (cmd instanceof SetupMSListCommand) { answer = setupManagementServerList((SetupMSListCommand) cmd); + } else if (cmd instanceof MigrateAgentConnectionCommand) { + answer = migrateAgentToOtherMS((MigrateAgentConnectionCommand) cmd); } else { if (cmd instanceof ReadyCommand) { processReadyCommand(cmd); @@ -858,6 +879,53 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater return new SetupMSListAnswer(true); } + private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) { + try { + if (CollectionUtils.isNotEmpty(cmd.getMsList())) { + processManagementServerList(cmd.getMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval()); + } + migrateAgentConnection(cmd.getAvoidMsList()); + } catch (Exception e) { + String errMsg = "Migrate agent connection failed, due to " + e.getMessage(); + logger.debug(errMsg, e); + return new MigrateAgentConnectionAnswer(errMsg); + } + return new MigrateAgentConnectionAnswer(true); + } + + private void migrateAgentConnection(List avoidMsList) { + final String[] msHosts = _shell.getHosts(); + if (msHosts == null || msHosts.length < 1) { + throw new CloudRuntimeException("Management Server hosts empty, not properly configured 
in agent"); + } + + List msHostsList = new ArrayList<>(Arrays.asList(msHosts)); + msHostsList.removeAll(avoidMsList); + if (msHostsList.isEmpty() || StringUtils.isEmpty(msHostsList.get(0))) { + throw new CloudRuntimeException("No other Management Server hosts to migrate"); + } + + String preferredHost = null; + for (String msHost : msHostsList) { + try (final Socket socket = new Socket()) { + socket.connect(new InetSocketAddress(msHost, _shell.getPort()), 5000); + preferredHost = msHost; + break; + } catch (final IOException e) { + throw new CloudRuntimeException("Management server host: " + msHost + " is not reachable, to migrate connection"); + } + } + + if (preferredHost == null) { + throw new CloudRuntimeException("Management server host(s) are not reachable, to migrate connection"); + } + + logger.debug("Management server host " + preferredHost + " is found to be reachable, trying to reconnect"); + _shell.resetHostCounter(); + _shell.setConnectionTransfer(true); + reconnect(_link, preferredHost, avoidMsList, true); + } + public void processResponse(final Response response, final Link link) { final Answer answer = response.getAnswer(); logger.debug("Received response: {}", response.toString()); @@ -1153,7 +1221,8 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater if (task.getType() == Task.Type.CONNECT) { _shell.getBackoffAlgorithm().reset(); setLink(task.getLink()); - sendStartup(task.getLink()); + sendStartup(task.getLink(), _shell.isConnectionTransfer()); + _shell.setConnectionTransfer(false); } else if (task.getType() == Task.Type.DATA) { Request request; try { @@ -1178,6 +1247,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater Thread.sleep(5000); } catch (InterruptedException e) { } + _shell.setConnectionTransfer(false); reconnect(task.getLink()); return; } else if (task.getType() == Task.Type.OTHER) { diff --git a/agent/src/main/java/com/cloud/agent/AgentShell.java 
b/agent/src/main/java/com/cloud/agent/AgentShell.java index 0699e00250b..d76e5551b45 100644 --- a/agent/src/main/java/com/cloud/agent/AgentShell.java +++ b/agent/src/main/java/com/cloud/agent/AgentShell.java @@ -77,6 +77,7 @@ public class AgentShell implements IAgentShell, Daemon { private String hostToConnect; private String connectedHost; private Long preferredHostCheckInterval; + private boolean connectionTransfer = false; protected AgentProperties agentProperties = new AgentProperties(); public AgentShell() { @@ -215,6 +216,14 @@ public class AgentShell implements IAgentShell, Daemon { _storage.persist(name, value); } + public boolean isConnectionTransfer() { + return connectionTransfer; + } + + public void setConnectionTransfer(boolean connectionTransfer) { + this.connectionTransfer = connectionTransfer; + } + void loadProperties() throws ConfigurationException { final File file = PropertiesUtil.findConfigFile("agent.properties"); diff --git a/agent/src/main/java/com/cloud/agent/IAgentShell.java b/agent/src/main/java/com/cloud/agent/IAgentShell.java index 2dd08fffd45..0b9d9e81e95 100644 --- a/agent/src/main/java/com/cloud/agent/IAgentShell.java +++ b/agent/src/main/java/com/cloud/agent/IAgentShell.java @@ -70,4 +70,8 @@ public interface IAgentShell { String getConnectedHost(); void launchNewAgent(ServerResource resource) throws ConfigurationException; + + boolean isConnectionTransfer(); + + void setConnectionTransfer(boolean connectionTransfer); } diff --git a/api/src/main/java/com/cloud/host/Host.java b/api/src/main/java/com/cloud/host/Host.java index 56b4ed75a31..afac6df5631 100644 --- a/api/src/main/java/com/cloud/host/Host.java +++ b/api/src/main/java/com/cloud/host/Host.java @@ -177,6 +177,8 @@ public interface Host extends StateObject, Identity, Partition, HAResour */ Long getManagementServerId(); + Long getLastManagementServerId(); + /* *@return removal date */ diff --git a/api/src/main/java/com/cloud/host/Status.java 
b/api/src/main/java/com/cloud/host/Status.java index 5dc82bbfaef..af6af82e973 100644 --- a/api/src/main/java/com/cloud/host/Status.java +++ b/api/src/main/java/com/cloud/host/Status.java @@ -127,6 +127,7 @@ public enum Status { s_fsm.addTransition(Status.Connecting, Event.HostDown, Status.Down); s_fsm.addTransition(Status.Connecting, Event.Ping, Status.Connecting); s_fsm.addTransition(Status.Connecting, Event.ManagementServerDown, Status.Disconnected); + s_fsm.addTransition(Status.Connecting, Event.StartAgentRebalance, Status.Rebalancing); s_fsm.addTransition(Status.Connecting, Event.AgentDisconnected, Status.Alert); s_fsm.addTransition(Status.Up, Event.PingTimeout, Status.Alert); s_fsm.addTransition(Status.Up, Event.AgentDisconnected, Status.Alert); diff --git a/api/src/main/java/com/cloud/resource/ResourceService.java b/api/src/main/java/com/cloud/resource/ResourceService.java index 2757c918ed6..562c3c418df 100644 --- a/api/src/main/java/com/cloud/resource/ResourceService.java +++ b/api/src/main/java/com/cloud/resource/ResourceService.java @@ -23,11 +23,11 @@ import org.apache.cloudstack.api.command.admin.cluster.DeleteClusterCmd; import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; import 
org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; @@ -51,7 +51,7 @@ public interface ResourceService { Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException; - Host cancelMaintenance(CancelMaintenanceCmd cmd); + Host cancelMaintenance(CancelHostMaintenanceCmd cmd); Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException; @@ -69,7 +69,7 @@ public interface ResourceService { List discoverHosts(AddSecondaryStorageCmd cmd) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException; - Host maintain(PrepareForMaintenanceCmd cmd); + Host maintain(PrepareForHostMaintenanceCmd cmd); Host declareHostAsDegraded(DeclareHostAsDegradedCmd cmd) throws NoTransitionException; diff --git a/api/src/main/java/com/cloud/server/ManagementServerHostStats.java b/api/src/main/java/com/cloud/server/ManagementServerHostStats.java index 1eea7addba3..6eb275031e8 100644 --- a/api/src/main/java/com/cloud/server/ManagementServerHostStats.java +++ b/api/src/main/java/com/cloud/server/ManagementServerHostStats.java @@ -19,6 +19,7 @@ package com.cloud.server; import java.util.Date; +import java.util.List; /** * management server related stats @@ -70,6 +71,10 @@ public interface ManagementServerHostStats { String getOsDistribution(); + List getLastAgents(); + + List getAgents(); + int getAgentCount(); long getHeapMemoryUsed(); diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java index a406e2d7a72..03de07c37da 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java @@ -1136,9 +1136,12 @@ public class ApiConstants { public static final String LOGOUT = "logout"; public static final String LIST_IDPS = "listIdps"; - public static final String READY_FOR_SHUTDOWN = "readyforshutdown"; + public static final String 
MAINTENANCE_INITIATED = "maintenanceinitiated"; public static final String SHUTDOWN_TRIGGERED = "shutdowntriggered"; + public static final String READY_FOR_SHUTDOWN = "readyforshutdown"; public static final String PENDING_JOBS_COUNT = "pendingjobscount"; + public static final String AGENTS_COUNT = "agentscount"; + public static final String AGENTS = "agents"; public static final String PUBLIC_MTU = "publicmtu"; public static final String PRIVATE_MTU = "privatemtu"; diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java similarity index 98% rename from api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java rename to api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java index a514a61b8a4..55fe8ec23ce 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java @@ -33,7 +33,7 @@ import com.cloud.user.Account; @APICommand(name = "cancelHostMaintenance", description = "Cancels host maintenance.", responseObject = HostResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class CancelMaintenanceCmd extends BaseAsyncCmd { +public class CancelHostMaintenanceCmd extends BaseAsyncCmd { ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java index af87bbf33bb..5e229521efe 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java @@ -31,6 +31,7 @@ import org.apache.cloudstack.api.Parameter; import 
org.apache.cloudstack.api.response.ClusterResponse; import org.apache.cloudstack.api.response.HostResponse; import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.api.response.ManagementServerResponse; import org.apache.cloudstack.api.response.PodResponse; import org.apache.cloudstack.api.response.UserVmResponse; import org.apache.cloudstack.api.response.ZoneResponse; @@ -105,6 +106,9 @@ public class ListHostsCmd extends BaseListCmd { @Parameter(name = ApiConstants.HYPERVISOR, type = CommandType.STRING, description = "hypervisor type of host: XenServer,KVM,VMware,Hyperv,BareMetal,Simulator") private String hypervisor; + @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the id of the management server", since="4.21.0") + private Long managementServerId; + ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// @@ -189,6 +193,10 @@ public class ListHostsCmd extends BaseListCmd { return outOfBandManagementPowerState; } + public Long getManagementServerId() { + return managementServerId; + } + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java similarity index 98% rename from api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java rename to api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java index 2641c54364e..5c2b50c8723 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java +++ 
b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java @@ -33,7 +33,7 @@ import com.cloud.utils.exception.CloudRuntimeException; @APICommand(name = "prepareHostForMaintenance", description = "Prepares a host for maintenance.", responseObject = HostResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class PrepareForMaintenanceCmd extends BaseAsyncCmd { +public class PrepareForHostMaintenanceCmd extends BaseAsyncCmd { ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java index 3eeaaef2afa..5b47a7a06e4 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java @@ -83,9 +83,13 @@ public class AsyncJobResponse extends BaseResponse { @Param(description = "the unique ID of the instance/entity object related to the job") private String jobInstanceId; - @SerializedName("managementserverid") + @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) @Param(description = "the msid of the management server on which the job is running", since = "4.19") - private Long msid; + private String managementServerId; + + @SerializedName(ApiConstants.MANAGEMENT_SERVER_NAME) + @Param(description = "the management server name of the host", since = "4.21.0") + private String managementServerName; @SerializedName(ApiConstants.CREATED) @Param(description = " the created date of the job") @@ -156,7 +160,11 @@ public class AsyncJobResponse extends BaseResponse { this.removed = removed; } - public void setMsid(Long msid) { - this.msid = msid; + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } + + public void setManagementServerName(String managementServerName) { + 
this.managementServerName = managementServerName; } } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java index 62bcc07b16d..091d6391b31 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java @@ -186,10 +186,18 @@ public class HostResponse extends BaseResponseWithAnnotations { @Param(description = "the date and time the host was last pinged") private Date lastPinged; - @SerializedName("managementserverid") + @SerializedName(ApiConstants.VIRTUAL_MACHINE_ID) + @Param(description = "the virtual machine id for host type ConsoleProxy and SecondaryStorageVM", since = "4.21.0") + private String virtualMachineId; + + @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) @Param(description = "the management server ID of the host") private String managementServerId; + @SerializedName(ApiConstants.MANAGEMENT_SERVER_NAME) + @Param(description = "the management server name of the host", since = "4.21.0") + private String managementServerName; + @SerializedName("clusterid") @Param(description = "the cluster ID of the host") private String clusterId; @@ -435,10 +443,18 @@ public class HostResponse extends BaseResponseWithAnnotations { this.lastPinged = lastPinged; } + public void setVirtualMachineId(String virtualMachineId) { + this.virtualMachineId = virtualMachineId; + } + public void setManagementServerId(String managementServerId) { this.managementServerId = managementServerId; } + public void setManagementServerName(String managementServerName) { + this.managementServerName = managementServerName; + } + public void setClusterId(String clusterId) { this.clusterId = clusterId; } @@ -723,10 +739,18 @@ public class HostResponse extends BaseResponseWithAnnotations { return lastPinged; } + public String getVirtualMachineId() { + return virtualMachineId; + } + public String 
getManagementServerId() { return managementServerId; } + public String getManagementServerName() { + return managementServerName; + } + public String getClusterId() { return clusterId; } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java index 84c79d32321..43f92db84cb 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java @@ -86,6 +86,10 @@ public class LoginCmdResponse extends AuthenticationCmdResponse { @Param(description = "Two factor authentication issuer", since = "4.18.0.0") private String issuerFor2FA; + @SerializedName(value = ApiConstants.MANAGEMENT_SERVER_ID) + @Param(description = "Management Server ID that the user logged to", since = "4.21.0.0") + private String managementServerId; + public String getUsername() { return username; } @@ -211,4 +215,12 @@ public class LoginCmdResponse extends AuthenticationCmdResponse { public void setIssuerFor2FA(String issuerFor2FA) { this.issuerFor2FA = issuerFor2FA; } + + public String getManagementServerId() { + return managementServerId; + } + + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java index fc7d3b722ab..df55a63a060 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java @@ -82,6 +82,14 @@ public class ManagementServerResponse extends BaseResponse { @Param(description = "the Management Server Peers") private List peers; + @SerializedName(ApiConstants.AGENTS_COUNT) + @Param(description = "the number of host agents 
this Management Server is responsible for", since = "4.21.0.0") + private Long agentsCount; + + @SerializedName(ApiConstants.PENDING_JOBS_COUNT) + @Param(description = "the number of pending jobs in this Management Server", since = "4.21.0.0") + private Long pendingJobsCount; + public String getId() { return this.id; } @@ -126,6 +134,14 @@ public class ManagementServerResponse extends BaseResponse { return serviceIp; } + public Long getAgentsCount() { + return this.agentsCount; + } + + public Long getPendingJobsCount() { + return this.pendingJobsCount; + } + public void setId(String id) { this.id = id; } @@ -174,6 +190,14 @@ public class ManagementServerResponse extends BaseResponse { this.serviceIp = serviceIp; } + public void setAgentsCount(Long agentsCount) { + this.agentsCount = agentsCount; + } + + public void setPendingJobsCount(Long pendingJobsCount) { + this.pendingJobsCount = pendingJobsCount; + } + public String getKernelVersion() { return kernelVersion; } diff --git a/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java b/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java index 54a53f39578..7f81523dab7 100644 --- a/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java +++ b/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java @@ -22,7 +22,7 @@ import org.apache.cloudstack.api.InternalIdentity; public interface ManagementServerHost extends InternalIdentity, Identity, ControlledEntity { enum State { - Up, Down, PreparingToShutDown, ReadyToShutDown, ShuttingDown + Up, Down, PreparingForMaintenance, Maintenance, PreparingForShutDown, ReadyToShutDown, ShuttingDown } long getMsid(); diff --git a/client/pom.xml b/client/pom.xml index 2ef6c910509..e12e0395482 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -624,7 +624,7 @@ org.apache.cloudstack - cloud-plugin-shutdown + cloud-plugin-maintenance ${project.version} diff --git 
a/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java new file mode 100644 index 00000000000..33d32c7f6cc --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java @@ -0,0 +1,38 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// + +package com.cloud.agent.api; + +public class MigrateAgentConnectionAnswer extends Answer { + public MigrateAgentConnectionAnswer() { + } + + public MigrateAgentConnectionAnswer(boolean result) { + this.result = result; + } + + public MigrateAgentConnectionAnswer(String details) { + this.result = false; + this.details = details; + } + + public MigrateAgentConnectionAnswer(MigrateAgentConnectionCommand cmd, boolean result) { + super(cmd, result, null); + } +} diff --git a/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java new file mode 100644 index 00000000000..9471a68669f --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java @@ -0,0 +1,61 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// + +package com.cloud.agent.api; + +import java.util.List; + +public class MigrateAgentConnectionCommand extends Command { + private List msList; + private List avoidMsList; + private String lbAlgorithm; + private Long lbCheckInterval; + + public MigrateAgentConnectionCommand() { + } + + public MigrateAgentConnectionCommand(final List msList, final List avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) { + super(); + this.msList = msList; + this.avoidMsList = avoidMsList; + this.lbAlgorithm = lbAlgorithm; + this.lbCheckInterval = lbCheckInterval; + } + + public List getMsList() { + return msList; + } + + public List getAvoidMsList() { + return avoidMsList; + } + + public String getLbAlgorithm() { + return lbAlgorithm; + } + + public Long getLbCheckInterval() { + return lbCheckInterval; + } + + @Override + public boolean executeInSequence() { + return false; + } +} diff --git a/core/src/main/java/com/cloud/agent/api/StartupCommand.java b/core/src/main/java/com/cloud/agent/api/StartupCommand.java index cca5e16b585..7a18ba2dccc 100644 --- a/core/src/main/java/com/cloud/agent/api/StartupCommand.java +++ b/core/src/main/java/com/cloud/agent/api/StartupCommand.java @@ -47,6 +47,7 @@ public class StartupCommand extends Command { String resourceName; String gatewayIpAddress; String msHostList; + boolean connectionTransferred; String arch; public StartupCommand(Host.Type type) { @@ -291,6 +292,14 @@ public class StartupCommand extends Command { this.msHostList = msHostList; } + public boolean isConnectionTransferred() { + return connectionTransferred; + } + + public void setConnectionTransferred(boolean connectionTransferred) { + this.connectionTransferred = connectionTransferred; + } + public String getArch() { return arch; } diff --git a/core/src/main/java/com/cloud/agent/api/TransferAgentCommand.java b/core/src/main/java/com/cloud/agent/api/TransferAgentCommand.java index ab74d9bcf85..9c6b3b5fc59 100644 --- 
a/core/src/main/java/com/cloud/agent/api/TransferAgentCommand.java +++ b/core/src/main/java/com/cloud/agent/api/TransferAgentCommand.java @@ -25,6 +25,7 @@ public class TransferAgentCommand extends Command { protected long agentId; protected long futureOwner; protected long currentOwner; + protected boolean isConnectionTransfer; Event event; protected TransferAgentCommand() { @@ -37,6 +38,11 @@ public class TransferAgentCommand extends Command { this.event = event; } + public TransferAgentCommand(long agentId, long currentOwner, long futureOwner, Event event, boolean isConnectionTransfer) { + this(agentId, currentOwner, futureOwner, event); + this.isConnectionTransfer = isConnectionTransfer; + } + public long getAgentId() { return agentId; } @@ -53,6 +59,10 @@ public class TransferAgentCommand extends Command { return currentOwner; } + public boolean isConnectionTransfer() { + return isConnectionTransfer; + } + @Override public boolean executeInSequence() { return false; diff --git a/core/src/main/java/com/cloud/resource/ServerResource.java b/core/src/main/java/com/cloud/resource/ServerResource.java index 1602a78d9a4..981f03b738a 100644 --- a/core/src/main/java/com/cloud/resource/ServerResource.java +++ b/core/src/main/java/com/cloud/resource/ServerResource.java @@ -50,6 +50,10 @@ public interface ServerResource extends Manager { */ StartupCommand[] initialize(); + default StartupCommand[] initialize(boolean isTransferredConnection) { + return initialize(); + } + /** * @param id id of the server to put in the PingCommand * @return PingCommand diff --git a/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java b/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java index 287769d6a76..be7563be045 100644 --- a/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java +++ b/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java @@ -189,6 +189,11 @@ public 
class CheckOnHostCommandTest { return 2L; }; + @Override + public Long getLastManagementServerId() { + return null; + }; + @Override public Date getRemoved() { Date date = null; diff --git a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java index 81525ca13f1..82e2d29f407 100644 --- a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java +++ b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java @@ -16,6 +16,7 @@ // under the License. package com.cloud.agent; +import java.util.List; import java.util.Map; import org.apache.cloudstack.framework.config.ConfigKey; @@ -170,4 +171,10 @@ public interface AgentManager { void notifyMonitorsOfRemovedHost(long hostId, long clusterId); void propagateChangeToAgents(Map params); + + boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs); + + List getLastAgents(); + + void setLastAgents(List lastAgents); } diff --git a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java index 343ad0fa212..3db2afb503d 100755 --- a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java +++ b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java @@ -85,6 +85,8 @@ public interface ResourceManager extends ResourceService, Configurable { public Host createHostAndAgent(Long hostId, ServerResource resource, Map details, boolean old, List hostTags, boolean forRebalance); + public Host createHostAndAgent(Long hostId, ServerResource resource, Map details, boolean old, List hostTags, boolean forRebalance, boolean isTransferredConnection); + public Host addHost(long zoneId, ServerResource resource, Type hostType, Map hostDetails); public HostVO createHostVOForConnectedAgent(StartupCommand[] cmds); diff --git a/engine/orchestration/pom.xml 
b/engine/orchestration/pom.xml index bf8ab14c952..437c98dac87 100755 --- a/engine/orchestration/pom.xml +++ b/engine/orchestration/pom.xml @@ -70,7 +70,7 @@ org.apache.cloudstack - cloud-plugin-shutdown + cloud-plugin-maintenance ${project.version} diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index f3add1557ce..f154eaddc1e 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -16,6 +16,7 @@ // under the License. package com.cloud.agent.manager; +import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.nio.channels.ClosedChannelException; @@ -38,6 +39,8 @@ import java.util.concurrent.locks.ReentrantLock; import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.configuration.Config; import com.cloud.org.Cluster; import com.cloud.utils.NumbersUtil; @@ -50,7 +53,10 @@ import org.apache.cloudstack.framework.config.Configurable; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.framework.jobs.AsyncJob; import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceListener; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.management.ManagementServerHost; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.commons.collections.MapUtils; @@ -130,7 +136,7 @@ 
import org.apache.logging.log4j.ThreadContext; /** * Implementation of the Agent Manager. This class controls the connection to the agents. **/ -public class AgentManagerImpl extends ManagerBase implements AgentManager, HandlerFactory, Configurable { +public class AgentManagerImpl extends ManagerBase implements AgentManager, HandlerFactory, ManagementServerMaintenanceListener, Configurable { /** * _agents is a ConcurrentHashMap, but it is used from within a synchronized block. This will be reported by findbugs as JLM_JSR166_UTILCONCURRENT_MONITORENTER. Maybe a @@ -154,6 +160,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Inject protected HostDao _hostDao = null; @Inject + private ManagementServerHostDao _mshostDao; + @Inject protected OutOfBandManagementDao outOfBandManagementDao; @Inject protected DataCenterDao _dcDao = null; @@ -175,6 +183,9 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Inject protected IndirectAgentLB indirectAgentLB; + @Inject + private ManagementServerMaintenanceManager managementServerMaintenanceManager; + protected int _retry = 2; protected long _nodeId = -1; @@ -187,6 +198,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl private int _directAgentThreadCap; + private List lastAgents = null; + protected StateMachine2 _statusStateMachine = Status.getStateMachine(); private final ConcurrentHashMap _pingMap = new ConcurrentHashMap(10007); @@ -226,6 +239,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl registerForHostEvents(new SetHostParamsListener(), true, true, false); + managementServerMaintenanceManager.registerListener(this); + _executor = new ThreadPoolExecutor(threads, threads, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentTaskPool")); _connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new 
NamedThreadFactory("AgentConnectTaskPool")); @@ -296,6 +311,45 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl _hostMonitors.remove(id); } + @Override + public void onManagementServerMaintenance() { + logger.debug("Management server maintenance enabled"); + _monitorExecutor.shutdownNow(); + if (_connection != null) { + _connection.stop(); + + try { + _connection.cleanUp(); + } catch (final IOException e) { + logger.warn("Fail to clean up old connection", e); + } + } + _connectExecutor.shutdownNow(); + } + + @Override + public void onManagementServerCancelMaintenance() { + logger.debug("Management server maintenance disabled"); + if (_connectExecutor.isShutdown()) { + _connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentConnectTaskPool")); + _connectExecutor.allowCoreThreadTimeOut(true); + } + + startDirectlyConnectedHosts(true); + if (_connection != null) { + try { + _connection.start(); + } catch (final NioConnectionException e) { + logger.error("Error when connecting to the NioServer!", e); + } + } + + if (_monitorExecutor.isShutdown()) { + _monitorExecutor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("AgentMonitor")); + _monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS); + } + } + private AgentControlAnswer handleControlCommand(final AgentAttache attache, final AgentControlCommand cmd) { AgentControlAnswer answer = null; @@ -332,6 +386,16 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl return attache; } + @Override + public List getLastAgents() { + return lastAgents; + } + + @Override + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + @Override public Answer sendTo(final Long dcId, final HypervisorType type, final Command cmd) { final List clusters = 
_clusterDao.listByDcHyType(dcId, type.toString()); @@ -616,10 +680,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl final long hostId = attache.getId(); final HostVO host = _hostDao.findById(hostId); for (final Pair monitor : _hostMonitors) { - logger.debug("Sending Connect to listener: {}", monitor.second().getClass().getSimpleName()); + logger.debug("Sending Connect to listener: {}, for rebalance: {}", monitor.second().getClass().getSimpleName(), forRebalance); for (int i = 0; i < cmd.length; i++) { try { - logger.debug("process connection to issue {} forRebalance == {}", ReflectionToStringBuilderUtils.reflectCollection(cmd[i]), forRebalance); + logger.debug("process connection to issue: {} for host: {}, forRebalance: {}, connection transferred: {}", ReflectionToStringBuilderUtils.reflectCollection(cmd[i]), hostId, forRebalance, cmd[i].isConnectionTransferred()); monitor.second().processConnect(host, cmd[i], forRebalance); } catch (final ConnectionException ce) { if (ce.isSetupError()) { @@ -675,7 +739,13 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Override public boolean start() { - startDirectlyConnectedHosts(); + ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId); + if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) { + _monitorExecutor.shutdownNow(); + return true; + } + + startDirectlyConnectedHosts(false); if (_connection != null) { try { @@ -690,10 +760,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl return true; } - public void startDirectlyConnectedHosts() { + public void startDirectlyConnectedHosts(final boolean forRebalance) { final List hosts = _resourceMgr.findDirectlyConnectedHosts(); for (final HostVO host : hosts) { - loadDirectlyConnectedHost(host, false); + loadDirectlyConnectedHost(host, 
forRebalance); } } @@ -768,6 +838,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } protected boolean loadDirectlyConnectedHost(final HostVO host, final boolean forRebalance) { + return loadDirectlyConnectedHost(host, forRebalance, false); + } + + protected boolean loadDirectlyConnectedHost(final HostVO host, final boolean forRebalance, final boolean isTransferredConnection) { boolean initialized = false; ServerResource resource = null; try { @@ -796,7 +870,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl if (forRebalance) { tapLoadingAgents(host.getId(), TapAgentsAction.Add); - final Host h = _resourceMgr.createHostAndAgent(host.getId(), resource, host.getDetails(), false, null, true); + final Host h = _resourceMgr.createHostAndAgent(host.getId(), resource, host.getDetails(), false, null, true, isTransferredConnection); tapLoadingAgents(host.getId(), TapAgentsAction.Del); return h == null ? false : true; @@ -1918,12 +1992,15 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Override public void processConnect(final Host host, final StartupCommand cmd, final boolean forRebalance) { - if (cmd instanceof StartupRoutingCommand) { - if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { - Map params = new HashMap(); - params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); - params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); - params.put(NetworkOrchestrationService.TUNGSTEN_ENABLED.key(), String.valueOf(NetworkOrchestrationService.TUNGSTEN_ENABLED.valueIn(host.getDataCenterId()))); + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + if (((StartupRoutingCommand)cmd).getHypervisorType() == 
HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { + Map params = new HashMap(); + params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); + params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); + params.put(NetworkOrchestrationService.TUNGSTEN_ENABLED.key(), String.valueOf(NetworkOrchestrationService.TUNGSTEN_ENABLED.valueIn(host.getDataCenterId()))); try { SetHostParamsCommand cmds = new SetHostParamsCommand(params); @@ -1935,8 +2012,6 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } } - } - @Override public boolean processDisconnect(final long agentId, final Status state) { return true; @@ -2004,6 +2079,11 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } } + @Override + public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) { + return true; + } + private GlobalLock getHostJoinLock(Long hostId) { return GlobalLock.getInternLock(String.format("%s-%s", "Host-Join", hostId)); } diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index be327418205..732ce9d61f5 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -47,14 +47,16 @@ import org.apache.cloudstack.framework.config.ConfigDepot; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.ha.dao.HAConfigDao; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; +import 
org.apache.cloudstack.maintenance.command.BaseShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.TriggerShutdownManagementServerHostCommand; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.managed.context.ManagedContextTimerTask; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; -import org.apache.cloudstack.shutdown.ShutdownManager; -import org.apache.cloudstack.shutdown.command.CancelShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.PrepareForShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.BaseShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.TriggerShutdownManagementServerHostCommand; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.security.SSLUtils; @@ -74,12 +76,17 @@ import com.cloud.cluster.ClusterManagerListener; import com.cloud.cluster.ClusterServicePdu; import com.cloud.cluster.ClusteredAgentRebalanceService; import org.apache.cloudstack.management.ManagementServerHost; +import org.apache.commons.collections.CollectionUtils; + import com.cloud.cluster.ManagementServerHostVO; import com.cloud.cluster.agentlb.AgentLoadBalancerPlanner; import com.cloud.cluster.agentlb.HostTransferMapVO; import com.cloud.cluster.agentlb.HostTransferMapVO.HostTransferState; import com.cloud.cluster.agentlb.dao.HostTransferMapDao; import com.cloud.cluster.dao.ManagementServerHostDao; +import 
com.cloud.cluster.dao.ManagementServerHostPeerDao; +import com.cloud.dc.DataCenterVO; +import com.cloud.dc.dao.DataCenterDao; import com.cloud.exception.AgentUnavailableException; import com.cloud.exception.OperationTimedoutException; import com.cloud.exception.UnsupportedVersionException; @@ -101,7 +108,7 @@ import com.cloud.utils.nio.Task; import com.google.gson.Gson; public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener, ClusteredAgentRebalanceService { - private static final ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor")); + private static ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor")); private final long rebalanceTimeOut = 300000; // 5 mins - after this time remove the agent from the transfer list public final static long STARTUP_DELAY = 5000; @@ -113,12 +120,15 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust protected HashMap _sslEngines; private final Timer _timer = new Timer("ClusteredAgentManager Timer"); boolean _agentLbHappened = false; + private int _mshostCounter = 0; @Inject protected ClusterManager _clusterMgr = null; @Inject protected ManagementServerHostDao _mshostDao; @Inject + protected ManagementServerHostPeerDao _mshostPeerDao; + @Inject protected HostTransferMapDao _hostTransferDao; @Inject protected List _lbPlanners; @@ -133,7 +143,9 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust @Inject private CAManager caService; @Inject - private ShutdownManager shutdownManager; + private ManagementServerMaintenanceManager managementServerMaintenanceManager; + @Inject + private DataCenterDao dcDao; protected ClusteredAgentManagerImpl() { super(); @@ -172,6 +184,13 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust 
_timer.schedule(new DirectAgentScanTimerTask(), STARTUP_DELAY, ScanInterval.value()); logger.debug("Scheduled direct agent scan task to run at an interval of {} seconds", ScanInterval.value()); + ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId); + if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) { + s_transferExecutor.shutdownNow(); + cleanupTransferMap(_nodeId); + return true; + } + // Schedule tasks for agent rebalancing if (isAgentRebalanceEnabled()) { cleanupTransferMap(_nodeId); @@ -585,7 +604,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } @Override - public void startDirectlyConnectedHosts() { + public void startDirectlyConnectedHosts(final boolean forRebalance) { // override and let it be dummy for purpose, we will scan and load direct agents periodically. // We may also pickup agents that have been left over from other crashed management server } @@ -742,12 +761,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust @Override public boolean executeRebalanceRequest(final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event) throws AgentUnavailableException, OperationTimedoutException { + return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event, false); + } + + @Override + public boolean executeRebalanceRequest(final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException { boolean result = false; if (event == Event.RequestAgentRebalance) { return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId); } else if (event == Event.StartAgentRebalance) { try { - result = rebalanceHost(agentId, currentOwnerId, futureOwnerId); + result = rebalanceHost(agentId, currentOwnerId, 
futureOwnerId, isConnectionTransfer); } catch (final Exception e) { logger.warn("Unable to rebalance host id={} ({})", agentId, findAttache(agentId), e); } @@ -871,7 +895,11 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } private Answer[] sendRebalanceCommand(final long peer, final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event) { - final TransferAgentCommand transfer = new TransferAgentCommand(agentId, currentOwnerId, futureOwnerId, event); + return sendRebalanceCommand(peer, agentId, currentOwnerId, futureOwnerId, event, false); + } + + private Answer[] sendRebalanceCommand(final long peer, final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event, final boolean isConnectionTransfer) { + final TransferAgentCommand transfer = new TransferAgentCommand(agentId, currentOwnerId, futureOwnerId, event, isConnectionTransfer); final Commands commands = new Commands(Command.OnError.Stop); commands.addCommand(transfer); @@ -1004,7 +1032,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } protected boolean rebalanceHost(final long hostId, final long currentOwnerId, final long futureOwnerId) throws AgentUnavailableException { + return rebalanceHost(hostId, currentOwnerId, futureOwnerId, false); + } + protected boolean rebalanceHost(final long hostId, final long currentOwnerId, final long futureOwnerId, final boolean isConnectionTransfer) throws AgentUnavailableException { boolean result = true; if (currentOwnerId == _nodeId) { if (!startRebalance(hostId)) { @@ -1013,7 +1044,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust return false; } try { - final Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance); + final Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance, 
isConnectionTransfer); if (answer == null || !answer[0].getResult()) { result = false; } @@ -1043,7 +1074,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust if (result) { logger.debug("Loading directly connected host {} to the management server {} as a part of rebalance process", host, _nodeId); - result = loadDirectlyConnectedHost(host, true); + result = loadDirectlyConnectedHost(host, true, isConnectionTransfer); } else { logger.warn("Failed to disconnect {} as a part of rebalance process without notification", host); } @@ -1253,10 +1284,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } else if (cmds.length == 1 && cmds[0] instanceof TransferAgentCommand) { final TransferAgentCommand cmd = (TransferAgentCommand)cmds[0]; - logger.debug("Intercepting command for agent rebalancing: agent {} event: {}", cmd.getAgentId(), cmd.getEvent()); + logger.debug("Intercepting command for agent rebalancing: agent: {}, event: {}, connection transfer: {}", cmd.getAgentId(), cmd.getEvent(), cmd.isConnectionTransfer()); boolean result = false; try { - result = rebalanceAgent(cmd.getAgentId(), cmd.getEvent(), cmd.getCurrentOwner(), cmd.getFutureOwner()); + result = rebalanceAgent(cmd.getAgentId(), cmd.getEvent(), cmd.getCurrentOwner(), cmd.getFutureOwner(), cmd.isConnectionTransfer()); logger.debug("Result is {}", result); } catch (final AgentUnavailableException e) { @@ -1320,10 +1351,28 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } private String handleShutdownManagementServerHostCommand(BaseShutdownManagementServerHostCommand cmd) { - if (cmd instanceof PrepareForShutdownManagementServerHostCommand) { - logger.debug("Received BaseShutdownManagementServerHostCommand - preparing to shut down"); + if (cmd instanceof PrepareForMaintenanceManagementServerHostCommand) { + logger.debug("Received PrepareForMaintenanceManagementServerHostCommand - preparing for maintenance"); 
try { - shutdownManager.prepareForShutdown(); + managementServerMaintenanceManager.prepareForMaintenance(((PrepareForMaintenanceManagementServerHostCommand) cmd).getLbAlgorithm()); + return "Successfully prepared for maintenance"; + } catch(CloudRuntimeException e) { + return e.getMessage(); + } + } + if (cmd instanceof CancelMaintenanceManagementServerHostCommand) { + logger.debug("Received CancelMaintenanceManagementServerHostCommand - cancelling maintenance"); + try { + managementServerMaintenanceManager.cancelMaintenance(); + return "Successfully cancelled maintenance"; + } catch(CloudRuntimeException e) { + return e.getMessage(); + } + } + if (cmd instanceof PrepareForShutdownManagementServerHostCommand) { + logger.debug("Received PrepareForShutdownManagementServerHostCommand - preparing to shut down"); + try { + managementServerMaintenanceManager.prepareForShutdown(); return "Successfully prepared for shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); @@ -1332,7 +1381,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust if (cmd instanceof TriggerShutdownManagementServerHostCommand) { logger.debug("Received TriggerShutdownManagementServerHostCommand - triggering a shut down"); try { - shutdownManager.triggerShutdown(); + managementServerMaintenanceManager.triggerShutdown(); return "Successfully triggered shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); @@ -1341,8 +1390,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust if (cmd instanceof CancelShutdownManagementServerHostCommand) { logger.debug("Received CancelShutdownManagementServerHostCommand - cancelling shut down"); try { - shutdownManager.cancelShutdown(); - return "Successfully prepared for shutdown"; + managementServerMaintenanceManager.cancelShutdown(); + return "Successfully cancelled shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); } @@ -1351,6 +1400,133 @@ public class 
ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } } + @Override + public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) { + if (timeoutDurationInMs <= 0) { + logger.debug(String.format("Not transferring direct agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid)); + return false; + } + + long transferStartTime = System.currentTimeMillis(); + if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId))) { + logger.info(String.format("No direct agent hosts available on management server node %d (id: %s), to transfer", fromMsId, fromMsUuid)); + return true; + } + + List msHosts = getUpMsHostsExcludingMs(fromMsId); + if (msHosts.isEmpty()) { + logger.warn(String.format("No management server nodes available to transfer agents from management server node %d (id: %s)", fromMsId, fromMsUuid)); + return false; + } + + logger.debug(String.format("Transferring direct agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid)); + int agentTransferFailedCount = 0; + List dataCenterList = dcDao.listAll(); + for (DataCenterVO dc : dataCenterList) { + List directAgentHostsInDc = getDirectAgentHostsInDc(fromMsId, dc.getId()); + if (CollectionUtils.isEmpty(directAgentHostsInDc)) { + continue; + } + logger.debug(String.format("Transferring %d direct agents from management server node %d (id: %s) of zone %s", directAgentHostsInDc.size(), fromMsId, fromMsUuid, dc.toString())); + for (HostVO host : directAgentHostsInDc) { + long transferElapsedTimeInMs = System.currentTimeMillis() - transferStartTime; + if (transferElapsedTimeInMs >= timeoutDurationInMs) { + logger.debug(String.format("Stop transferring remaining direct agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid)); + return false; + } + + try { + if (_mshostCounter >= msHosts.size()) { + _mshostCounter = 0; + } + ManagementServerHostVO msHost = 
msHosts.get(_mshostCounter % msHosts.size()); + _mshostCounter++; + + _hostTransferDao.startAgentTransfering(host.getId(), fromMsId, msHost.getMsid()); + if (!rebalanceAgent(host.getId(), Event.StartAgentRebalance, fromMsId, msHost.getMsid(), true)) { + agentTransferFailedCount++; + } else { + updateLastManagementServer(host.getId(), fromMsId); + } + } catch (Exception e) { + logger.warn(String.format("Failed to transfer direct agent of the host %s from management server node %d (id: %s), due to %s", host, fromMsId, fromMsUuid, e.getMessage())); + } + } + } + + return (agentTransferFailedCount == 0); + } + + private List getDirectAgentHosts(long msId) { + List directAgentHosts = new ArrayList<>(); + List hosts = _hostDao.listHostsByMs(msId); + for (HostVO host : hosts) { + AgentAttache agent = findAttache(host.getId()); + if (agent != null && agent instanceof DirectAgentAttache) { + directAgentHosts.add(host); + } + } + + return directAgentHosts; + } + + private List getDirectAgentHostsInDc(long msId, long dcId) { + List directAgentHosts = new ArrayList<>(); + List hosts = _hostDao.listHostsByMsAndDc(msId, dcId); + for (HostVO host : hosts) { + AgentAttache agent = findAttache(host.getId()); + if (agent != null && agent instanceof DirectAgentAttache) { + directAgentHosts.add(host); + } + } + + return directAgentHosts; + } + + private List getUpMsHostsExcludingMs(long avoidMsId) { + final List msHosts = _mshostDao.listBy(ManagementServerHost.State.Up); + Iterator iterator = msHosts.iterator(); + while (iterator.hasNext()) { + ManagementServerHostVO ms = iterator.next(); + if (ms.getMsid() == avoidMsId || _mshostPeerDao.findByPeerMsAndState(ms.getId(), ManagementServerHost.State.Up) == null) { + iterator.remove(); + } + } + + return msHosts; + } + + private void updateLastManagementServer(long hostId, long msId) { + HostVO hostVO = _hostDao.findById(hostId); + if (hostVO != null) { + hostVO.setLastManagementServerId(msId); + _hostDao.update(hostId, hostVO); + } + } + 
+ @Override + public void onManagementServerMaintenance() { + logger.debug("Management server maintenance enabled"); + s_transferExecutor.shutdownNow(); + cleanupTransferMap(_nodeId); + _agentLbHappened = false; + super.onManagementServerMaintenance(); + } + + @Override + public void onManagementServerCancelMaintenance() { + logger.debug("Management server maintenance disabled"); + super.onManagementServerCancelMaintenance(); + if (isAgentRebalanceEnabled()) { + cleanupTransferMap(_nodeId); + if (s_transferExecutor.isShutdown()) { + s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor")); + s_transferExecutor.scheduleAtFixedRate(getAgentRebalanceScanTask(), 60000, 60000, TimeUnit.MILLISECONDS); + s_transferExecutor.scheduleAtFixedRate(getTransferScanTask(), 60000, ClusteredAgentRebalanceService.DEFAULT_TRANSFER_CHECK_INTERVAL, TimeUnit.MILLISECONDS); + } + } + } + public boolean executeAgentUserRequest(final long agentId, final Event event) throws AgentUnavailableException { return executeUserRequest(agentId, event); } @@ -1359,6 +1535,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event); } + public boolean rebalanceAgent(final long agentId, final Event event, final long currentOwnerId, final long futureOwnerId, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException { + return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event, isConnectionTransfer); + } + public boolean isAgentRebalanceEnabled() { return EnableLB.value(); } diff --git a/engine/orchestration/src/main/java/com/cloud/cluster/ClusteredAgentRebalanceService.java b/engine/orchestration/src/main/java/com/cloud/cluster/ClusteredAgentRebalanceService.java index ed52eb1a241..524b1c3adb6 100644 --- 
a/engine/orchestration/src/main/java/com/cloud/cluster/ClusteredAgentRebalanceService.java +++ b/engine/orchestration/src/main/java/com/cloud/cluster/ClusteredAgentRebalanceService.java @@ -27,4 +27,5 @@ public interface ClusteredAgentRebalanceService { boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException; + boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException; } diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java index 053d9ac218e..8ef2de3f74d 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java @@ -372,6 +372,9 @@ public class EngineHostVO implements EngineHost, Identity { @Column(name = "mgmt_server_id") private Long managementServerId; + @Column(name = "last_mgmt_server_id") + private Long lastManagementServerId; + @Column(name = "dom0_memory") private long dom0MinMemory; @@ -556,6 +559,10 @@ public class EngineHostVO implements EngineHost, Identity { this.managementServerId = managementServerId; } + public void setLastManagementServerId(Long lastManagementServerId) { + this.lastManagementServerId = lastManagementServerId; + } + @Override public long getLastPinged() { return lastPinged; @@ -625,6 +632,11 @@ public class EngineHostVO implements EngineHost, Identity { return managementServerId; } + @Override + public Long getLastManagementServerId() { + return lastManagementServerId; + } + @Override public Date getDisconnectedOn() { return disconnectedOn; diff --git 
a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java index 7efc29b02a6..64eb2ac024b 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java @@ -4263,7 +4263,7 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra @Override public void processConnect(final Host host, final StartupCommand cmd, final boolean forRebalance) throws ConnectionException { - if (!(cmd instanceof StartupRoutingCommand)) { + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { return; } final long hostId = host.getId(); diff --git a/engine/schema/src/main/java/com/cloud/host/HostVO.java b/engine/schema/src/main/java/com/cloud/host/HostVO.java index a449eb450cf..bd6768fa0dd 100644 --- a/engine/schema/src/main/java/com/cloud/host/HostVO.java +++ b/engine/schema/src/main/java/com/cloud/host/HostVO.java @@ -404,6 +404,9 @@ public class HostVO implements Host { @Column(name = "mgmt_server_id") private Long managementServerId; + @Column(name = "last_mgmt_server_id") + private Long lastManagementServerId; + @Column(name = "dom0_memory") private long dom0MinMemory; @@ -570,6 +573,10 @@ public class HostVO implements Host { this.managementServerId = managementServerId; } + public void setLastManagementServerId(Long lastManagementServerId) { + this.lastManagementServerId = lastManagementServerId; + } + @Override public long getLastPinged() { return lastPinged; @@ -639,6 +646,11 @@ public class HostVO implements Host { return managementServerId; } + @Override + public Long getLastManagementServerId() { + return lastManagementServerId; + } + @Override public Date getDisconnectedOn() { return disconnectedOn; diff --git 
a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java index a2df6db44e5..abdf50ab399 100644 --- a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java +++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java @@ -151,12 +151,23 @@ public interface HostDao extends GenericDao, StateDao listHostsWithActiveVMs(long offeringId); + List listHostsByMsAndDc(long msId, long dcId); + + List listHostsByMs(long msId); + /** * Retrieves the number of hosts/agents this {@see ManagementServer} has responsibility over. - * @param msid the id of the {@see ManagementServer} + * @param msId the id of the {@see ManagementServer} * @return the number of hosts/agents this {@see ManagementServer} has responsibility over */ - int countByMs(long msid); + int countByMs(long msId); + + /** + * Retrieves the host ids/agents this {@see ManagementServer} has responsibility over. + * @param msId the id of the {@see ManagementServer} + * @return the host ids/agents this {@see ManagementServer} has responsibility over + */ + List listByMs(long msId); /** * Retrieves the hypervisor versions of the hosts in the datacenter which are in Up state in ascending order diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java index 63950294654..4e1be3ae0fb 100644 --- a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java @@ -124,7 +124,9 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao protected SearchBuilder UnmanagedApplianceSearch; protected SearchBuilder MaintenanceCountSearch; protected SearchBuilder HostTypeCountSearch; - protected SearchBuilder ResponsibleMsCountSearch; + protected SearchBuilder ResponsibleMsSearch; + protected SearchBuilder ResponsibleMsDcSearch; + protected GenericSearchBuilder ResponsibleMsIdSearch; protected 
SearchBuilder HostTypeZoneCountSearch; protected SearchBuilder ClusterStatusSearch; protected SearchBuilder TypeNameZoneSearch; @@ -189,9 +191,19 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao HostTypeCountSearch.and("type", HostTypeCountSearch.entity().getType(), SearchCriteria.Op.EQ); HostTypeCountSearch.done(); - ResponsibleMsCountSearch = createSearchBuilder(); - ResponsibleMsCountSearch.and("managementServerId", ResponsibleMsCountSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); - ResponsibleMsCountSearch.done(); + ResponsibleMsSearch = createSearchBuilder(); + ResponsibleMsSearch.and("managementServerId", ResponsibleMsSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsSearch.done(); + + ResponsibleMsDcSearch = createSearchBuilder(); + ResponsibleMsDcSearch.and("managementServerId", ResponsibleMsDcSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsDcSearch.and("dcId", ResponsibleMsDcSearch.entity().getDataCenterId(), SearchCriteria.Op.EQ); + ResponsibleMsDcSearch.done(); + + ResponsibleMsIdSearch = createSearchBuilder(String.class); + ResponsibleMsIdSearch.selectFields(ResponsibleMsIdSearch.entity().getUuid()); + ResponsibleMsIdSearch.and("managementServerId", ResponsibleMsIdSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsIdSearch.done(); HostTypeZoneCountSearch = createSearchBuilder(); HostTypeZoneCountSearch.and("type", HostTypeZoneCountSearch.entity().getType(), SearchCriteria.Op.EQ); @@ -1424,12 +1436,34 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao } @Override - public int countByMs(long msid) { - SearchCriteria sc = ResponsibleMsCountSearch.create(); - sc.setParameters("managementServerId", msid); + public List listHostsByMsAndDc(long msId, long dcId) { + SearchCriteria sc = ResponsibleMsDcSearch.create(); + sc.setParameters("managementServerId", msId); + sc.setParameters("dcId", dcId); + return listBy(sc); + 
} + + @Override + public List listHostsByMs(long msId) { + SearchCriteria sc = ResponsibleMsSearch.create(); + sc.setParameters("managementServerId", msId); + return listBy(sc); + } + + @Override + public int countByMs(long msId) { + SearchCriteria sc = ResponsibleMsSearch.create(); + sc.setParameters("managementServerId", msId); return getCount(sc); } + @Override + public List listByMs(long msId) { + SearchCriteria sc = ResponsibleMsIdSearch.create(); + sc.addAnd("managementServerId", SearchCriteria.Op.EQ, msId); + return customSearch(sc, null); + } + @Override public List listOrderedHostsHypervisorVersionsInDatacenter(long datacenterId, HypervisorType hypervisorType) { PreparedStatement pstmt = null; diff --git a/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql b/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql index 47e7bebbee4..4a5a0203a15 100644 --- a/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql +++ b/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql @@ -31,3 +31,6 @@ SELECT uuid(), role_id, 'quotaCreditsList', permission, sort_order FROM `cloud`.`role_permissions` rp WHERE rp.rule = 'quotaStatement' AND NOT EXISTS(SELECT 1 FROM cloud.role_permissions rp_ WHERE rp.role_id = rp_.role_id AND rp_.rule = 'quotaCreditsList'); + +CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.host', 'last_mgmt_server_id', 'bigint unsigned DEFAULT NULL COMMENT "last management server this host is connected to" AFTER `mgmt_server_id`'); + diff --git a/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java b/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java index 9dfb9e1654e..b136b8e842b 100644 --- a/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java +++ b/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java @@ -20,6 +20,12 @@ import java.util.List; public interface 
IndirectAgentLB { + /** + * Return list of management server addresses from host setting + * @return management servers string list + */ + List getManagementServerList(); + /** * Return list of management server addresses after applying configured lb algorithm * for a host in a zone. @@ -30,6 +36,17 @@ public interface IndirectAgentLB { */ List getManagementServerList(Long hostId, Long dcId, List orderedHostIdList); + /** + * Return list of management server addresses after applying the lb algorithm + * for a host in a zone. + * @param hostId host id (if present) + * @param dcId zone id + * @param orderedHostIdList (optional) list of ordered host id list + * @param lbAlgorithm lb algorithm + * @return management servers string list + */ + List getManagementServerList(Long hostId, Long dcId, List orderedHostIdList, String lbAlgorithm); + /** * Compares received management server list against expected list for a host in a zone. * @param hostId host id @@ -45,6 +62,8 @@ public interface IndirectAgentLB { */ String getLBAlgorithmName(); + void checkLBAlgorithmName(String lbAlgorithm); + /** * Returns the configured LB preferred host check interval (if applicable at cluster scope) * @return returns interval in seconds @@ -53,4 +72,7 @@ public interface IndirectAgentLB { void propagateMSListToAgents(); + boolean haveAgentBasedHosts(long msId); + + boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs); } diff --git a/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java b/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java index e26e32e7b2e..1b45910b88a 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java @@ -941,7 +941,7 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C try { JmxUtil.unregisterMBean("ClusterManager", "Node " + 
mshost.getId()); } catch (final Exception e) { - logger.warn("Unable to deregiester cluster node from JMX monitoring due to exception " + e.toString()); + logger.warn("Unable to deregister cluster node from JMX monitoring due to exception " + e.toString()); } } @@ -1063,8 +1063,12 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C logger.info("New instance of management server {}, runId {} is being started", mshost, _runId); } } else { + ManagementServerHost.State msHostState = ManagementServerHost.State.Up; + if (ManagementServerHost.State.Maintenance.equals(mshost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(mshost.getState())) { + msHostState = ManagementServerHost.State.Maintenance; + } _mshostDao.update(mshost.getId(), _runId, NetUtils.getCanonicalHostName(), version, _clusterNodeIP, _currentServiceAdapter.getServicePort(), - DateUtil.currentGMTTime()); + DateUtil.currentGMTTime(), msHostState); if (logger.isInfoEnabled()) { logger.info("Management server {}, runId {} is being started", mshost, _runId); } @@ -1102,11 +1106,17 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C if (_mshostId != null) { final ManagementServerHostVO mshost = _mshostDao.findByMsid(_msId); - final ManagementServerStatusVO mshostStatus = mshostStatusDao.findByMsId(mshost.getUuid()); - mshost.setState(ManagementServerHost.State.Down); - mshostStatus.setLastJvmStop(new Date()); - _mshostDao.update(_mshostId, mshost); - mshostStatusDao.update(mshostStatus.getId(), mshostStatus); + if (mshost != null) { + final ManagementServerStatusVO mshostStatus = mshostStatusDao.findByMsId(mshost.getUuid()); + mshostStatus.setLastJvmStop(new Date()); + mshostStatusDao.update(mshostStatus.getId(), mshostStatus); + + ManagementServerHost.State msHostState = ManagementServerHost.State.Down; + if (ManagementServerHost.State.Maintenance.equals(mshost.getState()) || 
ManagementServerHost.State.PreparingForMaintenance.equals(mshost.getState())) { + msHostState = ManagementServerHost.State.Maintenance; + } + _mshostDao.updateState(mshost.getId(), msHostState); + } } _heartbeatScheduler.shutdownNow(); diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java index 96d57ee0425..6c8ffcac78b 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java @@ -33,10 +33,12 @@ public interface ManagementServerHostDao extends GenericDao getActiveList(Date cutTime); List getInactiveList(Date cutTime); @@ -47,6 +49,8 @@ public interface ManagementServerHostDao extends GenericDao listBy(ManagementServerHost.State... states); + List listNonUpStateMsIPs(); + /** * Lists msids for which hosts are orphaned, i.e. msids that hosts refer as their owning ms whilst no mshost entry exists with those msids * diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java index 27b6d52f61b..ec943a9c26b 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java @@ -35,6 +35,7 @@ import com.cloud.utils.DateUtil; import com.cloud.utils.db.DB; import com.cloud.utils.db.Filter; import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.GenericSearchBuilder; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.TransactionLegacy; @@ -46,6 +47,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase ActiveSearch; private final SearchBuilder InactiveSearch; private final SearchBuilder 
StateSearch; + protected GenericSearchBuilder NonUpStateMsSearch; @Override public void invalidateRunSession(long id, long runid) { @@ -77,7 +79,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase getActiveList(Date cutTime) { SearchCriteria sc = ActiveSearch.create(); @@ -205,6 +229,11 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase listNonUpStateMsIPs() { + SearchCriteria sc = NonUpStateMsSearch.create(); + sc.addAnd("state", SearchCriteria.Op.NLIKE, State.Up); + return customSearch(sc, null); + } + @Override public List listOrphanMsids() { List orphanList = new ArrayList(); diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java index 55559946cf0..37601e8ce78 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java @@ -33,4 +33,7 @@ public interface ManagementServerHostPeerDao extends GenericDao FindForUpdateSearch; private final SearchBuilder CountSearch; private final SearchBuilder ActiveSearch; + private final SearchBuilder FindByOwnerAndPeerMsSearch; + private final SearchBuilder FindByPeerMsAndStateSearch; + public ManagementServerHostPeerDaoImpl() { ClearPeerSearch = createSearchBuilder(); @@ -59,6 +62,17 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase 0; } + + @Override + public ManagementServerHostPeerVO findByOwnerAndPeerMsHost(long ownerMshost, long peerMshost, ManagementServerHost.State peerState) { + SearchCriteria sc = FindByOwnerAndPeerMsSearch.create(); + sc.setParameters("ownerMshost", ownerMshost); + sc.setParameters("peerMshost", peerMshost); + sc.setParameters("peerState", peerState); + + return findOneBy(sc); + } + + @Override + public ManagementServerHostPeerVO findByPeerMsAndState(long peerMshost, 
ManagementServerHost.State peerState) { + SearchCriteria sc = FindByPeerMsAndStateSearch.create(); + sc.setParameters("peerMshost", peerMshost); + sc.setParameters("peerState", peerState); + + return findOneBy(sc); + } } diff --git a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java index 47bf27bd6c4..448a4eb219c 100644 --- a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java +++ b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java @@ -174,7 +174,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, private ExecutorService _apiJobExecutor; private ExecutorService _workerJobExecutor; - private boolean asyncJobsEnabled = true; + private boolean asyncJobsDisabled = false; + private long asyncJobsDisabledTime = 0; @Override public String getConfigComponentName() { @@ -218,16 +219,48 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, return submitAsyncJob(job, false); } - private void checkShutdown() { - if (!isAsyncJobsEnabled()) { - throw new CloudRuntimeException("A shutdown has been triggered. Can not accept new jobs"); + private void checkAsyncJobAllowed(AsyncJob job) { + if (isAsyncJobsEnabled()) { + return; } + + if (job instanceof VmWorkJobVO) { + String related = job.getRelated(); + if (StringUtils.isNotBlank(related)) { + AsyncJob relatedJob = _jobDao.findByIdIncludingRemoved(Long.parseLong(related)); + if (relatedJob != null) { + long relatedJobCreatedTime = relatedJob.getCreated().getTime(); + if ((asyncJobsDisabledTime - relatedJobCreatedTime) >= 0) { + return; + } + } + } + } + + throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. 
Can not accept new jobs"); + } + + private boolean checkSyncQueueItemAllowed(SyncQueueItemVO item) { + if (isAsyncJobsEnabled()) { + return true; + } + + Long contentId = item.getContentId(); + AsyncJob relatedJob = _jobDao.findByIdIncludingRemoved(contentId); + if (relatedJob != null) { + long relatedJobCreatedTime = relatedJob.getCreated().getTime(); + if ((asyncJobsDisabledTime - relatedJobCreatedTime) >= 0) { + return true; + } + } + + return false; } @SuppressWarnings("unchecked") @DB public long submitAsyncJob(AsyncJob job, boolean scheduleJobExecutionInContext) { - checkShutdown(); + checkAsyncJobAllowed(job); @SuppressWarnings("rawtypes") GenericDao dao = GenericDaoBase.getDao(job.getClass()); @@ -248,7 +281,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, @Override @DB public long submitAsyncJob(final AsyncJob job, final String syncObjType, final long syncObjId) { - checkShutdown(); + checkAsyncJobAllowed(job); try { @SuppressWarnings("rawtypes") @@ -860,7 +893,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, protected void reallyRun() { try { if (!isAsyncJobsEnabled()) { - logger.info("A shutdown has been triggered. Not executing any async job"); + logger.info("Maintenance or Shutdown has been initiated on this management server. 
Not executing any async jobs"); return; } @@ -1301,16 +1334,18 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, @Override public void enableAsyncJobs() { - this.asyncJobsEnabled = true; + this.asyncJobsDisabled = false; + this.asyncJobsDisabledTime = 0; } @Override public void disableAsyncJobs() { - this.asyncJobsEnabled = false; + this.asyncJobsDisabled = true; + this.asyncJobsDisabledTime = System.currentTimeMillis(); } @Override public boolean isAsyncJobsEnabled() { - return asyncJobsEnabled; + return !asyncJobsDisabled; } } diff --git a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java index 512715988bb..f2c5ba2228e 100644 --- a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java +++ b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java @@ -6036,6 +6036,11 @@ public class VmwareResource extends ServerResourceBase implements StoragePoolRes @Override public StartupCommand[] initialize() { + return initialize(false); + } + + @Override + public StartupCommand[] initialize(boolean isTransferredConnection) { try { String hostApiVersion = "4.1"; VmwareContext context = getServiceContext(); @@ -6064,6 +6069,7 @@ public class VmwareResource extends ServerResourceBase implements StoragePoolRes cmd.setHypervisorType(HypervisorType.VMware); cmd.setCluster(_cluster); cmd.setHypervisorVersion(hostApiVersion); + cmd.setConnectionTransferred(isTransferredConnection); List storageCmds = initializeLocalStorage(); StartupCommand[] answerCmds = new StartupCommand[1 + storageCmds.size()]; diff --git a/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java 
b/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java index e06268cd6ab..79d3c4d04ff 100644 --- a/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java +++ b/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java @@ -3723,6 +3723,11 @@ public abstract class CitrixResourceBase extends ServerResourceBase implements S @Override public StartupCommand[] initialize() throws IllegalArgumentException { + return initialize(false); + } + + @Override + public StartupCommand[] initialize(boolean isTransferredConnection) throws IllegalArgumentException { final Connection conn = getConnection(); if (!getHostInfo(conn)) { logger.warn("Unable to get host information for " + _host.getIp()); @@ -3733,6 +3738,7 @@ public abstract class CitrixResourceBase extends ServerResourceBase implements S cmd.setHypervisorType(HypervisorType.XenServer); cmd.setCluster(_cluster); cmd.setPoolSync(false); + cmd.setConnectionTransferred(isTransferredConnection); try { final Pool pool = Pool.getByUuid(conn, _host.getPool()); diff --git a/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/XenServer56Resource.java b/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/XenServer56Resource.java index 92e812d8d78..d0b96aecbb8 100644 --- a/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/XenServer56Resource.java +++ b/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/XenServer56Resource.java @@ -128,7 +128,12 @@ public class XenServer56Resource extends CitrixResourceBase { @Override public StartupCommand[] initialize() { + return initialize(false); + } + + @Override + public StartupCommand[] initialize(boolean isTransferredConnection) { pingXAPI(); - return super.initialize(); + return 
super.initialize(isTransferredConnection); } } diff --git a/plugins/hypervisors/xenserver/src/main/java/org/apache/cloudstack/hypervisor/xenserver/XenServerResourceNewBase.java b/plugins/hypervisors/xenserver/src/main/java/org/apache/cloudstack/hypervisor/xenserver/XenServerResourceNewBase.java index 5120a0cc5af..43249857d21 100644 --- a/plugins/hypervisors/xenserver/src/main/java/org/apache/cloudstack/hypervisor/xenserver/XenServerResourceNewBase.java +++ b/plugins/hypervisors/xenserver/src/main/java/org/apache/cloudstack/hypervisor/xenserver/XenServerResourceNewBase.java @@ -64,7 +64,12 @@ public class XenServerResourceNewBase extends XenServer620SP1Resource { @Override public StartupCommand[] initialize() throws IllegalArgumentException { - final StartupCommand[] cmds = super.initialize(); + return initialize(false); + } + + @Override + public StartupCommand[] initialize(boolean isTransferredConnection) throws IllegalArgumentException { + final StartupCommand[] cmds = super.initialize(isTransferredConnection); final Connection conn = getConnection(); Pool pool; diff --git a/plugins/shutdown/pom.xml b/plugins/maintenance/pom.xml similarity index 92% rename from plugins/shutdown/pom.xml rename to plugins/maintenance/pom.xml index 372095c55c8..fb8f64cd16c 100644 --- a/plugins/shutdown/pom.xml +++ b/plugins/maintenance/pom.xml @@ -21,8 +21,8 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - cloud-plugin-shutdown - Apache CloudStack Plugin - Safe Shutdown + cloud-plugin-maintenance + Apache CloudStack Plugin - MS Maintenance and Safe Shutdown org.apache.cloudstack cloudstack-plugins diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java similarity index 85% rename from 
plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java index d7f4953291b..da9263ed4fd 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java @@ -25,18 +25,18 @@ import org.apache.cloudstack.api.Parameter; import org.apache.cloudstack.api.response.ManagementServerResponse; -import org.apache.cloudstack.shutdown.ShutdownManager; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; -public abstract class BaseShutdownActionCmd extends BaseCmd { +public abstract class BaseMSMaintenanceActionCmd extends BaseCmd { @Inject - protected ShutdownManager shutdownManager; + protected ManagementServerMaintenanceManager managementServerMaintenanceManager; ///////////////////////////////////////////////////// //////////////// API parameters ///////////////////// ///////////////////////////////////////////////////// - @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the uuid of the management server", required = true) + @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the id of the management server", required = true) private Long managementServerId; ///////////////////////////////////////////////////// diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java new file mode 100644 index 00000000000..a0f091ef1e4 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java @@ -0,0 +1,60 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.BaseCmd; + +import com.cloud.user.Account; + +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.acl.RoleType; + +@APICommand(name = CancelMaintenanceCmd.APINAME, + description = "Cancels maintenance of the management server", + since = "4.21.0", + responseObject = ManagementServerMaintenanceResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + authorized = {RoleType.Admin}) + +public class CancelMaintenanceCmd extends BaseMSMaintenanceActionCmd { + + public static final String APINAME = "cancelMaintenance"; + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() { + final ManagementServerMaintenanceResponse 
response = managementServerMaintenanceManager.cancelMaintenance(this); + response.setResponseName(getCommandName()); + response.setObjectName("cancelmaintenance"); + setResponseObject(response); + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java similarity index 83% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java index aa90d7fcbdc..35bfa4f4264 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java @@ -15,24 +15,24 @@ // specific language governing permissions and limitations // under the License. - package org.apache.cloudstack.api.command; +package org.apache.cloudstack.api.command; import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.BaseCmd; import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = CancelShutdownCmd.APINAME, description = "Cancels a triggered shutdown", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class CancelShutdownCmd extends BaseShutdownActionCmd { +public class CancelShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "cancelShutdown"; @@ -52,7 +52,7 @@ public class CancelShutdownCmd extends BaseShutdownActionCmd { @Override public void execute() { - final 
ReadyForShutdownResponse response = shutdownManager.cancelShutdown(this); + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.cancelShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("cancelshutdown"); setResponseObject(response); diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java new file mode 100644 index 00000000000..3c036c4c35f --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; + +import com.cloud.user.Account; + +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.acl.RoleType; + +@APICommand(name = PrepareForMaintenanceCmd.APINAME, + description = "Prepares management server for maintenance by preventing new jobs from being accepted after completion of active jobs and migrating the agents", + since = "4.21.0", + responseObject = ManagementServerMaintenanceResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + authorized = {RoleType.Admin}) +public class PrepareForMaintenanceCmd extends BaseMSMaintenanceActionCmd { + public static final String APINAME = "prepareForMaintenance"; + + @Parameter(name = ApiConstants.ALGORITHM, type = CommandType.STRING, description = "indirect agents load balancer algorithm (static, roundrobin, shuffle);" + + " when this is not set, already configured algorithm from setting 'indirect.agent.lb.algorithm' is considered") + private String algorithm; + + public String getAlgorithm() { + return algorithm; + } + + public void setAlgorithm(String algorithm) { + this.algorithm = algorithm; + } + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() { + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.prepareForMaintenance(this); + response.setResponseName(getCommandName()); + 
response.setObjectName("prepareformaintenance"); + setResponseObject(response); + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java similarity index 85% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java index c86d2856047..273cc2743ad 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java @@ -17,22 +17,21 @@ package org.apache.cloudstack.api.command; - import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.BaseCmd; import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = PrepareForShutdownCmd.APINAME, description = "Prepares CloudStack for a safe manual shutdown by preventing new jobs from being accepted", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class PrepareForShutdownCmd extends BaseShutdownActionCmd { +public class PrepareForShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "prepareForShutdown"; @Override @@ -51,7 +50,7 @@ public class PrepareForShutdownCmd extends BaseShutdownActionCmd { @Override public void execute() { - final ReadyForShutdownResponse response = shutdownManager.prepareForShutdown(this); + final ManagementServerMaintenanceResponse response = 
managementServerMaintenanceManager.prepareForShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("prepareforshutdown"); setResponseObject(response); diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java similarity index 66% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java index de4db9c0428..782b23a0422 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java @@ -17,55 +17,23 @@ package org.apache.cloudstack.api.command; -import javax.inject.Inject; - import org.apache.cloudstack.api.APICommand; -import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseCmd; -import org.apache.cloudstack.api.Parameter; -import org.apache.cloudstack.api.response.ManagementServerResponse; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; -import org.apache.cloudstack.shutdown.ShutdownManager; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import com.cloud.user.Account; @APICommand(name = ReadyForShutdownCmd.APINAME, description = "Returns the status of CloudStack, whether a shutdown has been triggered and if ready to shutdown", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class ReadyForShutdownCmd extends BaseCmd { +public class ReadyForShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "readyForShutdown"; - @Inject - private ShutdownManager 
shutdownManager; - - ///////////////////////////////////////////////////// - //////////////// API parameters ///////////////////// - ///////////////////////////////////////////////////// - - @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the uuid of the management server") - private Long managementServerId; - ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// - public Long getManagementServerId() { - return managementServerId; - } - - ///////////////////////////////////////////////////// - /////////////// API Implementation/////////////////// - ///////////////////////////////////////////////////// - - @Override - public void execute() { - final ReadyForShutdownResponse response = shutdownManager.readyForShutdown(this); - response.setResponseName(getCommandName()); - response.setObjectName("readyforshutdown"); - setResponseObject(response); - } - @Override public String getCommandName() { return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; @@ -75,4 +43,16 @@ public class ReadyForShutdownCmd extends BaseCmd { public long getEntityOwnerId() { return Account.ACCOUNT_ID_SYSTEM; } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() { + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.readyForShutdown(this); + response.setResponseName(getCommandName()); + response.setObjectName("readyforshutdown"); + setResponseObject(response); + } } diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java similarity index 85% rename from 
plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java index b4ef7c1f67a..dc729593b0d 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java @@ -22,16 +22,16 @@ import org.apache.cloudstack.api.BaseCmd; import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = TriggerShutdownCmd.APINAME, - description = "Triggers an automatic safe shutdown of CloudStack by not accepting new jobs and shutting down when all pending jobbs have been completed. Triggers an immediate shutdown if forced", + description = "Triggers an automatic safe shutdown of CloudStack by not accepting new jobs and shutting down when all pending jobs have been completed.", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class TriggerShutdownCmd extends BaseShutdownActionCmd { +public class TriggerShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "triggerShutdown"; ///////////////////////////////////////////////////// @@ -54,7 +54,7 @@ public class TriggerShutdownCmd extends BaseShutdownActionCmd { @Override public void execute() { - final ReadyForShutdownResponse response = shutdownManager.triggerShutdown(this); + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.triggerShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("triggershutdown"); 
setResponseObject(response); diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java similarity index 52% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java index d1b2353d2a3..52bd8ab3fb8 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java @@ -16,35 +16,81 @@ // under the License. package org.apache.cloudstack.api.response; +import java.util.List; import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseResponse; +import org.apache.cloudstack.management.ManagementServerHost.State; import com.cloud.serializer.Param; import com.google.gson.annotations.SerializedName; -public class ReadyForShutdownResponse extends BaseResponse { - @SerializedName(ApiConstants.READY_FOR_SHUTDOWN) - @Param(description = "Indicates whether CloudStack is ready to shutdown") - private Boolean readyForShutdown; +public class ManagementServerMaintenanceResponse extends BaseResponse { + @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) + @Param(description = "The id of the management server") + private String managementServerId; + + @SerializedName(ApiConstants.STATE) + @Param(description = "the state of the management server") + private State state; + + @SerializedName(ApiConstants.MAINTENANCE_INITIATED) + @Param(description = "Indicates whether maintenance has been initiated") + private Boolean maintenanceInitiated; @SerializedName(ApiConstants.SHUTDOWN_TRIGGERED) @Param(description = "Indicates whether a shutdown has been triggered") private Boolean 
shutdownTriggered; + @SerializedName(ApiConstants.READY_FOR_SHUTDOWN) + @Param(description = "Indicates whether CloudStack is ready to shutdown") + private Boolean readyForShutdown; + @SerializedName(ApiConstants.PENDING_JOBS_COUNT) @Param(description = "The number of jobs in progress") private Long pendingJobsCount; - @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) - @Param(description = "The id of the management server") - private Long msId; + @SerializedName(ApiConstants.AGENTS_COUNT) + @Param(description = "The number of host agents this management server is responsible for") + private Long agentsCount; - public ReadyForShutdownResponse(Long msId, Boolean shutdownTriggered, Boolean readyForShutdown, long pendingJobsCount) { - this.msId = msId; + @SerializedName(ApiConstants.AGENTS) + @Param(description = "The host agents this management server is responsible for") + private List agents; + + public ManagementServerMaintenanceResponse(String managementServerId, State state, Boolean maintenanceInitiated, Boolean shutdownTriggered, Boolean readyForShutdown, long pendingJobsCount, long agentsCount, List agents) { + this.managementServerId = managementServerId; + this.state = state; + this.maintenanceInitiated = maintenanceInitiated; this.shutdownTriggered = shutdownTriggered; this.readyForShutdown = readyForShutdown; this.pendingJobsCount = pendingJobsCount; + this.agentsCount = agentsCount; + this.agents = agents; + } + + public String getManagementServerId() { + return managementServerId; + } + + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } + + public State getState() { + return state; + } + + public void setState(State state) { + this.state = state; + } + + public Boolean getMaintenanceInitiated() { + return this.maintenanceInitiated; + } + + public void setMaintenanceInitiated(Boolean maintenanceInitiated) { + this.maintenanceInitiated = maintenanceInitiated; } public Boolean 
getShutdownTriggered() { @@ -71,11 +117,19 @@ public class ReadyForShutdownResponse extends BaseResponse { this.pendingJobsCount = pendingJobsCount; } - public Long getMsId() { - return msId; + public Long getAgentsCount() { + return this.agentsCount; } - public void setMsId(Long msId) { - this.msId = msId; + public void setAgentsCount(Long agentsCount) { + this.agentsCount = agentsCount; + } + + public List getAgents() { + return agents; + } + + public void setAgents(List agents) { + this.agents = agents; } } diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java new file mode 100644 index 00000000000..bd82d1b257d --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.maintenance; + +public interface ManagementServerMaintenanceListener { + void onManagementServerMaintenance(); + + void onManagementServerCancelMaintenance(); +} diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java new file mode 100644 index 00000000000..d474f718826 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.maintenance; + +import org.apache.cloudstack.api.command.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.CancelShutdownCmd; +import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.PrepareForShutdownCmd; +import org.apache.cloudstack.api.command.ReadyForShutdownCmd; +import org.apache.cloudstack.api.command.TriggerShutdownCmd; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.framework.config.ConfigKey; + +import com.cloud.cluster.ManagementServerHostVO; + +public interface ManagementServerMaintenanceManager { + int DEFAULT_MS_MAINTENANCE_TIMEOUT_IN_MINS = 60; + + ConfigKey ManagementServerMaintenanceTimeoutInMins = new ConfigKey<>(Integer.class, + "management.server.maintenance.timeout", + "Advanced", + String.valueOf(DEFAULT_MS_MAINTENANCE_TIMEOUT_IN_MINS), + "Timeout (in mins) for the maintenance window for the management server, default: 60 mins.", + true, + ConfigKey.Scope.Global, + null); + + void registerListener(ManagementServerMaintenanceListener listener); + + void unregisterListener(ManagementServerMaintenanceListener listener); + + void onMaintenance(); + + void onCancelMaintenance(); + + // Returns the number of pending jobs for the given management server msids. + // NOTE: This is the msid and NOT the id + long countPendingJobs(Long... 
msIds); + + boolean isAsyncJobsEnabled(); + + // Indicates whether a shutdown has been triggered on the current management server + boolean isShutdownTriggered(); + + // Indicates whether the current management server is preparing to shutdown + boolean isPreparingForShutdown(); + + // Triggers a shutdown on the current management server by not accepting any more async jobs and shutting down when there are no pending jobs + void triggerShutdown(); + + // Prepares the current management server to shutdown by not accepting any more async jobs + void prepareForShutdown(); + + // Cancels the shutdown on the current management server + void cancelShutdown(); + + // Indicates whether the current management server is preparing for maintenance + boolean isPreparingForMaintenance(); + + void resetPreparingForMaintenance(); + + long getMaintenanceStartTime(); + + String getLbAlgorithm(); + + // Prepares the current management server for maintenance by migrating the agents and not accepting any more async jobs + void prepareForMaintenance(String lbAlorithm); + + // Cancels maintenance of the current management server + void cancelMaintenance(); + + void cancelPreparingForMaintenance(ManagementServerHostVO msHost); + + void cancelWaitForPendingJobs(); + + // Returns whether any of the ms can be shut down and if a shutdown has been triggered on any running ms + ManagementServerMaintenanceResponse readyForShutdown(ReadyForShutdownCmd cmd); + + // Prepares the specified management server to shutdown by not accepting any more async jobs + ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd); + + // Cancels the shutdown on the specified management server + ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd); + + // Triggers a shutdown on the specified management server by not accepting any more async jobs and shutting down when there are no pending jobs + ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd 
cmd); + + // Prepares the specified management server for maintenance by migrating the agents and not accepting any more async jobs + ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd); + + // Cancels maintenance of the specified management server + ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd); +} diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java new file mode 100644 index 00000000000..0af8a7c114d --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java @@ -0,0 +1,598 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.maintenance; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import javax.inject.Inject; + +import org.apache.cloudstack.agent.lb.IndirectAgentLB; +import org.apache.cloudstack.api.command.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.CancelShutdownCmd; +import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.PrepareForShutdownCmd; +import org.apache.cloudstack.api.command.ReadyForShutdownCmd; +import org.apache.cloudstack.api.command.TriggerShutdownCmd; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.config.ApiServiceConfiguration; +import org.apache.cloudstack.framework.config.ConfigKey; +import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.framework.jobs.AsyncJobManager; +import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.management.ManagementServerHost.State; +import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.TriggerShutdownManagementServerHostCommand; +import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.commons.collections.CollectionUtils; + +import com.cloud.agent.AgentManager; +import com.cloud.agent.api.Command; +import com.cloud.cluster.ClusterManager; +import com.cloud.cluster.ManagementServerHostVO; +import 
com.cloud.cluster.dao.ManagementServerHostDao; +import com.cloud.host.dao.HostDao; +import com.cloud.serializer.GsonHelper; +import com.cloud.utils.StringUtils; +import com.cloud.utils.component.ManagerBase; +import com.cloud.utils.component.PluggableService; +import com.cloud.utils.concurrency.NamedThreadFactory; +import com.cloud.utils.exception.CloudRuntimeException; +import com.google.gson.Gson; + +public class ManagementServerMaintenanceManagerImpl extends ManagerBase implements ManagementServerMaintenanceManager, PluggableService, Configurable { + + Gson gson; + + @Inject + private AsyncJobManager jobManager; + @Inject + private ClusterManager clusterManager; + @Inject + private AgentManager agentMgr; + @Inject + private IndirectAgentLB indirectAgentLB; + @Inject + private ManagementServerHostDao msHostDao; + @Inject + private HostDao hostDao; + + private final List _listeners = new ArrayList<>(); + + private boolean shutdownTriggered = false; + private boolean preparingForShutdown = false; + private boolean preparingForMaintenance = false; + private long maintenanceStartTime = 0; + private String lbAlgorithm; + + private ScheduledExecutorService pendingJobsCheckTask; + + protected ManagementServerMaintenanceManagerImpl() { + super(); + gson = GsonHelper.getGson(); + } + + @Override + public boolean start() { + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + if (msHost != null) { + State[] maintenanceStates = {State.PreparingForMaintenance, State.Maintenance}; + if (Arrays.asList(maintenanceStates).contains(msHost.getState())) { + this.preparingForMaintenance = true; + jobManager.disableAsyncJobs(); + msHostDao.updateState(msHost.getId(), State.Maintenance); + } + } + return true; + } + + @Override + public void registerListener(ManagementServerMaintenanceListener listener) { + synchronized (_listeners) { + logger.info("Register management server maintenance listener " + listener.getClass()); + 
_listeners.add(listener); + } + } + + @Override + public void unregisterListener(ManagementServerMaintenanceListener listener) { + synchronized (_listeners) { + logger.info("Unregister management server maintenance listener " + listener.getClass()); + _listeners.remove(listener); + } + } + + @Override + public void onMaintenance() { + synchronized (_listeners) { + for (final ManagementServerMaintenanceListener listener : _listeners) { + logger.info("Invoke, on maintenance for listener " + listener.getClass()); + listener.onManagementServerMaintenance(); + } + } + } + + @Override + public void onCancelMaintenance() { + synchronized (_listeners) { + for (final ManagementServerMaintenanceListener listener : _listeners) { + logger.info("Invoke, on cancel maintenance for listener " + listener.getClass()); + listener.onManagementServerCancelMaintenance(); + } + } + } + + @Override + public boolean isShutdownTriggered() { + return shutdownTriggered; + } + + @Override + public boolean isPreparingForShutdown() { + return preparingForShutdown; + } + + @Override + public boolean isPreparingForMaintenance() { + return preparingForMaintenance; + } + + @Override + public void resetPreparingForMaintenance() { + preparingForMaintenance = false; + maintenanceStartTime = 0; + lbAlgorithm = null; + } + + @Override + public long getMaintenanceStartTime() { + return maintenanceStartTime; + } + + @Override + public String getLbAlgorithm() { + return lbAlgorithm; + } + + @Override + public long countPendingJobs(Long... 
msIds) { + return jobManager.countPendingNonPseudoJobs(msIds); + } + + @Override + public boolean isAsyncJobsEnabled() { + return jobManager.isAsyncJobsEnabled(); + } + + @Override + public void triggerShutdown() { + if (this.shutdownTriggered) { + throw new CloudRuntimeException("Shutdown has already been triggered"); + } + this.shutdownTriggered = true; + prepareForShutdown(true); + } + + private void prepareForShutdown(boolean postTrigger) { + if (!postTrigger) { + if (this.preparingForMaintenance) { + throw new CloudRuntimeException("Maintenance has already been initiated, cancel maintenance and try again"); + } + + // Ensure we don't throw an error if triggering a shutdown after just preparing for it + if (this.preparingForShutdown) { + throw new CloudRuntimeException("Shutdown has already been triggered"); + } + } + + this.preparingForShutdown = true; + jobManager.disableAsyncJobs(); + waitForPendingJobs(); + } + + @Override + public void prepareForShutdown() { + prepareForShutdown(false); + } + + @Override + public void cancelShutdown() { + if (!this.preparingForShutdown) { + throw new CloudRuntimeException("Shutdown has not been triggered"); + } + + this.preparingForShutdown = false; + this.shutdownTriggered = false; + resetPreparingForMaintenance(); + jobManager.enableAsyncJobs(); + cancelWaitForPendingJobs(); + } + + @Override + public void prepareForMaintenance(String lbAlorithm) { + if (this.preparingForShutdown) { + throw new CloudRuntimeException("Shutdown has already been triggered, cancel shutdown and try again"); + } + + if (this.preparingForMaintenance) { + throw new CloudRuntimeException("Maintenance has already been initiated"); + } + this.preparingForMaintenance = true; + this.maintenanceStartTime = System.currentTimeMillis(); + this.lbAlgorithm = lbAlorithm; + jobManager.disableAsyncJobs(); + waitForPendingJobs(); + } + + @Override + public void cancelMaintenance() { + if (!this.preparingForMaintenance) { + throw new 
CloudRuntimeException("Maintenance has not been initiated"); + } + resetPreparingForMaintenance(); + this.preparingForShutdown = false; + this.shutdownTriggered = false; + jobManager.enableAsyncJobs(); + cancelWaitForPendingJobs(); + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + if (msHost != null && State.Maintenance.equals(msHost.getState())) { + onCancelMaintenance(); + } + } + + private void waitForPendingJobs() { + cancelWaitForPendingJobs(); + pendingJobsCheckTask = Executors.newScheduledThreadPool(1, new NamedThreadFactory("PendingJobsCheck")); + long pendingJobsCheckDelayInSecs = 1L; // 1 sec + long pendingJobsCheckPeriodInSecs = 3L; // every 3 secs, check more frequently for pending jobs + pendingJobsCheckTask.scheduleAtFixedRate(new CheckPendingJobsTask(this), pendingJobsCheckDelayInSecs, pendingJobsCheckPeriodInSecs, TimeUnit.SECONDS); + } + + @Override + public void cancelWaitForPendingJobs() { + if (pendingJobsCheckTask != null) { + pendingJobsCheckTask.shutdown(); + pendingJobsCheckTask = null; + } + } + + @Override + public ManagementServerMaintenanceResponse readyForShutdown(ReadyForShutdownCmd cmd) { + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd) { + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server, cannot prepare for shutdown"); + } + + if (!State.Up.equals(msHost.getState())) { + throw new CloudRuntimeException("Management server is not in the right state to prepare for shutdown"); + } + + final Command[] cmds = new Command[1]; + cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + 
logger.info("PrepareForShutdownCmd result : " + result); + if (!result.startsWith("Success")) { + throw new CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.PreparingForShutDown); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd) { + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server, cannot trigger shutdown"); + } + + if (!(State.Up.equals(msHost.getState()) || State.Maintenance.equals(msHost.getState()) || State.PreparingForShutDown.equals(msHost.getState()) || + State.ReadyToShutDown.equals(msHost.getState()))) { + throw new CloudRuntimeException("Management server is not in the right state to trigger shutdown"); + } + + if (State.Up.equals(msHost.getState())) { + msHostDao.updateState(msHost.getId(), State.PreparingForShutDown); + } + + final Command[] cmds = new Command[1]; + cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("TriggerShutdownCmd result : " + result); + if (!result.startsWith("Success")) { + throw new CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.ShuttingDown); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd) { + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server, cannot cancel shutdown"); + } + + if (!(State.PreparingForShutDown.equals(msHost.getState()) || State.ReadyToShutDown.equals(msHost.getState()))) { + throw new CloudRuntimeException("Management 
server is not in the right state to cancel shutdown"); + } + + final Command[] cmds = new Command[1]; + cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("CancelShutdownCmd result : " + result); + if (!result.startsWith("Success")) { + throw new CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.Up); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd) { + if (StringUtils.isNotBlank(cmd.getAlgorithm())) { + indirectAgentLB.checkLBAlgorithmName(cmd.getAlgorithm()); + } + + final List activeMsList = msHostDao.listBy(State.Up); + if (CollectionUtils.isEmpty(activeMsList)) { + throw new CloudRuntimeException("Cannot prepare for maintenance, no active management servers found"); + } + + if (activeMsList.size() == 1) { + throw new CloudRuntimeException("Prepare for maintenance not supported, there is only one active management server"); + } + + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Cannot prepare for maintenance, unable to find the management server"); + } + + if (!State.Up.equals(msHost.getState())) { + throw new CloudRuntimeException("Management server is not in the right state to prepare for maintenance"); + } + + final List preparingForMaintenanceMsList = msHostDao.listBy(State.PreparingForMaintenance); + if (CollectionUtils.isNotEmpty(preparingForMaintenanceMsList)) { + throw new CloudRuntimeException("Cannot prepare for maintenance, there are other management servers preparing for maintenance"); + } + + if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid())) { + List indirectAgentMsList = indirectAgentLB.getManagementServerList(); + 
indirectAgentMsList.remove(msHost.getServiceIP()); + List nonUpMsList = msHostDao.listNonUpStateMsIPs(); + indirectAgentMsList.removeAll(nonUpMsList); + if (CollectionUtils.isEmpty(indirectAgentMsList)) { + throw new CloudRuntimeException(String.format("Cannot prepare for maintenance, no other active management servers found from '%s' setting", ApiServiceConfiguration.ManagementServerAddresses.key())); + } + } + + List lastAgents = hostDao.listByMs(cmd.getManagementServerId()); + agentMgr.setLastAgents(lastAgents); + + final Command[] cmds = new Command[1]; + cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("PrepareForMaintenanceCmd result : " + result); + if (!result.startsWith("Success")) { + agentMgr.setLastAgents(null); + throw new CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd) { + ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server, cannot cancel maintenance"); + } + + if (!(State.Maintenance.equals(msHost.getState()) || State.PreparingForMaintenance.equals(msHost.getState()))) { + throw new CloudRuntimeException("Management server is not in the right state to cancel maintenance"); + } + + final Command[] cmds = new Command[1]; + cmds[0] = new CancelMaintenanceManagementServerHostCommand(msHost.getMsid()); + String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); + logger.info("CancelMaintenanceCmd result : " + result); + if (!result.startsWith("Success")) { + throw new 
CloudRuntimeException(result); + } + + msHostDao.updateState(msHost.getId(), State.Up); + agentMgr.setLastAgents(null); + return prepareMaintenanceResponse(cmd.getManagementServerId()); + } + + @Override + public void cancelPreparingForMaintenance(ManagementServerHostVO msHost) { + resetPreparingForMaintenance(); + this.preparingForShutdown = false; + this.shutdownTriggered = false; + jobManager.enableAsyncJobs(); + if (msHost == null) { + msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + } + msHostDao.updateState(msHost.getId(), State.Up); + } + + private ManagementServerMaintenanceResponse prepareMaintenanceResponse(Long managementServerId) { + ManagementServerHostVO msHost; + Long[] msIds; + if (managementServerId == null) { + msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + } else { + msHost = msHostDao.findById(managementServerId); + } + if (msHost == null) { + throw new CloudRuntimeException("Unable to find the management server"); + } + + State[] maintenanceStates = {State.PreparingForMaintenance, State.Maintenance}; + State[] shutdownStates = {State.ShuttingDown, State.PreparingForShutDown, State.ReadyToShutDown}; + boolean maintenanceInitiatedForMS = Arrays.asList(maintenanceStates).contains(msHost.getState()); + boolean shutdownTriggeredForMS = Arrays.asList(shutdownStates).contains(msHost.getState()); + msIds = new Long[]{msHost.getMsid()}; + List agents = hostDao.listByMs(managementServerId); + long agentsCount = hostDao.countByMs(managementServerId); + long pendingJobCount = countPendingJobs(msIds); + return new ManagementServerMaintenanceResponse(msHost.getUuid(), msHost.getState(), maintenanceInitiatedForMS, shutdownTriggeredForMS, pendingJobCount == 0, pendingJobCount, agentsCount, agents); + } + + @Override + public List> getCommands() { + final List> cmdList = new ArrayList<>(); + cmdList.add(PrepareForMaintenanceCmd.class); + cmdList.add(CancelMaintenanceCmd.class); + 
cmdList.add(PrepareForShutdownCmd.class); + cmdList.add(CancelShutdownCmd.class); + cmdList.add(ReadyForShutdownCmd.class); + cmdList.add(TriggerShutdownCmd.class); + return cmdList; + } + + @Override + public String getConfigComponentName() { + return ManagementServerMaintenanceManager.class.getSimpleName(); + } + + @Override + public ConfigKey[] getConfigKeys() { + return new ConfigKey[]{ + ManagementServerMaintenanceTimeoutInMins + }; + } + + private final class CheckPendingJobsTask extends ManagedContextRunnable { + + private ManagementServerMaintenanceManager managementServerMaintenanceManager; + private boolean agentsTransferTriggered = false; + + public CheckPendingJobsTask(ManagementServerMaintenanceManager managementServerMaintenanceManager) { + this.managementServerMaintenanceManager = managementServerMaintenanceManager; + } + + @Override + protected void runInContext() { + try { + // If the maintenance or shutdown has been cancelled + if (!(managementServerMaintenanceManager.isPreparingForMaintenance() || managementServerMaintenanceManager.isPreparingForShutdown())) { + logger.info("Maintenance/Shutdown cancelled, terminating the pending jobs check timer task"); + managementServerMaintenanceManager.cancelWaitForPendingJobs(); + return; + } + + if (managementServerMaintenanceManager.isPreparingForMaintenance() && isMaintenanceWindowExpired()) { + logger.debug("Maintenance window timeout, terminating the pending jobs check timer task"); + managementServerMaintenanceManager.cancelPreparingForMaintenance(null); + managementServerMaintenanceManager.cancelWaitForPendingJobs(); + return; + } + + long totalPendingJobs = managementServerMaintenanceManager.countPendingJobs(ManagementServerNode.getManagementServerId()); + int totalAgents = hostDao.countByMs(ManagementServerNode.getManagementServerId()); + String msg = String.format("Checking for triggered maintenance or shutdown... 
shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d] AgentsCount [%d]", + managementServerMaintenanceManager.isShutdownTriggered(), managementServerMaintenanceManager.isAsyncJobsEnabled(), totalPendingJobs, totalAgents); + logger.debug(msg); + + if (totalPendingJobs > 0) { + logger.info(String.format("There are %d pending jobs, trying again later", totalPendingJobs)); + return; + } + + // No more pending jobs. Good to terminate + if (managementServerMaintenanceManager.isShutdownTriggered()) { + logger.info("MS is Shutting Down Now"); + // update state to down ? + System.exit(0); + } + if (managementServerMaintenanceManager.isPreparingForMaintenance()) { + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + if (totalAgents == 0) { + logger.info("MS is in Maintenance Mode"); + msHostDao.updateState(msHost.getId(), State.Maintenance); + managementServerMaintenanceManager.onMaintenance(); + managementServerMaintenanceManager.cancelWaitForPendingJobs(); + return; + } + + if (agentsTransferTriggered) { + logger.info(String.format("There are %d agents, trying again later", totalAgents)); + return; + } + + agentsTransferTriggered = true; + logger.info(String.format("Preparing for maintenance - migrating agents from management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); + boolean agentsMigrated = indirectAgentLB.migrateAgents(msHost.getUuid(), ManagementServerNode.getManagementServerId(), managementServerMaintenanceManager.getLbAlgorithm(), remainingMaintenanceWindowInMs()); + if (!agentsMigrated) { + logger.warn(String.format("Unable to prepare for maintenance, cannot migrate indirect agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); + managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost); + managementServerMaintenanceManager.cancelWaitForPendingJobs(); + return; + } + + 
if(!agentMgr.transferDirectAgentsFromMS(msHost.getUuid(), ManagementServerNode.getManagementServerId(), remainingMaintenanceWindowInMs())) { + logger.warn(String.format("Unable to prepare for maintenance, cannot transfer direct agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); + managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost); + managementServerMaintenanceManager.cancelWaitForPendingJobs(); + return; + } + } else if (managementServerMaintenanceManager.isPreparingForShutdown()) { + logger.info("MS is Ready To Shutdown"); + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + msHostDao.updateState(msHost.getId(), State.ReadyToShutDown); + managementServerMaintenanceManager.cancelWaitForPendingJobs(); + return; + } + } catch (final Exception e) { + logger.error("Error trying to check/run pending jobs task", e); + } + } + + private boolean isMaintenanceWindowExpired() { + long maintenanceElapsedTimeInMs = System.currentTimeMillis() - managementServerMaintenanceManager.getMaintenanceStartTime(); + if (maintenanceElapsedTimeInMs >= (ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000)) { + return true; + } + return false; + } + + private long remainingMaintenanceWindowInMs() { + long maintenanceElapsedTimeInMs = System.currentTimeMillis() - managementServerMaintenanceManager.getMaintenanceStartTime(); + long remainingMaintenanceWindowTimeInMs = (ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000) - maintenanceElapsedTimeInMs; + return (remainingMaintenanceWindowTimeInMs > 0) ? 
remainingMaintenanceWindowTimeInMs : 0; + } + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java index 8fe33317bc0..093a5d35eba 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. -package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; import com.cloud.agent.api.Command; diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java new file mode 100644 index 00000000000..50eb73b7bca --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +package org.apache.cloudstack.maintenance.command; + +public class CancelMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { + + public CancelMaintenanceManagementServerHostCommand(long msId) { + super(msId); + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java index eef44446aa1..2cbdbd2f07a 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. 
-package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class CancelShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java new file mode 100644 index 00000000000..8f2a4e62b32 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ + +package org.apache.cloudstack.maintenance.command; + +public class PrepareForMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { + String lbAlgorithm; + + public PrepareForMaintenanceManagementServerHostCommand(long msId) { + super(msId); + } + + public PrepareForMaintenanceManagementServerHostCommand(long msId, String lbAlgorithm) { + super(msId); + this.lbAlgorithm = lbAlgorithm; + } + + public String getLbAlgorithm() { + return lbAlgorithm; + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java index 32a9201d551..15f04ae11e6 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. 
-package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class PrepareForShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java index e0d1879fa35..41e2e7e86a0 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. -package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class TriggerShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties similarity index 97% rename from plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties rename to plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties index fd85c3085ca..547afff867f 100644 --- a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties +++ b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties @@ -14,5 +14,5 @@ # KIND, either express or implied. 
See the License for the # specific language governing permissions and limitations # under the License. -name=shutdown +name=maintenance parent=api diff --git a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml similarity index 83% rename from plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml rename to plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml index 5318b3bf446..bc5504634ce 100644 --- a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml +++ b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml @@ -22,8 +22,8 @@ http://www.springframework.org/schema/beans/spring-beans.xsd" > - - + + diff --git a/plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java b/plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java similarity index 84% rename from plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java rename to plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java index 9f75251c93f..8e1c09bf995 100644 --- a/plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java +++ b/plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java @@ -15,18 +15,15 @@ // specific language governing permissions and limitations // under the License. 
-package org.apache.cloudstack.shutdown; +package org.apache.cloudstack.maintenance; import org.apache.cloudstack.framework.jobs.AsyncJobManager; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.Mockito; -import org.mockito.MockitoAnnotations; import org.mockito.Spy; import org.mockito.junit.MockitoJUnitRunner; @@ -34,20 +31,14 @@ import com.cloud.utils.exception.CloudRuntimeException; @RunWith(MockitoJUnitRunner.class) -public class ShutdownManagerImplTest { +public class ManagementServerMaintenanceManagerImplTest { @Spy @InjectMocks - ShutdownManagerImpl spy; + ManagementServerMaintenanceManagerImpl spy; @Mock AsyncJobManager jobManagerMock; - private AutoCloseable closeable; - - @Before - public void setUp() throws Exception { - closeable = MockitoAnnotations.openMocks(this); - } private long prepareCountPendingJobs() { long expectedCount = 1L; @@ -79,14 +70,8 @@ public class ShutdownManagerImplTest { spy.prepareForShutdown(); }); - Mockito.doNothing().when(jobManagerMock).enableAsyncJobs(); spy.cancelShutdown(); Mockito.verify(jobManagerMock).enableAsyncJobs(); } - - @After - public void tearDown() throws Exception { - closeable.close(); - } } diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java b/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java index 8c93f2e1f44..ba4822fa852 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java @@ -20,6 +20,8 @@ package org.apache.cloudstack.api; * metric local api constants */ public interface MetricConstants { + String LAST_AGENTS = "lastagents"; + String AGENTS = "agents"; String AGENT_COUNT = "agentcount"; String AVAILABLE_PROCESSORS = "availableprocessors"; String CONNECTIONS = 
"connections"; diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java b/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java index 51524c12912..9fca23dcee3 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java @@ -895,6 +895,8 @@ public class MetricsServiceImpl extends MutualExclusiveIdsManagerBase implements metricsResponse.setDbLocal(status.isDbLocal()); metricsResponse.setUsageLocal(status.isUsageLocal()); metricsResponse.setAvailableProcessors(status.getAvailableProcessors()); + metricsResponse.setLastAgents(status.getLastAgents()); + metricsResponse.setAgents(status.getAgents()); metricsResponse.setAgentCount(status.getAgentCount()); metricsResponse.setCollectionTime(status.getCollectionTime()); metricsResponse.setSessions(status.getSessions()); diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java b/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java index 95c3fd09c07..d96f5b14f0d 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java @@ -22,6 +22,7 @@ import org.apache.cloudstack.api.MetricConstants; import org.apache.cloudstack.api.response.ManagementServerResponse; import java.util.Date; +import java.util.List; public class ManagementServerMetricsResponse extends ManagementServerResponse { @@ -29,6 +30,14 @@ public class ManagementServerMetricsResponse extends ManagementServerResponse { @Param(description = "the number of processors available to the JVM") private Integer availableProcessors; + @SerializedName(MetricConstants.LAST_AGENTS) + @Param(description = "the last agents this Management 
Server is responsible for, before preparing for maintenance", since = "4.18.1") + private List lastAgents; + + @SerializedName(MetricConstants.AGENTS) + @Param(description = "the agents this Management Server is responsible for", since = "4.18.1") + private List agents; + @SerializedName(MetricConstants.AGENT_COUNT) @Param(description = "the number of agents this Management Server is responsible for") private Integer agentCount; @@ -121,6 +130,14 @@ public class ManagementServerMetricsResponse extends ManagementServerResponse { this.availableProcessors = availableProcessors; } + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + + public void setAgents(List agents) { + this.agents = agents; + } + public void setAgentCount(int agentCount) { this.agentCount = agentCount; } diff --git a/plugins/pom.xml b/plugins/pom.xml index 3d5da1d59ac..1667e151cfc 100755 --- a/plugins/pom.xml +++ b/plugins/pom.xml @@ -118,7 +118,7 @@ outofbandmanagement-drivers/nested-cloudstack outofbandmanagement-drivers/redfish - shutdown + maintenance storage/sharedfs/storagevm storage/image/default diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java b/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java deleted file mode 100644 index 22f43cb4f62..00000000000 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.cloudstack.shutdown; - -import org.apache.cloudstack.api.command.CancelShutdownCmd; -import org.apache.cloudstack.api.command.PrepareForShutdownCmd; -import org.apache.cloudstack.api.command.ReadyForShutdownCmd; -import org.apache.cloudstack.api.command.TriggerShutdownCmd; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; - -public interface ShutdownManager { - // Returns the number of pending jobs for the given Management server msids. - // NOTE: This is the msid and NOT the id - long countPendingJobs(Long... msIds); - - // Indicates whether a shutdown has been triggered on the current management server - boolean isShutdownTriggered(); - - // Indicates whether the current management server is preparing to shutdown - boolean isPreparingForShutdown(); - - // Triggers a shutdown on the current management server by not accepting any more async jobs and shutting down when there are no pending jobs - void triggerShutdown(); - - // Prepares the current management server to shutdown by not accepting any more async jobs - void prepareForShutdown(); - - // Cancels the shutdown on the current management server - void cancelShutdown(); - - // Returns whether the given ms can be shut down - ReadyForShutdownResponse readyForShutdown(Long managementserverid); - - // Returns whether the any of the ms can be shut down and if a shutdown has been triggered on any running ms - ReadyForShutdownResponse readyForShutdown(ReadyForShutdownCmd cmd); - - // Prepares the specified management server to shutdown by not accepting any 
more async jobs - ReadyForShutdownResponse prepareForShutdown(PrepareForShutdownCmd cmd); - - // Cancels the shutdown on the specified management server - ReadyForShutdownResponse cancelShutdown(CancelShutdownCmd cmd); - - // Triggers a shutdown on the specified management server by not accepting any more async jobs and shutting down when there are no pending jobs - ReadyForShutdownResponse triggerShutdown(TriggerShutdownCmd cmd); -} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java b/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java deleted file mode 100644 index c33243357fc..00000000000 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java +++ /dev/null @@ -1,265 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.cloudstack.shutdown; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Timer; -import java.util.TimerTask; - -import javax.inject.Inject; - -import org.apache.cloudstack.api.command.CancelShutdownCmd; -import org.apache.cloudstack.api.command.PrepareForShutdownCmd; -import org.apache.cloudstack.api.command.ReadyForShutdownCmd; -import org.apache.cloudstack.api.command.TriggerShutdownCmd; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; -import org.apache.cloudstack.framework.jobs.AsyncJobManager; -import org.apache.cloudstack.management.ManagementServerHost.State; -import org.apache.cloudstack.shutdown.command.CancelShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.PrepareForShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.TriggerShutdownManagementServerHostCommand; -import org.apache.cloudstack.utils.identity.ManagementServerNode; - -import com.cloud.agent.api.Command; -import com.cloud.cluster.ClusterManager; -import com.cloud.cluster.ManagementServerHostVO; -import com.cloud.cluster.dao.ManagementServerHostDao; -import com.cloud.serializer.GsonHelper; -import com.cloud.utils.component.ManagerBase; -import com.cloud.utils.component.PluggableService; -import com.cloud.utils.exception.CloudRuntimeException; -import com.google.gson.Gson; - -public class ShutdownManagerImpl extends ManagerBase implements ShutdownManager, PluggableService{ - Gson gson; - - @Inject - private AsyncJobManager jobManager; - @Inject - private ManagementServerHostDao msHostDao; - @Inject - private ClusterManager clusterManager; - - private boolean shutdownTriggered = false; - private boolean preparingForShutdown = false; - - private Timer timer = new Timer(); - private TimerTask shutdownTask; - - protected ShutdownManagerImpl() { - super(); - gson = GsonHelper.getGson(); - } - - @Override - public boolean isShutdownTriggered() { - 
return shutdownTriggered; - } - - @Override - public boolean isPreparingForShutdown() { - return preparingForShutdown; - } - - @Override - public long countPendingJobs(Long... msIds) { - return jobManager.countPendingNonPseudoJobs(msIds); - } - - @Override - public void triggerShutdown() { - if (this.shutdownTriggered) { - throw new CloudRuntimeException("A shutdown has already been triggered"); - } - this.shutdownTriggered = true; - prepareForShutdown(true); - } - - private void prepareForShutdown(boolean postTrigger) { - // Ensure we don't throw an error if triggering a shutdown after just preparing for it - if (!postTrigger && this.preparingForShutdown) { - throw new CloudRuntimeException("A shutdown has already been triggered"); - } - this.preparingForShutdown = true; - jobManager.disableAsyncJobs(); - if (this.shutdownTask != null) { - this.shutdownTask.cancel(); - this.shutdownTask = null; - } - this.shutdownTask = new ShutdownTask(this); - long period = 30L * 1000; - long delay = period / 2; - logger.debug(String.format("Scheduling shutdown task with delay: %d and period: %d", delay, period)); - timer.scheduleAtFixedRate(shutdownTask, delay, period); - } - - @Override - public void prepareForShutdown() { - prepareForShutdown(false); - } - - @Override - public void cancelShutdown() { - if (!this.preparingForShutdown) { - throw new CloudRuntimeException("A shutdown has not been triggered"); - } - - this.preparingForShutdown = false; - this.shutdownTriggered = false; - jobManager.enableAsyncJobs(); - if (shutdownTask != null) { - shutdownTask.cancel(); - } - shutdownTask = null; - } - - @Override - public ReadyForShutdownResponse readyForShutdown(Long managementserverid) { - Long[] msIds = null; - boolean shutdownTriggeredAnywhere = false; - State[] shutdownTriggeredStates = {State.ShuttingDown, State.PreparingToShutDown, State.ReadyToShutDown}; - if (managementserverid == null) { - List msHosts = msHostDao.listBy(shutdownTriggeredStates); - if (msHosts != null 
&& !msHosts.isEmpty()) { - msIds = new Long[msHosts.size()]; - for (int i = 0; i < msHosts.size(); i++) { - msIds[i] = msHosts.get(i).getMsid(); - } - shutdownTriggeredAnywhere = !msHosts.isEmpty(); - } - } else { - ManagementServerHostVO msHost = msHostDao.findById(managementserverid); - msIds = new Long[]{msHost.getMsid()}; - shutdownTriggeredAnywhere = Arrays.asList(shutdownTriggeredStates).contains(msHost.getState()); - } - long pendingJobCount = countPendingJobs(msIds); - return new ReadyForShutdownResponse(managementserverid, shutdownTriggeredAnywhere, pendingJobCount == 0, pendingJobCount); - } - - @Override - public ReadyForShutdownResponse readyForShutdown(ReadyForShutdownCmd cmd) { - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse prepareForShutdown(PrepareForShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("PrepareForShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new CloudRuntimeException(result); - } - - msHost.setState(State.PreparingToShutDown); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse triggerShutdown(TriggerShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("TriggerShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new CloudRuntimeException(result); - } - - 
msHost.setState(State.ShuttingDown); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse cancelShutdown(CancelShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("CancelShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new CloudRuntimeException(result); - } - - msHost.setState(State.Up); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public List> getCommands() { - final List> cmdList = new ArrayList<>(); - cmdList.add(CancelShutdownCmd.class); - cmdList.add(PrepareForShutdownCmd.class); - cmdList.add(ReadyForShutdownCmd.class); - cmdList.add(TriggerShutdownCmd.class); - return cmdList; - } - - private final class ShutdownTask extends TimerTask { - - private ShutdownManager shutdownManager; - - public ShutdownTask(ShutdownManager shutdownManager) { - this.shutdownManager = shutdownManager; - } - - @Override - public void run() { - try { - Long totalPendingJobs = shutdownManager.countPendingJobs(ManagementServerNode.getManagementServerId()); - String msg = String.format("Checking for triggered shutdown... shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d]", - shutdownManager.isShutdownTriggered(), shutdownManager.isPreparingForShutdown(), totalPendingJobs); - logger.info(msg); - - // If the shutdown has been cancelled - if (!shutdownManager.isPreparingForShutdown()) { - logger.info("Shutdown cancelled. Terminating the shutdown timer task"); - this.cancel(); - return; - } - - // No more pending jobs. 
Good to terminate - if (totalPendingJobs == 0) { - if (shutdownManager.isShutdownTriggered()) { - logger.info("Shutting down now"); - System.exit(0); - } - if (shutdownManager.isPreparingForShutdown()) { - logger.info("Ready to shutdown"); - ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); - msHost.setState(State.ReadyToShutDown); - msHostDao.persist(msHost); - } - } - - logger.info("Pending jobs. Trying again later"); - } catch (final Exception e) { - logger.error("Error trying to run shutdown task", e); - } - } - } -} diff --git a/server/src/main/java/com/cloud/api/ApiDispatcher.java b/server/src/main/java/com/cloud/api/ApiDispatcher.java index d8eb26ea0a7..6a43ff10f31 100644 --- a/server/src/main/java/com/cloud/api/ApiDispatcher.java +++ b/server/src/main/java/com/cloud/api/ApiDispatcher.java @@ -94,7 +94,7 @@ public class ApiDispatcher { if (asyncJobManager.isAsyncJobsEnabled()) { asyncCreationDispatchChain.dispatch(new DispatchTask(cmd, params)); } else { - throw new CloudRuntimeException("A shutdown has been triggered. Can not accept new jobs"); + throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. 
Can not accept new jobs"); } } diff --git a/server/src/main/java/com/cloud/api/ApiServer.java b/server/src/main/java/com/cloud/api/ApiServer.java index 824d60eec81..6016b24502e 100644 --- a/server/src/main/java/com/cloud/api/ApiServer.java +++ b/server/src/main/java/com/cloud/api/ApiServer.java @@ -57,6 +57,8 @@ import javax.naming.ConfigurationException; import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpSession; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.user.Account; import com.cloud.user.AccountManager; import com.cloud.user.AccountManagerImpl; @@ -113,6 +115,7 @@ import org.apache.cloudstack.framework.messagebus.MessageDispatcher; import org.apache.cloudstack.framework.messagebus.MessageHandler; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.user.UserPasswordResetManager; +import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.EnumUtils; import org.apache.http.ConnectionClosedException; @@ -222,6 +225,8 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer @Inject private ProjectDao projectDao; @Inject + private ManagementServerHostDao msHostDao; + @Inject private UUIDManager uuidMgr; @Inject private UserPasswordResetManager userPasswordResetManager; @@ -471,7 +476,6 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer s_apiNameCmdClassMap.put(apiName, apiCmdList); } apiCmdList.add(cmdClass); - } setEncodeApiResponse(EncodeApiResponse.value()); @@ -1172,6 +1176,9 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer if (ApiConstants.ISSUER_FOR_2FA.equalsIgnoreCase(attrName)) { response.setIssuerFor2FA(attrObj.toString()); } + if (ApiConstants.MANAGEMENT_SERVER_ID.equalsIgnoreCase(attrName)) { + 
response.setManagementServerId(attrObj.toString()); + } } } response.setResponseName("loginresponse"); @@ -1249,6 +1256,13 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer session.setAttribute(ApiConstants.PROVIDER_FOR_2FA, userAcct.getUser2faProvider()); session.setAttribute(ApiConstants.ISSUER_FOR_2FA, issuerFor2FA); + if (accountMgr.isRootAdmin(userAcct.getAccountId())) { + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + if (msHost != null && msHost.getUuid() != null) { + session.setAttribute(ApiConstants.MANAGEMENT_SERVER_ID, msHost.getUuid()); + } + } + // (bug 5483) generate a session key that the user must submit on every request to prevent CSRF, add that // to the login response so that session-based authenticators know to send the key back final SecureRandom sesssionKeyRandom = new SecureRandom(); diff --git a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java index 631cdc5b403..3964cb0b1cf 100644 --- a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java +++ b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java @@ -143,6 +143,7 @@ import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; import org.apache.cloudstack.engine.subsystem.api.storage.TemplateState; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.framework.jobs.AsyncJobManager; import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO; import org.apache.cloudstack.outofbandmanagement.OutOfBandManagementVO; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; @@ -613,6 +614,8 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q @Inject private ManagementServerHostPeerJoinDao mshostPeerJoinDao; + @Inject + private AsyncJobManager jobManager; 
private SearchCriteria getMinimumCpuServiceOfferingJoinSearchCriteria(int cpu) { SearchCriteria sc = _srvOfferingJoinDao.createSearchCriteria(); @@ -2353,6 +2356,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q Long startIndex = cmd.getStartIndex(); Long pageSize = cmd.getPageSizeVal(); Hypervisor.HypervisorType hypervisorType = cmd.getHypervisor(); + Long msId = cmd.getManagementServerId(); Filter searchFilter = new Filter(HostVO.class, "id", Boolean.TRUE, startIndex, pageSize); @@ -2368,6 +2372,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q hostSearchBuilder.and("clusterId", hostSearchBuilder.entity().getClusterId(), SearchCriteria.Op.EQ); hostSearchBuilder.and("resourceState", hostSearchBuilder.entity().getResourceState(), SearchCriteria.Op.EQ); hostSearchBuilder.and("hypervisor_type", hostSearchBuilder.entity().getHypervisorType(), SearchCriteria.Op.EQ); + hostSearchBuilder.and("mgmt_server_id", hostSearchBuilder.entity().getManagementServerId(), SearchCriteria.Op.EQ); if (keyword != null) { hostSearchBuilder.and().op("keywordName", hostSearchBuilder.entity().getName(), SearchCriteria.Op.LIKE); @@ -2448,6 +2453,13 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q sc.setParameters("hypervisor_type", hypervisorType); } + if (msId != null) { + ManagementServerHostVO msHost = msHostDao.findById(msId); + if (msHost != null) { + sc.setParameters("mgmt_server_id", msHost.getMsid()); + } + } + Pair, Integer> uniqueHostPair = hostDao.searchAndCount(sc, searchFilter); Integer count = uniqueHostPair.second(); List hostIds = uniqueHostPair.first().stream().map(HostVO::getId).collect(Collectors.toList()); @@ -5426,6 +5438,8 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q mgmtResponse.addPeer(createPeerManagementServerNodeResponse(peer)); } } + mgmtResponse.setAgentsCount((long) hostDao.countByMs(mgmt.getMsid())); + 
mgmtResponse.setPendingJobsCount(jobManager.countPendingNonPseudoJobs(mgmt.getMsid())); mgmtResponse.setObjectName("managementserver"); return mgmtResponse; } diff --git a/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java index 319e08deb39..08b896edb17 100644 --- a/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java @@ -20,6 +20,8 @@ import java.util.Date; import java.util.List; +import javax.inject.Inject; + import org.springframework.stereotype.Component; import org.apache.cloudstack.api.ResponseObject; @@ -29,6 +31,8 @@ import org.apache.cloudstack.framework.jobs.AsyncJob; import com.cloud.api.ApiSerializerHelper; import com.cloud.api.SerializationContext; import com.cloud.api.query.vo.AsyncJobJoinVO; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.utils.db.GenericDaoBase; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; @@ -36,6 +40,9 @@ import com.cloud.utils.db.SearchCriteria; @Component public class AsyncJobJoinDaoImpl extends GenericDaoBase implements AsyncJobJoinDao { + @Inject + private ManagementServerHostDao managementServerHostDao; + private final SearchBuilder jobIdSearch; protected AsyncJobJoinDaoImpl() { @@ -63,7 +70,13 @@ public class AsyncJobJoinDaoImpl extends GenericDaoBase im jobResponse.setJobId(job.getUuid()); jobResponse.setJobStatus(job.getStatus()); jobResponse.setJobProcStatus(job.getProcessStatus()); - jobResponse.setMsid(job.getExecutingMsid()); + if (job.getExecutingMsid() != null) { + ManagementServerHostVO managementServer = managementServerHostDao.findByMsid(job.getExecutingMsid()); + if (managementServer != null) { + jobResponse.setManagementServerId(managementServer.getUuid()); + 
jobResponse.setManagementServerName(managementServer.getName()); + } + } if (job.getInstanceType() != null && job.getInstanceId() != null) { jobResponse.setJobInstanceType(job.getInstanceType().toString()); diff --git a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java index 42966435d4a..feee12dcb20 100644 --- a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java @@ -58,6 +58,8 @@ import com.cloud.storage.StorageStats; import com.cloud.utils.db.GenericDaoBase; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.dao.VMInstanceDao; @Component public class HostJoinDaoImpl extends GenericDaoBase implements HostJoinDao { @@ -73,6 +75,8 @@ public class HostJoinDaoImpl extends GenericDaoBase implements @Inject private ManagementServerHostDao managementServerHostDao; @Inject + private VMInstanceDao virtualMachineDao; + @Inject private AnnotationDao annotationDao; @Inject private AccountManager accountManager; @@ -126,12 +130,19 @@ public class HostJoinDaoImpl extends GenericDaoBase implements hostResponse.setHypervisor(hypervisorType); } hostResponse.setHostType(host.getType()); + if (host.getType().equals(Host.Type.ConsoleProxy) || host.getType().equals(Host.Type.SecondaryStorageVM)) { + VMInstanceVO vm = virtualMachineDao.findVMByInstanceNameIncludingRemoved(host.getName()); + if (vm != null) { + hostResponse.setVirtualMachineId(vm.getUuid()); + } + } hostResponse.setLastPinged(new Date(host.getLastPinged())); Long mshostId = host.getManagementServerId(); if (mshostId != null) { ManagementServerHostVO managementServer = managementServerHostDao.findByMsid(host.getManagementServerId()); if (managementServer != null) { hostResponse.setManagementServerId(managementServer.getUuid()); + 
hostResponse.setManagementServerName(managementServer.getName()); } } hostResponse.setName(host.getName()); diff --git a/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java b/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java index 06ccc1a63f7..373eb80349f 100644 --- a/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java +++ b/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java @@ -85,21 +85,23 @@ public class SshKeysDistriMonitor implements Listener { @Override public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) throws ConnectionException { - if (cmd instanceof StartupRoutingCommand) { - if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.XenServer || + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.XenServer || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { - /*TODO: Get the private/public keys here*/ + /*TODO: Get the private/public keys here*/ - String pubKey = _configDao.getValue("ssh.publickey"); - String prvKey = _configDao.getValue("ssh.privatekey"); + String pubKey = _configDao.getValue("ssh.publickey"); + String prvKey = _configDao.getValue("ssh.privatekey"); - try { - ModifySshKeysCommand cmds = new ModifySshKeysCommand(pubKey, prvKey); - Commands c = new Commands(cmds); - _agentMgr.send(host.getId(), c, this); - } catch (AgentUnavailableException e) { - logger.debug("Failed to send keys to agent: {}", host); - } + try { + ModifySshKeysCommand cmds = new ModifySshKeysCommand(pubKey, prvKey); + Commands c = new Commands(cmds); + _agentMgr.send(host.getId(), c, this); + } catch (AgentUnavailableException e) { + logger.debug("Failed to send keys to agent: {}", host); } 
} } diff --git a/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java b/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java index 067f2fbdbb2..0c37336c09b 100644 --- a/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java +++ b/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java @@ -164,22 +164,23 @@ public class SecurityGroupListener implements Listener { if (logger.isInfoEnabled()) logger.info("Received a host startup notification"); - if (cmd instanceof StartupRoutingCommand) { - //if (Boolean.toString(true).equals(host.getDetail("can_bridge_firewall"))) { - try { - int interval = MIN_TIME_BETWEEN_CLEANUPS + _cleanupRandom.nextInt(MIN_TIME_BETWEEN_CLEANUPS / 2); - CleanupNetworkRulesCmd cleanupCmd = new CleanupNetworkRulesCmd(interval); - Commands c = new Commands(cleanupCmd); - _agentMgr.send(host.getId(), c, this); - if (logger.isInfoEnabled()) - logger.info("Scheduled network rules cleanup, interval=" + cleanupCmd.getInterval()); - } catch (AgentUnavailableException e) { - //usually hypervisors that do not understand sec group rules. - logger.debug("Unable to schedule network rules cleanup for host {}", host, e); - } - if (_workTracker != null) { - _workTracker.processConnect(host.getId()); - } + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + try { + int interval = MIN_TIME_BETWEEN_CLEANUPS + _cleanupRandom.nextInt(MIN_TIME_BETWEEN_CLEANUPS / 2); + CleanupNetworkRulesCmd cleanupCmd = new CleanupNetworkRulesCmd(interval); + Commands c = new Commands(cleanupCmd); + _agentMgr.send(host.getId(), c, this); + if (logger.isInfoEnabled()) + logger.info("Scheduled network rules cleanup, interval=" + cleanupCmd.getInterval()); + } catch (AgentUnavailableException e) { + //usually hypervisors that do not understand sec group rules. 
+ logger.debug("Unable to schedule network rules cleanup for host {}", host, e); + } + if (_workTracker != null) { + _workTracker.processConnect(host.getId()); } } diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 1349e03f205..3cb01ba058c 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -47,9 +47,9 @@ import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; @@ -1284,7 +1284,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, } @Override - public Host cancelMaintenance(final CancelMaintenanceCmd cmd) { + public Host cancelMaintenance(final CancelHostMaintenanceCmd cmd) { final Long hostId = cmd.getId(); // verify input parameters @@ -1501,7 +1501,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, } @Override - public Host maintain(final PrepareForMaintenanceCmd cmd) { + public Host maintain(final PrepareForHostMaintenanceCmd cmd) { final Long hostId = cmd.getId(); final 
HostVO host = _hostDao.findById(hostId); @@ -2508,13 +2508,17 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, } private Host createHostAndAgent(final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance) { + return createHostAndAgent(resource, details, old, hostTags, forRebalance, false); + } + + private Host createHostAndAgent(final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance, final boolean isTransferredConnection) { HostVO host = null; StartupCommand[] cmds = null; boolean hostExists = false; boolean created = false; try { - cmds = resource.initialize(); + cmds = resource.initialize(isTransferredConnection); if (cmds == null) { logger.info("Unable to fully initialize the agent because no StartupCommands are returned"); return null; @@ -2685,7 +2689,12 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, @Override public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance) { - final Host host = createHostAndAgent(resource, details, old, hostTags, forRebalance); + return createHostAndAgent(hostId, resource, details, old, hostTags, forRebalance, false); + } + + @Override + public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance, boolean isTransferredConnection) { + final Host host = createHostAndAgent(resource, details, old, hostTags, forRebalance, isTransferredConnection); return host; } diff --git a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java index 72c28953021..b0f11e4fcba 100644 --- 
a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java @@ -32,7 +32,7 @@ import javax.naming.ConfigurationException; import org.apache.cloudstack.affinity.AffinityGroupProcessor; import org.apache.cloudstack.api.ApiCommandResourceType; import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.framework.config.ConfigKey; @@ -405,7 +405,7 @@ public class RollingMaintenanceManagerImpl extends ManagerBase implements Rollin */ private void putHostIntoMaintenance(Host host) throws InterruptedException, AgentUnavailableException { logger.debug(String.format("Trying to set %s into maintenance", host)); - PrepareForMaintenanceCmd cmd = new PrepareForMaintenanceCmd(); + PrepareForHostMaintenanceCmd cmd = new PrepareForHostMaintenanceCmd(); cmd.setId(host.getId()); resourceManager.maintain(cmd); waitForHostInMaintenance(host.getId()); diff --git a/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java b/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java index 172ab1e83eb..c23e8ed2c9d 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java +++ b/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java @@ -19,6 +19,7 @@ package com.cloud.server; import java.util.Date; +import java.util.List; public class ManagementServerHostStatsEntry implements ManagementServerHostStats { @@ -45,6 +46,8 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats private String jvmVendor; private String jvmVersion; private String osDistribution; + 
private List lastAgents; + private List agents; private int agentCount; private long heapMemoryUsed; @@ -199,6 +202,16 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats return osDistribution; } + @Override + public List getLastAgents() { + return lastAgents; + } + + @Override + public List getAgents() { + return agents; + } + @Override public int getAgentCount() { return agentCount; @@ -290,6 +303,14 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats this.osDistribution = osDistribution; } + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + + public void setAgents(List agents) { + this.agents = agents; + } + public void setAgentCount(int agentCount) { this.agentCount = agentCount; } diff --git a/server/src/main/java/com/cloud/server/ManagementServerImpl.java b/server/src/main/java/com/cloud/server/ManagementServerImpl.java index 76d2943e18c..790e4bbbd38 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerImpl.java +++ b/server/src/main/java/com/cloud/server/ManagementServerImpl.java @@ -101,13 +101,13 @@ import org.apache.cloudstack.api.command.admin.guest.UpdateGuestOsMappingCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; import org.apache.cloudstack.api.command.admin.host.DeleteHostCmd; import org.apache.cloudstack.api.command.admin.host.FindHostsForMigrationCmd; import org.apache.cloudstack.api.command.admin.host.ListHostTagsCmd; import org.apache.cloudstack.api.command.admin.host.ListHostsCmd; -import 
org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.ReleaseHostReservationCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; @@ -3508,14 +3508,14 @@ public class ManagementServerImpl extends ManagerBase implements ManagementServe cmdList.add(MoveDomainCmd.class); cmdList.add(AddHostCmd.class); cmdList.add(AddSecondaryStorageCmd.class); - cmdList.add(CancelMaintenanceCmd.class); + cmdList.add(CancelHostMaintenanceCmd.class); cmdList.add(CancelHostAsDegradedCmd.class); cmdList.add(DeclareHostAsDegradedCmd.class); cmdList.add(DeleteHostCmd.class); cmdList.add(ListHostsCmd.class); cmdList.add(ListHostTagsCmd.class); cmdList.add(FindHostsForMigrationCmd.class); - cmdList.add(PrepareForMaintenanceCmd.class); + cmdList.add(PrepareForHostMaintenanceCmd.class); cmdList.add(ReconnectHostCmd.class); cmdList.add(UpdateHostCmd.class); cmdList.add(UpdateHostPasswordCmd.class); diff --git a/server/src/main/java/com/cloud/server/StatsCollector.java b/server/src/main/java/com/cloud/server/StatsCollector.java index 2bdc008ca1a..c70b36b4091 100644 --- a/server/src/main/java/com/cloud/server/StatsCollector.java +++ b/server/src/main/java/com/cloud/server/StatsCollector.java @@ -829,6 +829,9 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc } private void getDataBaseStatistics(ManagementServerHostStatsEntry newEntry, long msid) { + newEntry.setLastAgents(_agentMgr.getLastAgents()); + List agents = _hostDao.listByMs(msid); + newEntry.setAgents(agents); int count = _hostDao.countByMs(msid); newEntry.setAgentCount(count); } diff --git a/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java b/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java index a0e10c646b5..6f484870e72 
100644 --- a/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java +++ b/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java @@ -95,49 +95,51 @@ public class StoragePoolMonitor implements Listener { @Override public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) throws ConnectionException { - if (cmd instanceof StartupRoutingCommand) { - StartupRoutingCommand scCmd = (StartupRoutingCommand)cmd; - if (scCmd.getHypervisorType() == HypervisorType.XenServer || scCmd.getHypervisorType() == HypervisorType.KVM || + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + StartupRoutingCommand scCmd = (StartupRoutingCommand)cmd; + if (scCmd.getHypervisorType() == HypervisorType.XenServer || scCmd.getHypervisorType() == HypervisorType.KVM || scCmd.getHypervisorType() == HypervisorType.VMware || scCmd.getHypervisorType() == HypervisorType.Simulator || scCmd.getHypervisorType() == HypervisorType.Ovm || scCmd.getHypervisorType() == HypervisorType.Hyperv || scCmd.getHypervisorType() == HypervisorType.LXC || scCmd.getHypervisorType() == HypervisorType.Ovm3) { - List pools = _poolDao.listBy(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER); - List zoneStoragePoolsByTags = _poolDao.findZoneWideStoragePoolsByTags(host.getDataCenterId(), null, false); - List zoneStoragePoolsByHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), scCmd.getHypervisorType()); - zoneStoragePoolsByTags.retainAll(zoneStoragePoolsByHypervisor); - pools.addAll(zoneStoragePoolsByTags); - List zoneStoragePoolsByAnyHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), HypervisorType.Any); - pools.addAll(zoneStoragePoolsByAnyHypervisor); + List pools = _poolDao.listBy(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER); + List zoneStoragePoolsByTags = 
_poolDao.findZoneWideStoragePoolsByTags(host.getDataCenterId(), null, false); + List zoneStoragePoolsByHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), scCmd.getHypervisorType()); + zoneStoragePoolsByTags.retainAll(zoneStoragePoolsByHypervisor); + pools.addAll(zoneStoragePoolsByTags); + List zoneStoragePoolsByAnyHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), HypervisorType.Any); + pools.addAll(zoneStoragePoolsByAnyHypervisor); - // get the zone wide disabled pools list if global setting is true. - if (StorageManager.MountDisabledStoragePool.value()) { - pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), null, null, ScopeType.ZONE)); + // get the zone wide disabled pools list if global setting is true. + if (StorageManager.MountDisabledStoragePool.value()) { + pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), null, null, ScopeType.ZONE)); + } + + // get the cluster wide disabled pool list + if (StorageManager.MountDisabledStoragePool.valueIn(host.getClusterId())) { + pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER)); + } + + for (StoragePoolVO pool : pools) { + if (!pool.isShared()) { + continue; } - // get the cluster wide disabled pool list - if (StorageManager.MountDisabledStoragePool.valueIn(host.getClusterId())) { - pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER)); + if (pool.getPoolType() == StoragePoolType.OCFS2 && !_ocfs2Mgr.prepareNodes(pool.getClusterId())) { + throw new ConnectionException(true, String.format("Unable to prepare OCFS2 nodes for pool %s", pool)); } - for (StoragePoolVO pool : pools) { - if (!pool.isShared()) { - continue; - } - - if (pool.getPoolType() == StoragePoolType.OCFS2 && !_ocfs2Mgr.prepareNodes(pool.getClusterId())) { - throw new ConnectionException(true, 
String.format("Unable to prepare OCFS2 nodes for pool %s", pool)); - } - - Long hostId = host.getId(); - if (logger.isDebugEnabled()) { - logger.debug("Host {} connected, connecting host to shared pool {} and sending storage pool information ...", host, pool); - } - try { - _storageManager.connectHostToSharedPool(host, pool.getId()); - _storageManager.createCapacityEntry(pool.getId()); - } catch (Exception e) { - throw new ConnectionException(true, String.format("Unable to connect host %s to storage pool %s due to %s", host, pool, e.toString()), e); - } + Long hostId = host.getId(); + if (logger.isDebugEnabled()) { + logger.debug("Host {} connected, connecting host to shared pool {} and sending storage pool information ...", host, pool); + } + try { + _storageManager.connectHostToSharedPool(host, pool.getId()); + _storageManager.createCapacityEntry(pool.getId()); + } catch (Exception e) { + throw new ConnectionException(true, String.format("Unable to connect host %s to storage pool %s due to %s", host, pool, e.toString()), e); } } } diff --git a/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java b/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java index 97e503974cf..027a0530383 100644 --- a/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java +++ b/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java @@ -37,6 +37,11 @@ import org.apache.cloudstack.framework.config.Configurable; import com.cloud.agent.AgentManager; import com.cloud.agent.api.Answer; +import com.cloud.agent.api.MigrateAgentConnectionCommand; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; +import com.cloud.dc.DataCenterVO; +import com.cloud.dc.dao.DataCenterDao; import com.cloud.host.Host; import com.cloud.host.HostVO; import com.cloud.host.dao.HostDao; @@ -44,6 +49,8 @@ import com.cloud.hypervisor.Hypervisor; import 
com.cloud.resource.ResourceState; import com.cloud.utils.component.ComponentLifecycleBase; import com.cloud.utils.exception.CloudRuntimeException; + +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implements IndirectAgentLB, Configurable { @@ -63,14 +70,35 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement @Inject private HostDao hostDao; @Inject + private DataCenterDao dcDao; + @Inject + private ManagementServerHostDao mshostDao; + @Inject private AgentManager agentManager; ////////////////////////////////////////////////////// /////////////// Agent MSLB Methods /////////////////// ////////////////////////////////////////////////////// + @Override + public List getManagementServerList() { + final String msServerAddresses = ApiServiceConfiguration.ManagementServerAddresses.value(); + if (StringUtils.isEmpty(msServerAddresses)) { + throw new CloudRuntimeException(String.format("No management server addresses are defined in '%s' setting", + ApiServiceConfiguration.ManagementServerAddresses.key())); + } + + List msList = new ArrayList<>(Arrays.asList(msServerAddresses.replace(" ", "").split(","))); + return msList; + } + @Override public List getManagementServerList(final Long hostId, final Long dcId, final List orderedHostIdList) { + return getManagementServerList(hostId, dcId, orderedHostIdList, null); + } + + @Override + public List getManagementServerList(final Long hostId, final Long dcId, final List orderedHostIdList, String lbAlgorithm) { final String msServerAddresses = ApiServiceConfiguration.ManagementServerAddresses.value(); if (StringUtils.isEmpty(msServerAddresses)) { throw new CloudRuntimeException(String.format("No management server addresses are defined in '%s' setting", @@ -90,7 +118,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement hostIdList.add(hostId); } - 
final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(); + final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(lbAlgorithm); final List msList = Arrays.asList(msServerAddresses.replace(" ", "").split(",")); return algorithm.sort(msList, hostIdList, hostId); } @@ -146,6 +174,30 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement return agentBasedHosts; } + private List getAllAgentBasedHosts(long msId) { + final List allHosts = hostDao.listHostsByMs(msId); + if (allHosts == null) { + return new ArrayList<>(); + } + final List agentBasedHosts = new ArrayList<>(); + for (final Host host : allHosts) { + conditionallyAddHost(agentBasedHosts, host); + } + return agentBasedHosts; + } + + private List getAllAgentBasedHostsInDc(long msId, long dcId) { + final List allHosts = hostDao.listHostsByMsAndDc(msId, dcId); + if (allHosts == null) { + return new ArrayList<>(); + } + final List agentBasedHosts = new ArrayList<>(); + for (final Host host : allHosts) { + conditionallyAddHost(agentBasedHosts, host); + } + return agentBasedHosts; + } + private void conditionallyAddHost(List agentBasedHosts, Host host) { if (host == null) { if (logger.isTraceEnabled()) { @@ -191,13 +243,33 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement agentBasedHosts.add(host); } + @Override + public boolean haveAgentBasedHosts(long msId) { + return CollectionUtils.isNotEmpty(getAllAgentBasedHosts(msId)); + } + private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm() { - final String algorithm = getLBAlgorithmName(); - if (algorithmMap.containsKey(algorithm)) { - return algorithmMap.get(algorithm); + return getAgentMSLBAlgorithm(null); + } + + private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm(String lbAlgorithm) { + boolean algorithmNameFromConfig = false; + if 
(StringUtils.isEmpty(lbAlgorithm)) { + lbAlgorithm = getLBAlgorithmName(); + algorithmNameFromConfig = true; + } + if (algorithmMap.containsKey(lbAlgorithm)) { + return algorithmMap.get(lbAlgorithm); + } + throw new CloudRuntimeException(String.format("Algorithm %s%s not found, valid values are: %s", + lbAlgorithm, algorithmNameFromConfig? " configured for '" + IndirectAgentLBAlgorithm.key() + "'" : "", algorithmMap.keySet())); + } + + @Override + public void checkLBAlgorithmName(String lbAlgorithm) { + if (!algorithmMap.containsKey(lbAlgorithm)) { + throw new CloudRuntimeException(String.format("Invalid algorithm %s, valid values are: %s", lbAlgorithm, algorithmMap.keySet())); } - throw new CloudRuntimeException(String.format("Algorithm configured for '%s' not found, valid values are: %s", - IndirectAgentLBAlgorithm.key(), algorithmMap.keySet())); } //////////////////////////////////////////////////////////// @@ -224,6 +296,73 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement } } + @Override + public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs) { + if (timeoutDurationInMs <= 0) { + logger.debug(String.format("Not migrating indirect agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid)); + return false; + } + + logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid)); + long migrationStartTime = System.currentTimeMillis(); + if (!haveAgentBasedHosts(fromMsId)) { + logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid)); + return true; + } + + boolean lbAlgorithmChanged = false; + if (StringUtils.isNotBlank(lbAlgorithm) && !lbAlgorithm.equalsIgnoreCase(getLBAlgorithmName())) { + logger.debug(String.format("Indirect agent lb algorithm changed to %s", lbAlgorithm)); + 
lbAlgorithmChanged = true; + } + + final List avoidMsList = mshostDao.listNonUpStateMsIPs(); + ManagementServerHostVO ms = mshostDao.findByMsid(fromMsId); + if (ms != null && !avoidMsList.contains(ms.getServiceIP())) { + avoidMsList.add(ms.getServiceIP()); + } + + List dataCenterList = dcDao.listAll(); + for (DataCenterVO dc : dataCenterList) { + Long dcId = dc.getId(); + List orderedHostIdList = getOrderedHostIdList(dcId); + List agentBasedHostsOfMsInDc = getAllAgentBasedHostsInDc(fromMsId, dcId); + if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDc)) { + continue; + } + logger.debug(String.format("Migrating %d indirect agents from management server node %d (id: %s) of zone %s", agentBasedHostsOfMsInDc.size(), fromMsId, fromMsUuid, dc.toString())); + for (final Host host : agentBasedHostsOfMsInDc) { + long migrationElapsedTimeInMs = System.currentTimeMillis() - migrationStartTime; + if (migrationElapsedTimeInMs >= timeoutDurationInMs) { + logger.debug(String.format("Stop migrating remaining indirect agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid)); + return false; + } + + List msList = null; + Long lbCheckInterval = 0L; + if (lbAlgorithmChanged) { + // send new MS list when there is change in lb algorithm + msList = getManagementServerList(host.getId(), dcId, orderedHostIdList, lbAlgorithm); + lbCheckInterval = getLBPreferredHostCheckInterval(host.getClusterId()); + } + + final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval); + agentManager.easySend(host.getId(), cmd); //answer not received as the agent disconnects and reconnects to other ms + updateLastManagementServer(host.getId(), fromMsId); + } + } + + return true; + } + + private void updateLastManagementServer(long hostId, long msId) { + HostVO hostVO = hostDao.findById(hostId); + if (hostVO != null) { + hostVO.setLastManagementServerId(msId); + hostDao.update(hostId, hostVO); + } + } + private void 
configureAlgorithmMap() { final List algorithms = new ArrayList<>(); algorithms.add(new IndirectAgentLBStaticAlgorithm()); diff --git a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 68abe7a16f1..60c2095d5f4 100644 --- a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -268,8 +268,8 @@ - - + + diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index e8b297ff188..9d3a4fbee45 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -46,9 +46,9 @@ import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; @@ -79,7 +79,7 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana * @see 
com.cloud.resource.ResourceService#cancelMaintenance(com.cloud.api.commands.CancelMaintenanceCmd) */ @Override - public Host cancelMaintenance(final CancelMaintenanceCmd cmd) { + public Host cancelMaintenance(final CancelHostMaintenanceCmd cmd) { // TODO Auto-generated method stub return null; } @@ -142,7 +142,7 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana * @see com.cloud.resource.ResourceService#maintain(com.cloud.api.commands.PrepareForMaintenanceCmd) */ @Override - public Host maintain(final PrepareForMaintenanceCmd cmd) { + public Host maintain(final PrepareForHostMaintenanceCmd cmd) { // TODO Auto-generated method stub return null; } @@ -250,6 +250,15 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana return null; } + /* (non-Javadoc) + * @see com.cloud.resource.ResourceManager#createHostAndAgent(java.lang.Long, com.cloud.resource.ServerResource, java.util.Map, boolean, java.util.List, boolean, boolean) + */ + @Override + public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance, boolean isTransferredConnection) { + // TODO Auto-generated method stub + return null; + } + /* (non-Javadoc) * @see com.cloud.resource.ResourceManager#addHost(long, com.cloud.resource.ServerResource, com.cloud.host.Host.Type, java.util.Map) */ diff --git a/server/src/test/resources/createNetworkOffering.xml b/server/src/test/resources/createNetworkOffering.xml index 99418467e0a..a3f43407c61 100644 --- a/server/src/test/resources/createNetworkOffering.xml +++ b/server/src/test/resources/createNetworkOffering.xml @@ -1,79 +1,80 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/test/integration/smoke/test_safe_shutdown.py b/test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py similarity index 58% rename from test/integration/smoke/test_safe_shutdown.py rename to test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py index d757bb6d068..4eff33122fe 100644 --- a/test/integration/smoke/test_safe_shutdown.py +++ b/test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py @@ -22,19 +22,19 @@ from marvin.lib.utils import * from marvin.lib.base import * from marvin.lib.common import * -class TestSafeShutdown(cloudstackTestCase): +class TestMSMaintenanceAndSafeShutdown(cloudstackTestCase): """ - Tests safely shutting down the Management Server + Tests MS maintenance and safe shutting down the Management Server """ def setUp(self): self.apiclient = self.testClient.getApiClient() + self.hypervisor = self.testClient.getHypervisorInfo() self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ self.cleanup = [] def tearDown(self): - self.startServer() - super(TestSafeShutdown, self).tearDown() + super(TestMSMaintenanceAndSafeShutdown, self).tearDown() def isServerShutdown(self): sshClient = SshClient( @@ -87,8 +87,61 @@ class TestSafeShutdown(cloudstackTestCase): {"name": "test", "displaytext": "test"} ) + def getActiveManagementServers(self): + cmd = listManagementServers.listManagementServersCmd() + servers = self.apiclient.listManagementServers(cmd) + active_servers = [] + for idx, server in enumerate(servers): + if server.state == 'Up': + active_servers.append(server.serviceip) + return active_servers + @attr(tags=["advanced", "smoke"]) - def test_01_prepare_and_cancel_shutdown(self): + def test_01_prepare_and_cancel_maintenance(self): + active_management_servers = self.getActiveManagementServers() + if len(active_management_servers) <= 1: + self.skipTest("Skipping test case, this test is intended for only multiple management servers") + + hypervisor = self.hypervisor.lower() + if hypervisor == 'kvm': + 
list_configurations_cmd = listConfigurations.listConfigurationsCmd() + list_configurations_cmd.name = "host" + list_configurations_response = self.apiclient.listConfigurations(list_configurations_cmd) + self.assertNotEqual(len(list_configurations_response), 0, + "Check if the list configurations API returns a non-empty response") + + for item in list_configurations_response: + if item.name == list_configurations_cmd.name: + host_config = item + + hosts = host_config.value.split(",") + if len(hosts) <= 1: + self.skipTest( + "Skipping test case, this test is intended for only multiple management server hosts configured on host setting for kvm") + + try : + prepare_for_maintenance_cmd = prepareForMaintenance.prepareForMaintenanceCmd() + prepare_for_maintenance_cmd.managementserverid = 1 + response = self.apiclient.prepareForMaintenance(prepare_for_maintenance_cmd) + self.assertEqual( + response.maintenanceinitiated, + True, + "Failed to prepare for maintenance" + ) + try : + self.run_async_cmd() + except Exception as e: + self.debug("Prepare for maintenance check successful, API failure: %s" % e) + finally : + cancel_maintenance_cmd = cancelMaintenance.cancelMaintenanceCmd() + cancel_maintenance_cmd.managementserverid = 1 + self.apiclient.cancelMaintenance(cancel_maintenance_cmd) + ## Just to be sure, run another async command + project = self.run_async_cmd() + self.cleanup.append(project) + + @attr(tags=["advanced", "smoke"]) + def test_02_prepare_and_cancel_shutdown(self): try : prepare_for_shutdown_cmd = prepareForShutdown.prepareForShutdownCmd() prepare_for_shutdown_cmd.managementserverid = 1 @@ -111,7 +164,7 @@ class TestSafeShutdown(cloudstackTestCase): self.cleanup.append(project) @attr(tags=["advanced", "smoke"]) - def test_02_trigger_shutdown(self): + def test_03_trigger_shutdown(self): try : cmd = triggerShutdown.triggerShutdownCmd() cmd.managementserverid = 1 diff --git a/tools/apidoc/gen_toc.py b/tools/apidoc/gen_toc.py index 8d28749a637..c05b8fe2798 
100644 --- a/tools/apidoc/gen_toc.py +++ b/tools/apidoc/gen_toc.py @@ -233,7 +233,8 @@ known_categories = { 'listQuarantinedIp': 'IP Quarantine', 'updateQuarantinedIp': 'IP Quarantine', 'removeQuarantinedIp': 'IP Quarantine', - 'Shutdown': 'Management', + 'Shutdown': 'Maintenance', + 'Maintenance': 'Maintenance', 'addObjectStoragePool': 'Object Store', 'listObjectStoragePools': 'Object Store', 'deleteObjectStoragePool': 'Object Store', diff --git a/ui/public/locales/en.json b/ui/public/locales/en.json index 820f08b8cb4..e07962d63d0 100644 --- a/ui/public/locales/en.json +++ b/ui/public/locales/en.json @@ -349,6 +349,7 @@ "label.agent.username": "Agent username", "label.agentport": "Agent port", "label.agentstate": "Agent state", +"label.agentscount": "Number of connected agents", "label.agree": "Agree", "label.alert": "Alert", "label.alert.details": "Alert details", @@ -470,7 +471,7 @@ "label.cachemode": "Write-cache type", "label.cancel": "Cancel", "label.cancel.shutdown": "Cancel Shutdown", -"label.cancelmaintenance": "Cancel maintenance", +"label.cancel.maintenance": "Cancel Maintenance", "label.cancel.host.as.degraded": "Cancel host as degraded", "label.capacity": "Capacity", "label.capacitybytes": "Capacity bytes", @@ -569,6 +570,7 @@ "label.confirmdeclineinvitation": "Are you sure you want to decline this project invitation?", "label.confirmpassword": "Confirm password", "label.confirmpassword.description": "Please type the same password again.", +"label.connected.agents": "Connected Agents", "label.connect": "Connect", "label.connectiontimeout": "Connection timeout", "label.conservemode": "Conserve mode", @@ -1385,6 +1387,7 @@ "label.management.server": "Management server", "label.management.servers": "Management servers", "label.management.server.peers": "Peers", +"label.managementservername": "Management Server", "label.managementservers": "Number of management servers", "label.matchall": "Match all", "label.max": "Max.", @@ -1684,6 +1687,7 @@ 
"label.peerstate": "Peer State", "label.peerstate.lastupdated": "Peer State Updated Time", "label.pending.jobs": "Pending Jobs", +"label.pendingjobscount": "Number of pending jobs", "label.per.account": "Per Account", "label.per.zone": "Per zone", "label.percentage": "Percentage", @@ -1723,7 +1727,7 @@ "label.prefix": "Prefix", "label.prefix.type": "Prefix type", "label.prepare.for.shutdown": "Prepare for Shutdown", -"label.prepareformaintenance": "Prepare for Maintenance", +"label.prepare.for.maintenance": "Prepare for Maintenance", "label.presetup": "PreSetup", "label.prev": "Prev", "label.previous": "Previous", @@ -2052,6 +2056,7 @@ "label.sequence": "Sequence", "label.server": "Server", "label.server.certificate": "Server certificate", +"label.serviceip": "Service IP", "label.service.connectivity.distributedroutercapabilitycheckbox": "Distributed router", "label.service.connectivity.regionlevelvpccapabilitycheckbox": "Region level VPC", "label.service.group": "Service group", @@ -2825,7 +2830,8 @@ "message.backup.create": "Are you sure you want create an Instance backup?", "message.backup.offering.remove": "Are you sure you want to remove Instance from backup offering and delete the backup chain?", "message.backup.restore": "Please confirm that you want to restore the Instance backup?", -"message.cancel.shutdown": "Please confirm that you would like to cancel the shutdown on this Management server. It will resume accepting any new Async Jobs.", +"message.cancel.shutdown": "Please confirm that you would like to cancel the shutdown on this Management Server. It will resume accepting any new Async Jobs.", +"message.cancel.maintenance": "Please confirm that you would like to cancel the maintenance on this Management Server. 
It will resume accepting any new Async Jobs.", "message.certificate.upload.processing": "Certificate upload in progress", "message.change.disk.offering.sharedfs.failed": "Failed to change disk offering for the Shared FileSystem.", "message.change.disk.offering.sharedfs.processing": "Changing disk offering for the Shared FileSystem.", @@ -3341,7 +3347,8 @@ "message.please.wait.while.zone.is.being.created": "Please wait while your zone is being created; this may take a while...", "message.pod.dedicated": "Pod dedicated.", "message.pod.dedication.released": "Pod dedication released.", -"message.prepare.for.shutdown": "Please confirm that you would like to prep this Management server for shutdown. It will not accept any new Async Jobs but will NOT terminate after there are no pending jobs.", +"message.prepare.for.shutdown": "Please confirm that you would like to prepare this Management Server for shutdown. It will not accept any new Async Jobs but will NOT terminate after there are no pending jobs.", +"message.prepare.for.maintenance": "Please confirm that you would like to prepare this Management Server for maintenance. It will not accept any new Async Jobs.", "message.primary.storage.invalid.state": "Primary storage is not in Up state", "message.processing.complete": "Processing complete!", "message.protocol.description": "For XenServer, choose NFS, iSCSI, or PreSetup. For KVM, choose NFS, SharedMountPoint, RDB, CLVM or Gluster. For vSphere, choose NFS, PreSetup (VMFS or iSCSI or FiberChannel or vSAN or vVols) or DatastoreCluster. For Hyper-V, choose SMB/CIFS. For LXC, choose NFS or SharedMountPoint. For OVM, choose NFS or OCFS2.", @@ -3431,7 +3438,8 @@ "message.setup.physical.network.during.zone.creation.basic": "When adding a basic zone, you can set up one physical Network, which corresponds to a NIC on the hypervisor. The Network carries several types of traffic.

You may also add other traffic types onto the physical Network.", "message.shared.network.offering.warning": "Domain admins and regular Users can only create shared Networks from Network offering with the setting specifyvlan=false. Please contact an administrator to create a Network offering if this list is empty.", "message.shared.network.unsupported.for.nsx": "Shared networks aren't supported for NSX enabled zones", -"message.shutdown.triggered": "A shutdown has been triggered. CloudStack will not accept new jobs", +"message.shutdown.triggered": "Shutdown has been triggered. This Management Server will not accept new jobs", +"message.maintenance.initiated": "Maintenance has been initiated. This Management Server will not accept new jobs", "message.snapshot.additional.zones": "Snapshots will always be created in its native zone - %x, here you can select additional zone(s) where it will be copied to at creation time", "message.sourcenatip.change.warning": "WARNING: Changing the sourcenat IP address of the network will cause connectivity downtime for the Instances with NICs in the Network.", "message.sourcenatip.change.inhibited": "Changing the sourcenat to this IP of the Network to this address is inhibited as firewall rules are defined for it. This can include port forwarding or load balancing rules.\n - If this is an Isolated Network, please use updateNetwork/click the edit button.\n - If this is a VPC, first clear all other rules for this address.", @@ -3595,7 +3603,7 @@ "message.tooltip.reserved.system.netmask": "The Network prefix that defines the pod subnet. Uses CIDR notation.", "message.traffic.type.deleted": "Successfully deleted traffic type", "message.traffic.type.to.basic.zone": "traffic type to basic zone", -"message.trigger.shutdown": "Please confirm that you would like to trigger a shutdown on this Management server. 
It will not accept any new Async Jobs and will terminate after there are no pending jobs.", +"message.trigger.shutdown": "Please confirm that you would like to trigger a shutdown on this Management Server. It will not accept any new Async Jobs and will terminate after there are no pending jobs.", "message.type.values.to.add": "Please add additional values by typing them in", "message.update.autoscale.policy.failed": "Failed to update autoscale policy", "message.update.autoscale.vmgroup.failed": "Failed to update autoscale group", diff --git a/ui/src/components/page/GlobalLayout.vue b/ui/src/components/page/GlobalLayout.vue index 6dd5c530fa5..2002ca3bfc8 100644 --- a/ui/src/components/page/GlobalLayout.vue +++ b/ui/src/components/page/GlobalLayout.vue @@ -17,11 +17,14 @@