From c3b77cb7b82bfe8e94b024e81e1721532da51a9c Mon Sep 17 00:00:00 2001 From: Vishesh Date: Mon, 15 Jan 2024 13:56:34 +0530 Subject: [PATCH 01/10] Fix host stuck in connecting state (#8502) There are a lot of test failures due to test_vm_life_cycle.py in multiple PRs due to host not available for migration of VMs. #8438 (comment) #8433 (comment) #7344 (comment) While debugging I noticed that the hosts get stuck in Connecting state because MS is waiting for a response of the ReadyCommand from the agent. Since we take a lock on connection and disconnection, restarting the agent doesn't work. To fix this, we have to restart the MS or wait for ~1 hour (default timeout). On the agent side, it gets stuck waiting for a response from the Script execution. To reproduce, run smoke/test_vm_life_cycle.py (TestSecuredVmMigration test class to be specific). Once the tests are complete, you will notice that some hosts are stuck in Connecting state. And restarting the agent fails due to the named lock. Locks on DB can be checked using the below query. SELECT * FROM performance_schema.metadata_locks INNER JOIN performance_schema.threads ON THREAD_ID = OWNER_THREAD_ID WHERE PROCESSLIST_ID <> CONNECTION_ID() \G; This PR adds a wait for the ready command and a timeout to the Script execution to ensure that the thread doesn't get stuck and the named lock from database is released. --- .../src/main/java/com/cloud/agent/manager/AgentManagerImpl.java | 1 + .../storage/datastore/provider/DefaultHostListener.java | 1 + .../kvm/resource/wrapper/LibvirtReadyCommandWrapper.java | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 023f9c18e0a..d8671ed29df 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -596,6 +596,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl final Long dcId = host.getDataCenterId(); final ReadyCommand ready = new ReadyCommand(dcId, host.getId(), NumbersUtil.enableHumanReadableSizes); + ready.setWait(60); final Answer answer = easySend(hostId, ready); if (answer == null || !answer.getResult()) { // this is tricky part for secondary storage diff --git a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java index e344a87831d..90e8742c84d 100644 --- a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java +++ b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java @@ -121,6 +121,7 @@ public class DefaultHostListener implements HypervisorHostListener { public boolean hostConnect(long hostId, long poolId) throws StorageConflictException { StoragePool pool = (StoragePool) this.dataStoreMgr.getDataStore(poolId, DataStoreRole.Primary); ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(true, pool); + cmd.setWait(60); final Answer answer = agentMgr.easySend(hostId, cmd); if (answer == null) { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java index fc57cd412f0..4df74decdea 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java @@ -55,7 +55,7 @@ public final class LibvirtReadyCommandWrapper extends CommandWrapper Date: Thu, 18 Jan 2024 13:36:28 +0100 Subject: [PATCH 02/10] scripts: mark multipath scripts as executable (#8524) This PR marks the multipath scripts as executable. This fixes the issue that in 4.19.0.0-RC2, vms can not be stopped in ubuntu hosts. 2024-01-17 12:56:26,061 ERROR [c.c.v.VmWorkJobHandlerProxy] (Work-Job-Executor-4:ctx-e3503563 job-38/job-39 ctx-42706275) (logid:81ede4e9) Invocation exception, caused by: com.cloud.utils.exception.CloudRuntimeException: Unable to stop the virtual machine due to java.lang.NullPointerException at com.cloud.utils.script.Script.getExitValue(Script.java:74) at com.cloud.hypervisor.kvm.storage.MultipathSCSIAdapterBase.runScript(MultipathSCSIAdapterBase.java:476) at com.cloud.hypervisor.kvm.storage.MultipathSCSIAdapterBase.disconnectPhysicalDiskByPath(MultipathSCSIAdapterBase.java:226) at com.cloud.hypervisor.kvm.storage.KVMStoragePoolManager.disconnectPhysicalDiskByPath(KVMStoragePoolManager.java:205) at com.cloud.hypervisor.kvm.resource.LibvirtComputingResource.cleanupDisk(LibvirtComputingResource.java:3335) at com.cloud.hypervisor.kvm.resource.wrapper.LibvirtStopCommandWrapper.execute(LibvirtStopCommandWrapper.java:101) at com.cloud.hypervisor.kvm.resource.wrapper.LibvirtStopCommandWrapper.execute(LibvirtStopCommandWrapper.java:49) at com.cloud.hypervisor.kvm.resource.wrapper.LibvirtRequestWrapper.execute(LibvirtRequestWrapper.java:78) at com.cloud.hypervisor.kvm.resource.LibvirtComputingResource.executeRequest(LibvirtComputingResource.java:1903) --- scripts/storage/multipath/cleanStaleMaps.sh | 0 scripts/storage/multipath/connectVolume.sh | 0 scripts/storage/multipath/copyVolume.sh | 0 scripts/storage/multipath/disconnectVolume.sh | 0 scripts/storage/multipath/resizeVolume.sh | 0 5 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/storage/multipath/cleanStaleMaps.sh mode change 100644 => 100755 scripts/storage/multipath/connectVolume.sh mode change 100644 => 100755 scripts/storage/multipath/copyVolume.sh mode change 100644 => 100755 scripts/storage/multipath/disconnectVolume.sh mode change 100644 => 100755 scripts/storage/multipath/resizeVolume.sh diff --git a/scripts/storage/multipath/cleanStaleMaps.sh b/scripts/storage/multipath/cleanStaleMaps.sh old mode 100644 new mode 100755 diff --git a/scripts/storage/multipath/connectVolume.sh b/scripts/storage/multipath/connectVolume.sh old mode 100644 new mode 100755 diff --git a/scripts/storage/multipath/copyVolume.sh b/scripts/storage/multipath/copyVolume.sh old mode 100644 new mode 100755 diff --git a/scripts/storage/multipath/disconnectVolume.sh b/scripts/storage/multipath/disconnectVolume.sh old mode 100644 new mode 100755 diff --git a/scripts/storage/multipath/resizeVolume.sh b/scripts/storage/multipath/resizeVolume.sh old mode 100644 new mode 100755 From 3bcf6f0faf496e28c39429303b2187175877d426 Mon Sep 17 00:00:00 2001 From: Andrija Panic <45762285+andrijapanicsb@users.noreply.github.com> Date: Thu, 18 Jan 2024 13:37:07 +0100 Subject: [PATCH 03/10] Rename "Import QCOW...." to "Import QCOW2....." (#8519) Minor UI updates, renaming "Import QCOW...." to "Import QCOW2....." --- ui/public/locales/en.json | 10 +++++----- ui/src/views/tools/ManageInstances.vue | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ui/public/locales/en.json b/ui/public/locales/en.json index 443b086f580..b86e39aba7b 100644 --- a/ui/public/locales/en.json +++ b/ui/public/locales/en.json @@ -675,9 +675,9 @@ "label.deploymentplanner": "Deployment planner", "label.desc.db.stats": "Database Statistics", "label.desc.importexportinstancewizard": "Import and export Instances to/from an existing VMware or KVM cluster.", -"label.desc.import.ext.kvm.wizard": "Import libvirt domain from KVM Host", -"label.desc.import.local.kvm.wizard": "Import QCOW image from Local Storage", -"label.desc.import.shared.kvm.wizard": "Import QCOW image from Shared Storage", +"label.desc.import.ext.kvm.wizard": "Import Instance from remote KVM host", +"label.desc.import.local.kvm.wizard": "Import QCOW2 image from Local Storage", +"label.desc.import.shared.kvm.wizard": "Import QCOW2 image from Shared Storage", "label.desc.ingesttinstancewizard": "Ingest instances from an external KVM host", "label.desc.importmigratefromvmwarewizard": "Import instances from VMware into a KVM cluster", "label.desc.usage.stats": "Usage Server Statistics", @@ -2699,8 +2699,8 @@ "message.desc.host": "Each cluster must contain at least one host (computer) for guest Instances to run on. We will add the first host now. For a host to function in CloudStack, you must install hypervisor software on the host, assign an IP address to the host, and ensure the host is connected to the CloudStack management server.

Give the host's DNS or IP address, the user name (usually root) and password, and any labels you use to categorize hosts.", "message.desc.importingestinstancewizard": "This feature only applies to libvirt based KVM instances. Only Stopped instances can be ingested", "message.desc.import.ext.kvm.wizard": "Import libvirt domain from External KVM Host not managed by CloudStack", -"message.desc.import.local.kvm.wizard": "Import QCOW image from Local Storage of selected KVM Host", -"message.desc.import.shared.kvm.wizard": "Import QCOW image from selected Primary Storage Pool", +"message.desc.import.local.kvm.wizard": "Import QCOW2 image from Local Storage of selected KVM Host", +"message.desc.import.shared.kvm.wizard": "Import QCOW2 image from selected Primary Storage Pool", "message.desc.importexportinstancewizard": "By choosing to manage an Instance, CloudStack takes over the orchestration of that Instance. Unmanaging an Instance removes CloudStack ability to manage it. In both cases, the Instance is left running and no changes are done to the VM on the hypervisor.

For KVM, managing a VM is an experimental feature.", "message.desc.importmigratefromvmwarewizard": "By selecting an existing or external VMware Datacenter and an instance to import, CloudStack migrates the selected instance from VMware to KVM on a conversion host using virt-v2v and imports it into a KVM cluster", "message.desc.primary.storage": "Each cluster must contain one or more primary storage servers. We will add the first one now. Primary storage contains the disk volumes for all the Instances running on hosts in the cluster. Use any standards-compliant protocol that is supported by the underlying hypervisor.", diff --git a/ui/src/views/tools/ManageInstances.vue b/ui/src/views/tools/ManageInstances.vue index c7913cabbe4..a869768bf39 100644 --- a/ui/src/views/tools/ManageInstances.vue +++ b/ui/src/views/tools/ManageInstances.vue @@ -567,7 +567,7 @@ export default { }, { name: 'external', - label: 'Import libvirt domain from KVM Host', + label: 'Import Instance from remote KVM host', sourceDestHypervisors: { kvm: 'kvm' }, @@ -576,7 +576,7 @@ export default { }, { name: 'local', - label: 'Import QCOW image from Local Storage', + label: 'Import QCOW2 image from Local Storage', sourceDestHypervisors: { kvm: 'kvm' }, @@ -585,7 +585,7 @@ export default { }, { name: 'shared', - label: 'Import QCOW image from Shared Storage', + label: 'Import QCOW2 image from Shared Storage', sourceDestHypervisors: { kvm: 'kvm' }, From 8d42ca8ccfd0b1a35f61e1229437e0e52b67eaea Mon Sep 17 00:00:00 2001 From: Nicolas Vazquez Date: Thu, 18 Jan 2024 11:46:06 -0300 Subject: [PATCH 04/10] Use project version on pom dependencies (#8529) This PR fixes the POM dependencies from a hardcoded value to the project.version property on dependencies --- framework/direct-download/pom.xml | 2 +- .../non-strict-host-anti-affinity/pom.xml | 2 +- plugins/pom.xml | 4 ++-- vmware-base/pom.xml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/framework/direct-download/pom.xml b/framework/direct-download/pom.xml index 4a2dfb44c64..8204370e2c8 100644 --- a/framework/direct-download/pom.xml +++ b/framework/direct-download/pom.xml @@ -25,7 +25,7 @@ org.apache.cloudstack cloud-utils - 4.19.0.0-SNAPSHOT + ${project.version} compile diff --git a/plugins/affinity-group-processors/non-strict-host-anti-affinity/pom.xml b/plugins/affinity-group-processors/non-strict-host-anti-affinity/pom.xml index adb4de23673..4ecc9645f9b 100644 --- a/plugins/affinity-group-processors/non-strict-host-anti-affinity/pom.xml +++ b/plugins/affinity-group-processors/non-strict-host-anti-affinity/pom.xml @@ -25,7 +25,7 @@ org.apache.cloudstack cloud-plugin-non-strict-host-affinity - 4.19.0.0-SNAPSHOT + ${project.version} compile diff --git a/plugins/pom.xml b/plugins/pom.xml index 2edbbd5ee1d..8cea8bbc5c5 100755 --- a/plugins/pom.xml +++ b/plugins/pom.xml @@ -195,7 +195,7 @@ org.apache.cloudstack cloud-engine-storage - 4.19.0.0-SNAPSHOT + ${project.version} compile @@ -213,7 +213,7 @@ org.apache.cloudstack cloud-engine-storage-object - 4.19.0.0-SNAPSHOT + ${project.version} compile diff --git a/vmware-base/pom.xml b/vmware-base/pom.xml index 623d470d760..3ed658d55f9 100644 --- a/vmware-base/pom.xml +++ b/vmware-base/pom.xml @@ -81,7 +81,7 @@ org.apache.cloudstack cloud-core - 4.19.0.0-SNAPSHOT + ${project.version} compile From c325b8a64dd29863201095336e5899fc9efc5279 Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Thu, 18 Jan 2024 17:50:28 -0500 Subject: [PATCH 05/10] fix test --- .../src/test/java/com/cloud/vpc/NetworkACLServiceTest.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/src/test/java/com/cloud/vpc/NetworkACLServiceTest.java b/server/src/test/java/com/cloud/vpc/NetworkACLServiceTest.java index 0709243d927..343fd6ba9b2 100644 --- a/server/src/test/java/com/cloud/vpc/NetworkACLServiceTest.java +++ b/server/src/test/java/com/cloud/vpc/NetworkACLServiceTest.java @@ -18,6 +18,7 @@ package com.cloud.vpc; import com.cloud.exception.InvalidParameterValueException; import com.cloud.network.NetworkModel; import com.cloud.network.dao.NetworkDao; +import com.cloud.network.dao.NsxProviderDao; import com.cloud.network.vpc.NetworkACLItemDao; import com.cloud.network.vpc.NetworkACLItemVO; import com.cloud.network.vpc.NetworkACLManager; @@ -78,6 +79,8 @@ public class NetworkACLServiceTest extends TestCase { private NetworkACLItemDao _networkACLItemDao; @Inject private EntityManager _entityMgr; + @Inject + private NsxProviderDao nsxProviderDao; private NetworkACLVO acl; private NetworkACLItemVO aclItem; @@ -184,6 +187,9 @@ public class NetworkACLServiceTest extends TestCase { return Mockito.mock(VpcService.class); } + @Bean + public NsxProviderDao nsxProviderDao() { return Mockito.mock(NsxProviderDao.class); } + public static class Library implements TypeFilter { @Override public boolean match(MetadataReader mdr, MetadataReaderFactory arg1) throws IOException { From 13a2aab7754938ce67df8b7765f7c779e0a73973 Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Thu, 18 Jan 2024 18:37:06 -0500 Subject: [PATCH 06/10] fix test - build failure --- .../vpc/NetworkACLServiceImplTest.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/server/src/test/java/com/cloud/network/vpc/NetworkACLServiceImplTest.java b/server/src/test/java/com/cloud/network/vpc/NetworkACLServiceImplTest.java index 18a072172ad..76698b94854 100644 --- a/server/src/test/java/com/cloud/network/vpc/NetworkACLServiceImplTest.java +++ b/server/src/test/java/com/cloud/network/vpc/NetworkACLServiceImplTest.java @@ -17,10 +17,7 @@ package com.cloud.network.vpc; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.ArgumentMatchers.isNull; -import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.times; import java.util.ArrayList; @@ -29,7 +26,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import com.cloud.dc.DataCenter; import com.cloud.exception.PermissionDeniedException; +import com.cloud.network.dao.NsxProviderDao; +import com.cloud.network.element.NsxProviderVO; import com.cloud.network.vpc.dao.VpcDao; import org.apache.cloudstack.acl.SecurityChecker.AccessType; import org.apache.cloudstack.api.ServerApiException; @@ -103,6 +103,8 @@ public class NetworkACLServiceImplTest { private NetworkACLVO networkACLVOMock; @Mock private UpdateNetworkACLListCmd updateNetworkACLListCmdMock; + @Mock + private NsxProviderDao nsxProviderDao; private Long networkAclMockId = 5L; private Long networkOfferingMockId = 2L; @@ -129,6 +131,10 @@ public class NetworkACLServiceImplTest { @Mock private VpcVO vpcVOMock; + @Mock + DataCenter dataCenterVO; + @Mock + NsxProviderVO nsxProviderVO; @Mock private Account accountMock; @@ -173,7 +179,9 @@ public class NetworkACLServiceImplTest { private void createNetworkACLItemTestForNumberAndExecuteTest(Integer number) { Mockito.when(createNetworkAclCmdMock.getNumber()).thenReturn(number); - + Mockito.when(vpcDaoMock.findById(anyLong())).thenReturn(vpcVOMock); + Mockito.when(entityManagerMock.findById(any(), anyLong())).thenReturn(dataCenterVO); + Mockito.when(nsxProviderDao.findByZoneId(anyLong())).thenReturn(null); Mockito.doReturn(networkAclMockId).when(networkAclServiceImpl).createAclListIfNeeded(createNetworkAclCmdMock); Mockito.when(networkAclManagerMock.getNetworkACL(networkAclMockId)).thenReturn(networkAclMock); @@ -710,6 +718,9 @@ public class NetworkACLServiceImplTest { @Test public void updateNetworkACLItemTest() throws ResourceUnavailableException { Mockito.when(networkAclItemVoMock.getAclId()).thenReturn(networkAclMockId); + Mockito.when(vpcDaoMock.findById(anyLong())).thenReturn(vpcVOMock); + Mockito.when(entityManagerMock.findById(any(), anyLong())).thenReturn(dataCenterVO); + Mockito.when(nsxProviderDao.findByZoneId(anyLong())).thenReturn(null); Mockito.doReturn(networkAclItemVoMock).when(networkAclServiceImpl).validateNetworkAclRuleIdAndRetrieveIt(updateNetworkACLItemCmdMock); Mockito.doReturn(networkAclMock).when(networkAclManagerMock).getNetworkACL(networkAclMockId); Mockito.doNothing().when(networkAclServiceImpl).validateNetworkAcl(Mockito.eq(networkAclMock)); From 2aee0fbd5c568ac7186afe5b3a858e46a40ded94 Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Thu, 18 Jan 2024 18:57:23 -0500 Subject: [PATCH 07/10] fix test - build failure --- .../com/cloud/network/vpc/NetworkACLServiceImplTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/com/cloud/network/vpc/NetworkACLServiceImplTest.java b/server/src/test/java/com/cloud/network/vpc/NetworkACLServiceImplTest.java index 76698b94854..afd2b61960e 100644 --- a/server/src/test/java/com/cloud/network/vpc/NetworkACLServiceImplTest.java +++ b/server/src/test/java/com/cloud/network/vpc/NetworkACLServiceImplTest.java @@ -17,7 +17,12 @@ package com.cloud.network.vpc; -import static org.mockito.ArgumentMatchers.*; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.isNull; +import static org.mockito.ArgumentMatchers.eq; + import static org.mockito.Mockito.times; import java.util.ArrayList; From 80bbb29abfb90476cec155c2f831e626710bf420 Mon Sep 17 00:00:00 2001 From: kishankavala Date: Fri, 19 Jan 2024 13:26:25 +0530 Subject: [PATCH 08/10] CleanUp Async Jobs after mgmt server maintenance (#8394) This PR fixes moves resources stuck in transition state during async job cleanup Problem: During maintenance of the management server, other servers in the cluster or the same server after a restart initiate async job cleanup. However, this process leaves resources in a transitional state. The only recovery option currently available is to make direct database changes. Solution: This PR introduces a resolution by changing Volume, Virtual Machine, and Network resources from their transitional states. This adjustment enables the reattempt of failed operations without the need for manual database modifications. --- .../main/java/com/cloud/storage/Volume.java | 54 ++++--- .../service/NetworkOrchestrationService.java | 3 + .../subsystem/api/storage/VolumeService.java | 5 +- .../orchestration/NetworkOrchestrator.java | 3 +- .../storage/volume/VolumeObject.java | 2 +- .../storage/volume/VolumeServiceImpl.java | 12 +- .../jobs/impl/AsyncJobManagerImpl.java | 143 +++++++++++++++--- .../jobs/impl/AsyncJobManagerImplTest.java | 96 ++++++++++++ .../com/cloud/vpc/MockNetworkManagerImpl.java | 6 + 9 files changed, 274 insertions(+), 50 deletions(-) create mode 100644 framework/jobs/src/test/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImplTest.java diff --git a/api/src/main/java/com/cloud/storage/Volume.java b/api/src/main/java/com/cloud/storage/Volume.java index 4a14197bd30..308ed2544ed 100644 --- a/api/src/main/java/com/cloud/storage/Volume.java +++ b/api/src/main/java/com/cloud/storage/Volume.java @@ -39,30 +39,38 @@ public interface Volume extends ControlledEntity, Identity, InternalIdentity, Ba }; enum State { - Allocated("The volume is allocated but has not been created yet."), - Creating("The volume is being created. getPoolId() should reflect the pool where it is being created."), - Ready("The volume is ready to be used."), - Migrating("The volume is migrating to other storage pool"), - Snapshotting("There is a snapshot created on this volume, not backed up to secondary storage yet"), - RevertSnapshotting("There is a snapshot created on this volume, the volume is being reverting from snapshot"), - Resizing("The volume is being resized"), - Expunging("The volume is being expunging"), - Expunged("The volume has been expunged, and can no longer be recovered"), - Destroy("The volume is destroyed, and can be recovered."), - Destroying("The volume is destroying, and can't be recovered."), - UploadOp("The volume upload operation is in progress or in short the volume is on secondary storage"), - Copying("Volume is copying from image store to primary, in case it's an uploaded volume"), - Uploaded("Volume is uploaded"), - NotUploaded("The volume entry is just created in DB, not yet uploaded"), - UploadInProgress("Volume upload is in progress"), - UploadError("Volume upload encountered some error"), - UploadAbandoned("Volume upload is abandoned since the upload was never initiated within a specified time"), - Attaching("The volume is attaching to a VM from Ready state."), - Restoring("The volume is being restored from backup."); + Allocated(false, "The volume is allocated but has not been created yet."), + Creating(true, "The volume is being created. getPoolId() should reflect the pool where it is being created."), + Ready(false, "The volume is ready to be used."), + Migrating(true, "The volume is migrating to other storage pool"), + Snapshotting(true, "There is a snapshot created on this volume, not backed up to secondary storage yet"), + RevertSnapshotting(true, "There is a snapshot created on this volume, the volume is being reverting from snapshot"), + Resizing(true, "The volume is being resized"), + Expunging(true, "The volume is being expunging"), + Expunged(false, "The volume has been expunged, and can no longer be recovered"), + Destroy(false, "The volume is destroyed, and can be recovered."), + Destroying(false, "The volume is destroying, and can't be recovered."), + UploadOp(true, "The volume upload operation is in progress or in short the volume is on secondary storage"), + Copying(true, "Volume is copying from image store to primary, in case it's an uploaded volume"), + Uploaded(false, "Volume is uploaded"), + NotUploaded(true, "The volume entry is just created in DB, not yet uploaded"), + UploadInProgress(true, "Volume upload is in progress"), + UploadError(false, "Volume upload encountered some error"), + UploadAbandoned(false, "Volume upload is abandoned since the upload was never initiated within a specified time"), + Attaching(true, "The volume is attaching to a VM from Ready state."), + Restoring(true, "The volume is being restored from backup."); + + boolean _transitional; String _description; - private State(String description) { + /** + * Volume State + * @param transitional true for transition/non-final state, otherwise false + * @param description description of the state + */ + private State(boolean transitional, String description) { + _transitional = transitional; _description = description; } @@ -70,6 +78,10 @@ public interface Volume extends ControlledEntity, Identity, InternalIdentity, Ba return s_fsm; } + public boolean isTransitional() { + return _transitional; + } + public String getDescription() { return _description; } diff --git a/engine/api/src/main/java/org/apache/cloudstack/engine/orchestration/service/NetworkOrchestrationService.java b/engine/api/src/main/java/org/apache/cloudstack/engine/orchestration/service/NetworkOrchestrationService.java index c691b8b0942..2005b70b439 100644 --- a/engine/api/src/main/java/org/apache/cloudstack/engine/orchestration/service/NetworkOrchestrationService.java +++ b/engine/api/src/main/java/org/apache/cloudstack/engine/orchestration/service/NetworkOrchestrationService.java @@ -50,6 +50,7 @@ import com.cloud.network.rules.LoadBalancerContainer.Scheme; import com.cloud.offering.NetworkOffering; import com.cloud.user.Account; import com.cloud.user.User; +import com.cloud.utils.fsm.NoTransitionException; import com.cloud.utils.Pair; import com.cloud.vm.Nic; import com.cloud.vm.NicProfile; @@ -268,6 +269,8 @@ public interface NetworkOrchestrationService { Map finalizeServicesAndProvidersForNetwork(NetworkOffering offering, Long physicalNetworkId); + boolean stateTransitTo(Network network, Network.Event e) throws NoTransitionException; + List getProvidersForServiceInNetwork(Network network, Service service); StaticNatServiceProvider getStaticNatProviderForNetwork(Network network); diff --git a/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/VolumeService.java b/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/VolumeService.java index 2c12b70e9eb..50aee83f497 100644 --- a/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/VolumeService.java +++ b/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/VolumeService.java @@ -18,13 +18,13 @@ */ package org.apache.cloudstack.engine.subsystem.api.storage; -import com.cloud.agent.api.Answer; import java.util.Map; import org.apache.cloudstack.engine.cloud.entity.api.VolumeEntity; import org.apache.cloudstack.framework.async.AsyncCallFuture; import org.apache.cloudstack.storage.command.CommandResult; +import com.cloud.agent.api.Answer; import com.cloud.agent.api.to.VirtualMachineTO; import com.cloud.exception.StorageAccessException; import com.cloud.host.Host; @@ -35,6 +35,9 @@ import com.cloud.user.Account; import com.cloud.utils.Pair; public interface VolumeService { + + String SNAPSHOT_ID = "SNAPSHOT_ID"; + class VolumeApiResult extends CommandResult { private final VolumeInfo volume; diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java index 0f6dfe60866..6c10a02abba 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java @@ -4424,7 +4424,8 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra return accessDetails; } - protected boolean stateTransitTo(final NetworkVO network, final Network.Event e) throws NoTransitionException { + @Override + public boolean stateTransitTo(final Network network, final Network.Event e) throws NoTransitionException { return _stateMachine.transitTo(network, e, null, _networksDao); } diff --git a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeObject.java b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeObject.java index 5ebee87acd4..9e41e0d4d0e 100644 --- a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeObject.java +++ b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeObject.java @@ -851,7 +851,7 @@ public class VolumeObject implements VolumeInfo { @Override public boolean delete() { - return dataStore == null ? true : dataStore.delete(this); + return dataStore == null || dataStore.delete(this); } @Override diff --git a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java index c0ef227251c..8a3cd39ecbf 100644 --- a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java +++ b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java @@ -32,10 +32,6 @@ import java.util.concurrent.ExecutionException; import javax.inject.Inject; -import org.apache.cloudstack.secret.dao.PassphraseDao; -import com.cloud.storage.VMTemplateVO; -import com.cloud.storage.dao.VMTemplateDao; -import com.cloud.storage.resource.StorageProcessor; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.dao.AnnotationDao; import org.apache.cloudstack.engine.cloud.entity.api.VolumeEntity; @@ -66,6 +62,7 @@ import org.apache.cloudstack.framework.async.AsyncCallbackDispatcher; import org.apache.cloudstack.framework.async.AsyncCompletionCallback; import org.apache.cloudstack.framework.async.AsyncRpcContext; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.cloudstack.secret.dao.PassphraseDao; import org.apache.cloudstack.storage.RemoteHostEndPoint; import org.apache.cloudstack.storage.command.CommandResult; import org.apache.cloudstack.storage.command.CopyCmdAnswer; @@ -83,6 +80,7 @@ import org.apache.cloudstack.storage.datastore.db.VolumeDataStoreVO; import org.apache.cloudstack.storage.image.store.TemplateObject; import org.apache.cloudstack.storage.to.TemplateObjectTO; import org.apache.cloudstack.storage.to.VolumeObjectTO; +import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; @@ -122,13 +120,16 @@ import com.cloud.storage.StoragePool; import com.cloud.storage.VMTemplateStoragePoolVO; import com.cloud.storage.VMTemplateStorageResourceAssoc; import com.cloud.storage.VMTemplateStorageResourceAssoc.Status; +import com.cloud.storage.VMTemplateVO; import com.cloud.storage.Volume; import com.cloud.storage.Volume.State; import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.VolumeVO; +import com.cloud.storage.dao.VMTemplateDao; import com.cloud.storage.dao.VMTemplatePoolDao; import com.cloud.storage.dao.VolumeDao; import com.cloud.storage.dao.VolumeDetailsDao; +import com.cloud.storage.resource.StorageProcessor; import com.cloud.storage.snapshot.SnapshotApiService; import com.cloud.storage.snapshot.SnapshotManager; import com.cloud.storage.template.TemplateConstants; @@ -142,7 +143,6 @@ import com.cloud.utils.db.DB; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.vm.VirtualMachine; -import org.apache.commons.lang3.StringUtils; @Component public class VolumeServiceImpl implements VolumeService { @@ -206,8 +206,6 @@ public class VolumeServiceImpl implements VolumeService { @Inject private PassphraseDao passphraseDao; - private final static String SNAPSHOT_ID = "SNAPSHOT_ID"; - public VolumeServiceImpl() { } diff --git a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java index 9100ee5e34b..3c0f81d0bc1 100644 --- a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java +++ b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java @@ -38,9 +38,13 @@ import javax.naming.ConfigurationException; import org.apache.cloudstack.api.ApiCommandResourceType; import org.apache.cloudstack.api.ApiErrorCode; import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotDataFactory; import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotInfo; import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotService; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeService; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; import org.apache.cloudstack.framework.jobs.AsyncJob; @@ -65,7 +69,12 @@ import org.apache.log4j.MDC; import org.apache.log4j.NDC; import com.cloud.cluster.ClusterManagerListener; +import com.cloud.network.Network; +import com.cloud.network.dao.NetworkDao; +import com.cloud.network.dao.NetworkVO; import com.cloud.storage.Snapshot; +import com.cloud.storage.Volume; +import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.dao.SnapshotDao; import com.cloud.storage.dao.SnapshotDetailsDao; import com.cloud.storage.dao.SnapshotDetailsVO; @@ -93,7 +102,11 @@ import com.cloud.utils.db.TransactionCallbackNoReturn; import com.cloud.utils.db.TransactionStatus; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.utils.exception.ExceptionUtil; +import com.cloud.utils.fsm.NoTransitionException; import com.cloud.utils.mgmt.JmxUtil; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine; +import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.dao.VMInstanceDao; public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, ClusterManagerListener, Configurable { @@ -148,6 +161,15 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, @Inject private SnapshotDetailsDao _snapshotDetailsDao; + @Inject + private VolumeDataFactory volFactory; + @Inject + private VirtualMachineManager virtualMachineManager; + @Inject + private NetworkDao networkDao; + @Inject + private NetworkOrchestrationService networkOrchestrationService; + private volatile long _executionRunNumber = 1; private final ScheduledExecutorService _heartbeatScheduler = Executors.newScheduledThreadPool(1, new NamedThreadFactory("AsyncJobMgr-Heartbeat")); @@ -1089,6 +1111,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, if (s_logger.isDebugEnabled()) { s_logger.debug("Cancel left-over job-" + job.getId()); } + cleanupResources(job); job.setStatus(JobInfo.Status.FAILED); job.setResultCode(ApiErrorCode.INTERNAL_ERROR.getHttpCode()); job.setResult("job cancelled because of management server restart or shutdown"); @@ -1101,26 +1124,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, s_logger.debug("Purge queue item for cancelled job-" + job.getId()); } _queueMgr.purgeAsyncJobQueueItemId(job.getId()); - if (ApiCommandResourceType.Volume.toString().equals(job.getInstanceType())) { - - try { - _volumeDetailsDao.removeDetail(job.getInstanceId(), "SNAPSHOT_ID"); - _volsDao.remove(job.getInstanceId()); - } catch (Exception e) { - s_logger.error("Unexpected exception while removing concurrent request meta data :" + e.getLocalizedMessage()); - } - } - } - final List snapshotList = _snapshotDetailsDao.findDetails(AsyncJob.Constants.MS_ID, Long.toString(msid), false); - for (final SnapshotDetailsVO snapshotDetailsVO : snapshotList) { - SnapshotInfo snapshot = snapshotFactory.getSnapshotOnPrimaryStore(snapshotDetailsVO.getResourceId()); - if (snapshot == null) { - _snapshotDetailsDao.remove(snapshotDetailsVO.getId()); - continue; - } - snapshotSrv.processEventOnSnapshotObject(snapshot, Snapshot.Event.OperationFailed); - _snapshotDetailsDao.removeDetail(snapshotDetailsVO.getResourceId(), AsyncJob.Constants.MS_ID); } + cleanupFailedSnapshotsCreatedWithDefaultStrategy(msid); } }); } catch (Throwable e) { @@ -1128,6 +1133,106 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, } } + /* + Cleanup Resources in transition state and move them to appropriate state + This will allow other operation on the resource, instead of being stuck in transition state + */ + protected boolean cleanupResources(AsyncJobVO job) { + try { + ApiCommandResourceType resourceType = ApiCommandResourceType.fromString(job.getInstanceType()); + if (resourceType == null) { + s_logger.warn("Unknown ResourceType. Skip Cleanup: " + job.getInstanceType()); + return true; + } + switch (resourceType) { + case Volume: + return cleanupVolume(job.getInstanceId()); + case VirtualMachine: + return cleanupVirtualMachine(job.getInstanceId()); + case Network: + return cleanupNetwork(job.getInstanceId()); + } + } catch (Exception e) { + s_logger.warn("Error while cleaning up resource: [" + job.getInstanceType().toString() + "] with Id: " + job.getInstanceId(), e); + return false; + } + return true; + } + + private boolean cleanupVolume(final long volumeId) { + VolumeInfo vol = volFactory.getVolume(volumeId); + if (vol == null) { + s_logger.warn("Volume not found. Skip Cleanup. VolumeId: " + volumeId); + return true; + } + if (vol.getState().isTransitional()) { + s_logger.debug("Cleaning up volume with Id: " + volumeId); + boolean status = vol.stateTransit(Volume.Event.OperationFailed); + cleanupFailedVolumesCreatedFromSnapshots(volumeId); + return status; + } + s_logger.debug("Volume not in transition state. Skip cleanup. VolumeId: " + volumeId); + return true; + } + + private boolean cleanupVirtualMachine(final long vmId) throws Exception { + VMInstanceVO vmInstanceVO = _vmInstanceDao.findById(vmId); + if (vmInstanceVO == null) { + s_logger.warn("Instance not found. Skip Cleanup. InstanceId: " + vmId); + return true; + } + if (vmInstanceVO.getState().isTransitional()) { + s_logger.debug("Cleaning up Instance with Id: " + vmId); + return virtualMachineManager.stateTransitTo(vmInstanceVO, VirtualMachine.Event.OperationFailed, vmInstanceVO.getHostId()); + } + s_logger.debug("Instance not in transition state. Skip cleanup. InstanceId: " + vmId); + return true; + } + + private boolean cleanupNetwork(final long networkId) throws Exception { + NetworkVO networkVO = networkDao.findById(networkId); + if (networkVO == null) { + s_logger.warn("Network not found. Skip Cleanup. NetworkId: " + networkId); + return true; + } + if (Network.State.Implementing.equals(networkVO.getState())) { + try { + s_logger.debug("Cleaning up Network with Id: " + networkId); + return networkOrchestrationService.stateTransitTo(networkVO, Network.Event.OperationFailed); + } catch (final NoTransitionException e) { + networkVO.setState(Network.State.Shutdown); + networkDao.update(networkVO.getId(), networkVO); + } + } + s_logger.debug("Network not in transition state. Skip cleanup. NetworkId: " + networkId); + return true; + } + + private void cleanupFailedVolumesCreatedFromSnapshots(final long volumeId) { + try { + VolumeDetailVO volumeDetail = _volumeDetailsDao.findDetail(volumeId, VolumeService.SNAPSHOT_ID); + if (volumeDetail != null) { + _volumeDetailsDao.removeDetail(volumeId, VolumeService.SNAPSHOT_ID); + _volsDao.remove(volumeId); + } + } catch (Exception e) { + s_logger.error("Unexpected exception while removing concurrent request meta data :" + e.getLocalizedMessage()); + } + } + + private void cleanupFailedSnapshotsCreatedWithDefaultStrategy(final long msid) { + final List snapshotList = _snapshotDetailsDao.findDetails(AsyncJob.Constants.MS_ID, Long.toString(msid), false); + for (final SnapshotDetailsVO snapshotDetailsVO : snapshotList) { + SnapshotInfo snapshot = snapshotFactory.getSnapshotOnPrimaryStore(snapshotDetailsVO.getResourceId()); + if (snapshot == null) { + _snapshotDetailsDao.remove(snapshotDetailsVO.getId()); + continue; + } + snapshotSrv.processEventOnSnapshotObject(snapshot, Snapshot.Event.OperationFailed); + _snapshotDetailsDao.removeDetail(snapshotDetailsVO.getResourceId(), AsyncJob.Constants.MS_ID); + } + } + @Override public void onManagementNodeJoined(List nodeList, long selfNodeId) { } diff --git a/framework/jobs/src/test/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImplTest.java b/framework/jobs/src/test/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImplTest.java new file mode 100644 index 00000000000..0be5dbc01cb --- /dev/null +++ b/framework/jobs/src/test/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImplTest.java @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.framework.jobs.impl; + +import com.cloud.network.Network; +import com.cloud.network.dao.NetworkDao; +import com.cloud.network.dao.NetworkVO; +import com.cloud.storage.Volume; +import com.cloud.utils.fsm.NoTransitionException; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine; +import com.cloud.vm.VirtualMachineManager; +import com.cloud.vm.dao.VMInstanceDao; +import org.apache.cloudstack.api.ApiCommandResourceType; +import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.Spy; +import org.mockito.junit.MockitoJUnitRunner; + +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class AsyncJobManagerImplTest { + @Spy + @InjectMocks + AsyncJobManagerImpl asyncJobManager; + @Mock + VolumeDataFactory volFactory; + @Mock + VMInstanceDao vmInstanceDao; + @Mock + VirtualMachineManager virtualMachineManager; + @Mock + NetworkDao networkDao; + @Mock + NetworkOrchestrationService networkOrchestrationService; + + @Test + public void testCleanupVolumeResource() { + AsyncJobVO job = new AsyncJobVO(); + job.setInstanceType(ApiCommandResourceType.Volume.toString()); + job.setInstanceId(1L); + VolumeInfo volumeInfo = Mockito.mock(VolumeInfo.class); + when(volFactory.getVolume(Mockito.anyLong())).thenReturn(volumeInfo); + when(volumeInfo.getState()).thenReturn(Volume.State.Attaching); + asyncJobManager.cleanupResources(job); + Mockito.verify(volumeInfo, Mockito.times(1)).stateTransit(Volume.Event.OperationFailed); + } + + @Test + public void testCleanupVmResource() throws NoTransitionException { + AsyncJobVO job = new AsyncJobVO(); + job.setInstanceType(ApiCommandResourceType.VirtualMachine.toString()); + job.setInstanceId(1L); + VMInstanceVO vmInstanceVO = Mockito.mock(VMInstanceVO.class); + when(vmInstanceDao.findById(Mockito.anyLong())).thenReturn(vmInstanceVO); + when(vmInstanceVO.getState()).thenReturn(VirtualMachine.State.Starting); + when(vmInstanceVO.getHostId()).thenReturn(1L); + asyncJobManager.cleanupResources(job); + Mockito.verify(virtualMachineManager, Mockito.times(1)).stateTransitTo(vmInstanceVO, VirtualMachine.Event.OperationFailed, 1L); + } + + @Test + public void testCleanupNetworkResource() throws NoTransitionException { + AsyncJobVO job = new AsyncJobVO(); + job.setInstanceType(ApiCommandResourceType.Network.toString()); + job.setInstanceId(1L); + NetworkVO networkVO = Mockito.mock(NetworkVO.class); + when(networkDao.findById(Mockito.anyLong())).thenReturn(networkVO); + when(networkVO.getState()).thenReturn(Network.State.Implementing); + asyncJobManager.cleanupResources(job); + Mockito.verify(networkOrchestrationService, Mockito.times(1)).stateTransitTo(networkVO, + Network.Event.OperationFailed); + } +} diff --git a/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java b/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java index 04556c49ab2..288211c4330 100644 --- a/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java +++ b/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java @@ -26,6 +26,7 @@ import javax.naming.ConfigurationException; import com.cloud.dc.DataCenter; import com.cloud.network.PublicIpQuarantine; +import com.cloud.utils.fsm.NoTransitionException; import org.apache.cloudstack.acl.ControlledEntity.ACLType; import org.apache.cloudstack.api.command.admin.address.ReleasePodIpCmdByAdmin; import org.apache.cloudstack.api.command.admin.network.DedicateGuestVlanRangeCmd; @@ -824,6 +825,11 @@ public class MockNetworkManagerImpl extends ManagerBase implements NetworkOrches return null; } + @Override + public boolean stateTransitTo(Network network, Network.Event e) throws NoTransitionException { + return true; + } + @Override public boolean isNetworkInlineMode(Network network) { // TODO Auto-generated method stub From 080f171c6d70ca04a93d2fc49838e488d490cf8c Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Fri, 19 Jan 2024 10:48:08 -0500 Subject: [PATCH 09/10] NSX: Cleanup NSX resources during k8s cluster cleanup (#8528) --- .../cluster/KubernetesClusterHelper.java | 1 + .../cluster/KubernetesClusterHelperImpl.java | 13 +++++++++ .../dao/KubernetesClusterVmMapDao.java | 2 ++ .../dao/KubernetesClusterVmMapDaoImpl.java | 12 ++++++++ .../apache/cloudstack/service/NsxElement.java | 21 ++++++++------ .../java/com/cloud/vm/UserVmManagerImpl.java | 28 +++++++++++++++---- .../spring-server-core-managers-context.xml | 5 ++-- 7 files changed, 66 insertions(+), 16 deletions(-) diff --git a/api/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelper.java b/api/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelper.java index e445e50f82c..e160227749d 100644 --- a/api/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelper.java +++ b/api/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelper.java @@ -22,4 +22,5 @@ import org.apache.cloudstack.acl.ControlledEntity; public interface KubernetesClusterHelper extends Adapter { ControlledEntity findByUuid(String uuid); + ControlledEntity findByVmId(long vmId); } diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelperImpl.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelperImpl.java index 0ef916ab959..60bd81c7c5a 100644 --- a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelperImpl.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelperImpl.java @@ -17,6 +17,7 @@ package com.cloud.kubernetes.cluster; import com.cloud.kubernetes.cluster.dao.KubernetesClusterDao; +import com.cloud.kubernetes.cluster.dao.KubernetesClusterVmMapDao; import com.cloud.utils.component.AdapterBase; import org.apache.cloudstack.acl.ControlledEntity; import org.apache.cloudstack.framework.config.ConfigKey; @@ -24,18 +25,30 @@ import org.apache.cloudstack.framework.config.Configurable; import org.springframework.stereotype.Component; import javax.inject.Inject; +import java.util.Objects; @Component public class KubernetesClusterHelperImpl extends AdapterBase implements KubernetesClusterHelper, Configurable { @Inject private KubernetesClusterDao kubernetesClusterDao; + @Inject + private KubernetesClusterVmMapDao kubernetesClusterVmMapDao; @Override public ControlledEntity findByUuid(String uuid) { return kubernetesClusterDao.findByUuid(uuid); } + @Override + public ControlledEntity findByVmId(long vmId) { + KubernetesClusterVmMapVO clusterVmMapVO = kubernetesClusterVmMapDao.getClusterMapFromVmId(vmId); + if (Objects.isNull(clusterVmMapVO)) { + return null; + } + return kubernetesClusterDao.findById(clusterVmMapVO.getClusterId()); + } + @Override public String getConfigComponentName() { return KubernetesClusterHelper.class.getSimpleName(); diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDao.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDao.java index 688a611ac99..45c0b79485c 100644 --- a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDao.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDao.java @@ -23,6 +23,8 @@ import java.util.List; public interface KubernetesClusterVmMapDao extends GenericDao { public List listByClusterId(long clusterId); + + public KubernetesClusterVmMapVO getClusterMapFromVmId(long vmId); public List listByClusterIdAndVmIdsIn(long clusterId, List vmIds); int removeByClusterIdAndVmIdsIn(long clusterId, List vmIds); diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDaoImpl.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDaoImpl.java index b9f2ec917b2..0d90a4cdaca 100644 --- a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDaoImpl.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDaoImpl.java @@ -31,12 +31,17 @@ import com.cloud.utils.db.SearchCriteria; public class KubernetesClusterVmMapDaoImpl extends GenericDaoBase implements KubernetesClusterVmMapDao { private final SearchBuilder clusterIdSearch; + private final SearchBuilder vmIdSearch; public KubernetesClusterVmMapDaoImpl() { clusterIdSearch = createSearchBuilder(); clusterIdSearch.and("clusterId", clusterIdSearch.entity().getClusterId(), SearchCriteria.Op.EQ); clusterIdSearch.and("vmIdsIN", clusterIdSearch.entity().getVmId(), SearchCriteria.Op.IN); clusterIdSearch.done(); + + vmIdSearch = createSearchBuilder(); + vmIdSearch.and("vmId", vmIdSearch.entity().getVmId(), SearchCriteria.Op.EQ); + vmIdSearch.done(); } @Override @@ -47,6 +52,13 @@ public class KubernetesClusterVmMapDaoImpl extends GenericDaoBase sc = vmIdSearch.create(); + sc.setParameters("vmId", vmId); + return findOneBy(sc); + } + @Override public List listByClusterIdAndVmIdsIn(long clusterId, List vmIds) { SearchCriteria sc = clusterIdSearch.create(); diff --git a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxElement.java b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxElement.java index 0986c367cf9..571e68496d6 100644 --- a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxElement.java +++ b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxElement.java @@ -505,10 +505,12 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns if (!canHandle(network, Network.Service.PortForwarding)) { return false; } + boolean result = true; for (PortForwardingRule rule : rules) { IPAddressVO publicIp = ApiDBUtils.findIpAddressById(rule.getSourceIpAddressId()); UserVm vm = ApiDBUtils.findUserVmById(rule.getVirtualMachineId()); - if (vm == null || networkModel.getNicInNetwork(vm.getId(), network.getId()) == null) { + if ((vm == null && (rule.getState() != FirewallRule.State.Revoke)) || + (vm != null && networkModel.getNicInNetwork(vm.getId(), network.getId()) == null)) { continue; } NsxOpObject nsxObject = getNsxOpObject(network); @@ -523,8 +525,8 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns .setNetworkResourceId(nsxObject.getNetworkResourceId()) .setNetworkResourceName(nsxObject.getNetworkResourceName()) .setVpcResource(nsxObject.isVpcResource()) - .setVmId(vm.getId()) - .setVmIp(vm.getPrivateIpAddress()) + .setVmId(Objects.nonNull(vm) ? vm.getId() : 0) + .setVmIp(Objects.nonNull(vm) ? vm.getPrivateIpAddress() : null) .setPublicIp(publicIp.getAddress().addr()) .setPrivatePort(privatePort) .setPublicPort(publicPort) @@ -532,12 +534,12 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns .setProtocol(rule.getProtocol().toUpperCase(Locale.ROOT)) .build(); if (rule.getState() == FirewallRule.State.Add) { - return nsxService.createPortForwardRule(networkRule); + result &= nsxService.createPortForwardRule(networkRule); } else if (rule.getState() == FirewallRule.State.Revoke) { - return nsxService.deletePortForwardRule(networkRule); + result &= nsxService.deletePortForwardRule(networkRule); } } - return true; + return result; } public Pair getVpcOrNetwork(Long vpcId, long networkId) { @@ -613,6 +615,7 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns @Override public boolean applyLBRules(Network network, List rules) throws ResourceUnavailableException { + boolean result = true; for (LoadBalancingRule loadBalancingRule : rules) { if (loadBalancingRule.getState() == FirewallRule.State.Active) { continue; @@ -638,12 +641,12 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns .setAlgorithm(loadBalancingRule.getAlgorithm()) .build(); if (loadBalancingRule.getState() == FirewallRule.State.Add) { - return nsxService.createLbRule(networkRule); + result &= nsxService.createLbRule(networkRule); } else if (loadBalancingRule.getState() == FirewallRule.State.Revoke) { - return nsxService.deleteLbRule(networkRule); + result &= nsxService.deleteLbRule(networkRule); } } - return true; + return result; } @Override diff --git a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java index 2927db638ab..844e0380757 100644 --- a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java +++ b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java @@ -51,6 +51,9 @@ import javax.naming.ConfigurationException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; +import com.cloud.kubernetes.cluster.KubernetesClusterHelper; +import com.cloud.network.dao.NsxProviderDao; +import com.cloud.network.element.NsxProviderVO; import org.apache.cloudstack.acl.ControlledEntity; import org.apache.cloudstack.acl.ControlledEntity.ACLType; import org.apache.cloudstack.acl.SecurityChecker.AccessType; @@ -589,6 +592,8 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir @Inject VMScheduleManager vmScheduleManager; + @Inject + NsxProviderDao nsxProviderDao; private ScheduledExecutorService _executor = null; private ScheduledExecutorService _vmIpFetchExecutor = null; @@ -597,6 +602,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir private boolean _dailyOrHourly = false; private int capacityReleaseInterval; private ExecutorService _vmIpFetchThreadExecutor; + private List kubernetesClusterHelpers; private String _instance; @@ -610,6 +616,14 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir private static final int NUM_OF_2K_BLOCKS = 512; private static final int MAX_HTTP_POST_LENGTH = NUM_OF_2K_BLOCKS * MAX_USER_DATA_LENGTH_BYTES; + public List getKubernetesClusterHelpers() { + return kubernetesClusterHelpers; + } + + public void setKubernetesClusterHelpers(final List kubernetesClusterHelpers) { + this.kubernetesClusterHelpers = kubernetesClusterHelpers; + } + @Inject private OrchestrationService _orchSrvc; @@ -2528,11 +2542,15 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir } // cleanup port forwarding rules - if (_rulesMgr.revokePortForwardingRulesForVm(vmId)) { - s_logger.debug("Port forwarding rules are removed successfully as a part of vm id=" + vmId + " expunge"); - } else { - success = false; - s_logger.warn("Fail to remove port forwarding rules as a part of vm id=" + vmId + " expunge"); + VMInstanceVO vmInstanceVO = _vmInstanceDao.findById(vmId); + NsxProviderVO nsx = nsxProviderDao.findByZoneId(vmInstanceVO.getDataCenterId()); + if (Objects.isNull(nsx) || Objects.isNull(kubernetesClusterHelpers.get(0).findByVmId(vmId))) { + if (_rulesMgr.revokePortForwardingRulesForVm(vmId)) { + s_logger.debug("Port forwarding rules are removed successfully as a part of vm id=" + vmId + " expunge"); + } else { + success = false; + s_logger.warn("Fail to remove port forwarding rules as a part of vm id=" + vmId + " expunge"); + } } // cleanup load balancer rules diff --git a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 7227264e229..9656d544fbb 100644 --- a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -106,8 +106,9 @@ - - + + + Date: Fri, 19 Jan 2024 12:53:23 -0500 Subject: [PATCH 10/10] fix test failure --- .../java/org/apache/cloudstack/service/NsxElementTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/service/NsxElementTest.java b/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/service/NsxElementTest.java index ae3143e32b8..39fb2e06120 100644 --- a/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/service/NsxElementTest.java +++ b/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/service/NsxElementTest.java @@ -273,8 +273,13 @@ public class NsxElementTest { 5L, 2L, 15L); rule.setState(FirewallRule.State.Revoke); Network.Service service = new Network.Service("service1", new Network.Capability("capability")); - + VpcVO vpcVO = Mockito.mock(VpcVO.class); + when(vpcDao.findById(1L)).thenReturn(vpcVO); + when(vpcVO.getDomainId()).thenReturn(2L); + IPAddressVO ipAddress = new IPAddressVO(new Ip("10.1.13.10"), 1L, 1L, 1L,false); + when(ApiDBUtils.findIpAddressById(anyLong())).thenReturn(ipAddress); when(nsxElement.canHandle(networkVO, service)).thenReturn(true); + when(nsxService.deletePortForwardRule(any(NsxNetworkRule.class))).thenReturn(true); assertTrue(nsxElement.applyPFRules(networkVO, List.of(rule))); }