From c3b77cb7b82bfe8e94b024e81e1721532da51a9c Mon Sep 17 00:00:00 2001 From: Vishesh Date: Mon, 15 Jan 2024 13:56:34 +0530 Subject: [PATCH 01/11] Fix host stuck in connecting state (#8502) There are a lot of test failures due to test_vm_life_cycle.py in multiple PRs due to host not available for migration of VMs. #8438 (comment) #8433 (comment) #7344 (comment) While debugging I noticed that the hosts get stuck in Connecting state because MS is waiting for a response of the ReadyCommand from the agent. Since we take a lock on connection and disconnection, restarting the agent doesn't work. To fix this, we have to restart the MS or wait for ~1 hour (default timeout). On the agent side, it gets stuck waiting for a response from the Script execution. To reproduce, run smoke/test_vm_life_cycle.py (TestSecuredVmMigration test class to be specific). Once the tests are complete, you will notice that some hosts are stuck in Connecting state. And restarting the agent fails due to the named lock. Locks on DB can be checked using the below query. SELECT * FROM performance_schema.metadata_locks INNER JOIN performance_schema.threads ON THREAD_ID = OWNER_THREAD_ID WHERE PROCESSLIST_ID <> CONNECTION_ID() \G; This PR adds a wait for the ready command and a timeout to the Script execution to ensure that the thread doesn't get stuck and the named lock from database is released. --- .../src/main/java/com/cloud/agent/manager/AgentManagerImpl.java | 1 + .../storage/datastore/provider/DefaultHostListener.java | 1 + .../kvm/resource/wrapper/LibvirtReadyCommandWrapper.java | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index 023f9c18e0a..d8671ed29df 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -596,6 +596,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl final Long dcId = host.getDataCenterId(); final ReadyCommand ready = new ReadyCommand(dcId, host.getId(), NumbersUtil.enableHumanReadableSizes); + ready.setWait(60); final Answer answer = easySend(hostId, ready); if (answer == null || !answer.getResult()) { // this is tricky part for secondary storage diff --git a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java index e344a87831d..90e8742c84d 100644 --- a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java +++ b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java @@ -121,6 +121,7 @@ public class DefaultHostListener implements HypervisorHostListener { public boolean hostConnect(long hostId, long poolId) throws StorageConflictException { StoragePool pool = (StoragePool) this.dataStoreMgr.getDataStore(poolId, DataStoreRole.Primary); ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(true, pool); + cmd.setWait(60); final Answer answer = agentMgr.easySend(hostId, cmd); if (answer == null) { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java index fc57cd412f0..4df74decdea 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java @@ -55,7 +55,7 @@ public final class LibvirtReadyCommandWrapper extends CommandWrapper Date: Thu, 18 Jan 2024 13:36:28 +0100 Subject: [PATCH 02/11] scripts: mark multipath scripts as executable (#8524) This PR marks the multipath scripts as executable. This fixes the issue that in 4.19.0.0-RC2, vms can not be stopped in ubuntu hosts. 2024-01-17 12:56:26,061 ERROR [c.c.v.VmWorkJobHandlerProxy] (Work-Job-Executor-4:ctx-e3503563 job-38/job-39 ctx-42706275) (logid:81ede4e9) Invocation exception, caused by: com.cloud.utils.exception.CloudRuntimeException: Unable to stop the virtual machine due to java.lang.NullPointerException at com.cloud.utils.script.Script.getExitValue(Script.java:74) at com.cloud.hypervisor.kvm.storage.MultipathSCSIAdapterBase.runScript(MultipathSCSIAdapterBase.java:476) at com.cloud.hypervisor.kvm.storage.MultipathSCSIAdapterBase.disconnectPhysicalDiskByPath(MultipathSCSIAdapterBase.java:226) at com.cloud.hypervisor.kvm.storage.KVMStoragePoolManager.disconnectPhysicalDiskByPath(KVMStoragePoolManager.java:205) at com.cloud.hypervisor.kvm.resource.LibvirtComputingResource.cleanupDisk(LibvirtComputingResource.java:3335) at com.cloud.hypervisor.kvm.resource.wrapper.LibvirtStopCommandWrapper.execute(LibvirtStopCommandWrapper.java:101) at com.cloud.hypervisor.kvm.resource.wrapper.LibvirtStopCommandWrapper.execute(LibvirtStopCommandWrapper.java:49) at com.cloud.hypervisor.kvm.resource.wrapper.LibvirtRequestWrapper.execute(LibvirtRequestWrapper.java:78) at com.cloud.hypervisor.kvm.resource.LibvirtComputingResource.executeRequest(LibvirtComputingResource.java:1903) --- scripts/storage/multipath/cleanStaleMaps.sh | 0 scripts/storage/multipath/connectVolume.sh | 0 scripts/storage/multipath/copyVolume.sh | 0 scripts/storage/multipath/disconnectVolume.sh | 0 scripts/storage/multipath/resizeVolume.sh | 0 5 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/storage/multipath/cleanStaleMaps.sh mode change 100644 => 100755 scripts/storage/multipath/connectVolume.sh mode change 100644 => 100755 scripts/storage/multipath/copyVolume.sh mode change 100644 => 100755 scripts/storage/multipath/disconnectVolume.sh mode change 100644 => 100755 scripts/storage/multipath/resizeVolume.sh diff --git a/scripts/storage/multipath/cleanStaleMaps.sh b/scripts/storage/multipath/cleanStaleMaps.sh old mode 100644 new mode 100755 diff --git a/scripts/storage/multipath/connectVolume.sh b/scripts/storage/multipath/connectVolume.sh old mode 100644 new mode 100755 diff --git a/scripts/storage/multipath/copyVolume.sh b/scripts/storage/multipath/copyVolume.sh old mode 100644 new mode 100755 diff --git a/scripts/storage/multipath/disconnectVolume.sh b/scripts/storage/multipath/disconnectVolume.sh old mode 100644 new mode 100755 diff --git a/scripts/storage/multipath/resizeVolume.sh b/scripts/storage/multipath/resizeVolume.sh old mode 100644 new mode 100755 From 3bcf6f0faf496e28c39429303b2187175877d426 Mon Sep 17 00:00:00 2001 From: Andrija Panic <45762285+andrijapanicsb@users.noreply.github.com> Date: Thu, 18 Jan 2024 13:37:07 +0100 Subject: [PATCH 03/11] Rename "Import QCOW...." to "Import QCOW2....." (#8519) Minor UI updates, renaming "Import QCOW...." to "Import QCOW2....." --- ui/public/locales/en.json | 10 +++++----- ui/src/views/tools/ManageInstances.vue | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ui/public/locales/en.json b/ui/public/locales/en.json index 443b086f580..b86e39aba7b 100644 --- a/ui/public/locales/en.json +++ b/ui/public/locales/en.json @@ -675,9 +675,9 @@ "label.deploymentplanner": "Deployment planner", "label.desc.db.stats": "Database Statistics", "label.desc.importexportinstancewizard": "Import and export Instances to/from an existing VMware or KVM cluster.", -"label.desc.import.ext.kvm.wizard": "Import libvirt domain from KVM Host", -"label.desc.import.local.kvm.wizard": "Import QCOW image from Local Storage", -"label.desc.import.shared.kvm.wizard": "Import QCOW image from Shared Storage", +"label.desc.import.ext.kvm.wizard": "Import Instance from remote KVM host", +"label.desc.import.local.kvm.wizard": "Import QCOW2 image from Local Storage", +"label.desc.import.shared.kvm.wizard": "Import QCOW2 image from Shared Storage", "label.desc.ingesttinstancewizard": "Ingest instances from an external KVM host", "label.desc.importmigratefromvmwarewizard": "Import instances from VMware into a KVM cluster", "label.desc.usage.stats": "Usage Server Statistics", @@ -2699,8 +2699,8 @@ "message.desc.host": "Each cluster must contain at least one host (computer) for guest Instances to run on. We will add the first host now. For a host to function in CloudStack, you must install hypervisor software on the host, assign an IP address to the host, and ensure the host is connected to the CloudStack management server.

Give the host's DNS or IP address, the user name (usually root) and password, and any labels you use to categorize hosts.", "message.desc.importingestinstancewizard": "This feature only applies to libvirt based KVM instances. Only Stopped instances can be ingested", "message.desc.import.ext.kvm.wizard": "Import libvirt domain from External KVM Host not managed by CloudStack", -"message.desc.import.local.kvm.wizard": "Import QCOW image from Local Storage of selected KVM Host", -"message.desc.import.shared.kvm.wizard": "Import QCOW image from selected Primary Storage Pool", +"message.desc.import.local.kvm.wizard": "Import QCOW2 image from Local Storage of selected KVM Host", +"message.desc.import.shared.kvm.wizard": "Import QCOW2 image from selected Primary Storage Pool", "message.desc.importexportinstancewizard": "By choosing to manage an Instance, CloudStack takes over the orchestration of that Instance. Unmanaging an Instance removes CloudStack ability to manage it. In both cases, the Instance is left running and no changes are done to the VM on the hypervisor.

For KVM, managing a VM is an experimental feature.", "message.desc.importmigratefromvmwarewizard": "By selecting an existing or external VMware Datacenter and an instance to import, CloudStack migrates the selected instance from VMware to KVM on a conversion host using virt-v2v and imports it into a KVM cluster", "message.desc.primary.storage": "Each cluster must contain one or more primary storage servers. We will add the first one now. Primary storage contains the disk volumes for all the Instances running on hosts in the cluster. Use any standards-compliant protocol that is supported by the underlying hypervisor.", diff --git a/ui/src/views/tools/ManageInstances.vue b/ui/src/views/tools/ManageInstances.vue index c7913cabbe4..a869768bf39 100644 --- a/ui/src/views/tools/ManageInstances.vue +++ b/ui/src/views/tools/ManageInstances.vue @@ -567,7 +567,7 @@ export default { }, { name: 'external', - label: 'Import libvirt domain from KVM Host', + label: 'Import Instance from remote KVM host', sourceDestHypervisors: { kvm: 'kvm' }, @@ -576,7 +576,7 @@ export default { }, { name: 'local', - label: 'Import QCOW image from Local Storage', + label: 'Import QCOW2 image from Local Storage', sourceDestHypervisors: { kvm: 'kvm' }, @@ -585,7 +585,7 @@ export default { }, { name: 'shared', - label: 'Import QCOW image from Shared Storage', + label: 'Import QCOW2 image from Shared Storage', sourceDestHypervisors: { kvm: 'kvm' }, From 8d42ca8ccfd0b1a35f61e1229437e0e52b67eaea Mon Sep 17 00:00:00 2001 From: Nicolas Vazquez Date: Thu, 18 Jan 2024 11:46:06 -0300 Subject: [PATCH 04/11] Use project version on pom dependencies (#8529) This PR fixes the POM dependencies from a hardcoded value to the project.version property on dependencies --- framework/direct-download/pom.xml | 2 +- .../non-strict-host-anti-affinity/pom.xml | 2 +- plugins/pom.xml | 4 ++-- vmware-base/pom.xml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/framework/direct-download/pom.xml b/framework/direct-download/pom.xml index 4a2dfb44c64..8204370e2c8 100644 --- a/framework/direct-download/pom.xml +++ b/framework/direct-download/pom.xml @@ -25,7 +25,7 @@ org.apache.cloudstack cloud-utils - 4.19.0.0-SNAPSHOT + ${project.version} compile diff --git a/plugins/affinity-group-processors/non-strict-host-anti-affinity/pom.xml b/plugins/affinity-group-processors/non-strict-host-anti-affinity/pom.xml index adb4de23673..4ecc9645f9b 100644 --- a/plugins/affinity-group-processors/non-strict-host-anti-affinity/pom.xml +++ b/plugins/affinity-group-processors/non-strict-host-anti-affinity/pom.xml @@ -25,7 +25,7 @@ org.apache.cloudstack cloud-plugin-non-strict-host-affinity - 4.19.0.0-SNAPSHOT + ${project.version} compile diff --git a/plugins/pom.xml b/plugins/pom.xml index 2edbbd5ee1d..8cea8bbc5c5 100755 --- a/plugins/pom.xml +++ b/plugins/pom.xml @@ -195,7 +195,7 @@ org.apache.cloudstack cloud-engine-storage - 4.19.0.0-SNAPSHOT + ${project.version} compile @@ -213,7 +213,7 @@ org.apache.cloudstack cloud-engine-storage-object - 4.19.0.0-SNAPSHOT + ${project.version} compile diff --git a/vmware-base/pom.xml b/vmware-base/pom.xml index 623d470d760..3ed658d55f9 100644 --- a/vmware-base/pom.xml +++ b/vmware-base/pom.xml @@ -81,7 +81,7 @@ org.apache.cloudstack cloud-core - 4.19.0.0-SNAPSHOT + ${project.version} compile From 80bbb29abfb90476cec155c2f831e626710bf420 Mon Sep 17 00:00:00 2001 From: kishankavala Date: Fri, 19 Jan 2024 13:26:25 +0530 Subject: [PATCH 05/11] CleanUp Async Jobs after mgmt server maintenance (#8394) This PR fixes moves resources stuck in transition state during async job cleanup Problem: During maintenance of the management server, other servers in the cluster or the same server after a restart initiate async job cleanup. However, this process leaves resources in a transitional state. The only recovery option currently available is to make direct database changes. Solution: This PR introduces a resolution by changing Volume, Virtual Machine, and Network resources from their transitional states. This adjustment enables the reattempt of failed operations without the need for manual database modifications. --- .../main/java/com/cloud/storage/Volume.java | 54 ++++--- .../service/NetworkOrchestrationService.java | 3 + .../subsystem/api/storage/VolumeService.java | 5 +- .../orchestration/NetworkOrchestrator.java | 3 +- .../storage/volume/VolumeObject.java | 2 +- .../storage/volume/VolumeServiceImpl.java | 12 +- .../jobs/impl/AsyncJobManagerImpl.java | 143 +++++++++++++++--- .../jobs/impl/AsyncJobManagerImplTest.java | 96 ++++++++++++ .../com/cloud/vpc/MockNetworkManagerImpl.java | 6 + 9 files changed, 274 insertions(+), 50 deletions(-) create mode 100644 framework/jobs/src/test/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImplTest.java diff --git a/api/src/main/java/com/cloud/storage/Volume.java b/api/src/main/java/com/cloud/storage/Volume.java index 4a14197bd30..308ed2544ed 100644 --- a/api/src/main/java/com/cloud/storage/Volume.java +++ b/api/src/main/java/com/cloud/storage/Volume.java @@ -39,30 +39,38 @@ public interface Volume extends ControlledEntity, Identity, InternalIdentity, Ba }; enum State { - Allocated("The volume is allocated but has not been created yet."), - Creating("The volume is being created. getPoolId() should reflect the pool where it is being created."), - Ready("The volume is ready to be used."), - Migrating("The volume is migrating to other storage pool"), - Snapshotting("There is a snapshot created on this volume, not backed up to secondary storage yet"), - RevertSnapshotting("There is a snapshot created on this volume, the volume is being reverting from snapshot"), - Resizing("The volume is being resized"), - Expunging("The volume is being expunging"), - Expunged("The volume has been expunged, and can no longer be recovered"), - Destroy("The volume is destroyed, and can be recovered."), - Destroying("The volume is destroying, and can't be recovered."), - UploadOp("The volume upload operation is in progress or in short the volume is on secondary storage"), - Copying("Volume is copying from image store to primary, in case it's an uploaded volume"), - Uploaded("Volume is uploaded"), - NotUploaded("The volume entry is just created in DB, not yet uploaded"), - UploadInProgress("Volume upload is in progress"), - UploadError("Volume upload encountered some error"), - UploadAbandoned("Volume upload is abandoned since the upload was never initiated within a specified time"), - Attaching("The volume is attaching to a VM from Ready state."), - Restoring("The volume is being restored from backup."); + Allocated(false, "The volume is allocated but has not been created yet."), + Creating(true, "The volume is being created. getPoolId() should reflect the pool where it is being created."), + Ready(false, "The volume is ready to be used."), + Migrating(true, "The volume is migrating to other storage pool"), + Snapshotting(true, "There is a snapshot created on this volume, not backed up to secondary storage yet"), + RevertSnapshotting(true, "There is a snapshot created on this volume, the volume is being reverting from snapshot"), + Resizing(true, "The volume is being resized"), + Expunging(true, "The volume is being expunging"), + Expunged(false, "The volume has been expunged, and can no longer be recovered"), + Destroy(false, "The volume is destroyed, and can be recovered."), + Destroying(false, "The volume is destroying, and can't be recovered."), + UploadOp(true, "The volume upload operation is in progress or in short the volume is on secondary storage"), + Copying(true, "Volume is copying from image store to primary, in case it's an uploaded volume"), + Uploaded(false, "Volume is uploaded"), + NotUploaded(true, "The volume entry is just created in DB, not yet uploaded"), + UploadInProgress(true, "Volume upload is in progress"), + UploadError(false, "Volume upload encountered some error"), + UploadAbandoned(false, "Volume upload is abandoned since the upload was never initiated within a specified time"), + Attaching(true, "The volume is attaching to a VM from Ready state."), + Restoring(true, "The volume is being restored from backup."); + + boolean _transitional; String _description; - private State(String description) { + /** + * Volume State + * @param transitional true for transition/non-final state, otherwise false + * @param description description of the state + */ + private State(boolean transitional, String description) { + _transitional = transitional; _description = description; } @@ -70,6 +78,10 @@ public interface Volume extends ControlledEntity, Identity, InternalIdentity, Ba return s_fsm; } + public boolean isTransitional() { + return _transitional; + } + public String getDescription() { return _description; } diff --git a/engine/api/src/main/java/org/apache/cloudstack/engine/orchestration/service/NetworkOrchestrationService.java b/engine/api/src/main/java/org/apache/cloudstack/engine/orchestration/service/NetworkOrchestrationService.java index c691b8b0942..2005b70b439 100644 --- a/engine/api/src/main/java/org/apache/cloudstack/engine/orchestration/service/NetworkOrchestrationService.java +++ b/engine/api/src/main/java/org/apache/cloudstack/engine/orchestration/service/NetworkOrchestrationService.java @@ -50,6 +50,7 @@ import com.cloud.network.rules.LoadBalancerContainer.Scheme; import com.cloud.offering.NetworkOffering; import com.cloud.user.Account; import com.cloud.user.User; +import com.cloud.utils.fsm.NoTransitionException; import com.cloud.utils.Pair; import com.cloud.vm.Nic; import com.cloud.vm.NicProfile; @@ -268,6 +269,8 @@ public interface NetworkOrchestrationService { Map finalizeServicesAndProvidersForNetwork(NetworkOffering offering, Long physicalNetworkId); + boolean stateTransitTo(Network network, Network.Event e) throws NoTransitionException; + List getProvidersForServiceInNetwork(Network network, Service service); StaticNatServiceProvider getStaticNatProviderForNetwork(Network network); diff --git a/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/VolumeService.java b/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/VolumeService.java index 2c12b70e9eb..50aee83f497 100644 --- a/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/VolumeService.java +++ b/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/VolumeService.java @@ -18,13 +18,13 @@ */ package org.apache.cloudstack.engine.subsystem.api.storage; -import com.cloud.agent.api.Answer; import java.util.Map; import org.apache.cloudstack.engine.cloud.entity.api.VolumeEntity; import org.apache.cloudstack.framework.async.AsyncCallFuture; import org.apache.cloudstack.storage.command.CommandResult; +import com.cloud.agent.api.Answer; import com.cloud.agent.api.to.VirtualMachineTO; import com.cloud.exception.StorageAccessException; import com.cloud.host.Host; @@ -35,6 +35,9 @@ import com.cloud.user.Account; import com.cloud.utils.Pair; public interface VolumeService { + + String SNAPSHOT_ID = "SNAPSHOT_ID"; + class VolumeApiResult extends CommandResult { private final VolumeInfo volume; diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java index 0f6dfe60866..6c10a02abba 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java @@ -4424,7 +4424,8 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra return accessDetails; } - protected boolean stateTransitTo(final NetworkVO network, final Network.Event e) throws NoTransitionException { + @Override + public boolean stateTransitTo(final Network network, final Network.Event e) throws NoTransitionException { return _stateMachine.transitTo(network, e, null, _networksDao); } diff --git a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeObject.java b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeObject.java index 5ebee87acd4..9e41e0d4d0e 100644 --- a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeObject.java +++ b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeObject.java @@ -851,7 +851,7 @@ public class VolumeObject implements VolumeInfo { @Override public boolean delete() { - return dataStore == null ? true : dataStore.delete(this); + return dataStore == null || dataStore.delete(this); } @Override diff --git a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java index c0ef227251c..8a3cd39ecbf 100644 --- a/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java +++ b/engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java @@ -32,10 +32,6 @@ import java.util.concurrent.ExecutionException; import javax.inject.Inject; -import org.apache.cloudstack.secret.dao.PassphraseDao; -import com.cloud.storage.VMTemplateVO; -import com.cloud.storage.dao.VMTemplateDao; -import com.cloud.storage.resource.StorageProcessor; import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.dao.AnnotationDao; import org.apache.cloudstack.engine.cloud.entity.api.VolumeEntity; @@ -66,6 +62,7 @@ import org.apache.cloudstack.framework.async.AsyncCallbackDispatcher; import org.apache.cloudstack.framework.async.AsyncCompletionCallback; import org.apache.cloudstack.framework.async.AsyncRpcContext; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.cloudstack.secret.dao.PassphraseDao; import org.apache.cloudstack.storage.RemoteHostEndPoint; import org.apache.cloudstack.storage.command.CommandResult; import org.apache.cloudstack.storage.command.CopyCmdAnswer; @@ -83,6 +80,7 @@ import org.apache.cloudstack.storage.datastore.db.VolumeDataStoreVO; import org.apache.cloudstack.storage.image.store.TemplateObject; import org.apache.cloudstack.storage.to.TemplateObjectTO; import org.apache.cloudstack.storage.to.VolumeObjectTO; +import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; @@ -122,13 +120,16 @@ import com.cloud.storage.StoragePool; import com.cloud.storage.VMTemplateStoragePoolVO; import com.cloud.storage.VMTemplateStorageResourceAssoc; import com.cloud.storage.VMTemplateStorageResourceAssoc.Status; +import com.cloud.storage.VMTemplateVO; import com.cloud.storage.Volume; import com.cloud.storage.Volume.State; import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.VolumeVO; +import com.cloud.storage.dao.VMTemplateDao; import com.cloud.storage.dao.VMTemplatePoolDao; import com.cloud.storage.dao.VolumeDao; import com.cloud.storage.dao.VolumeDetailsDao; +import com.cloud.storage.resource.StorageProcessor; import com.cloud.storage.snapshot.SnapshotApiService; import com.cloud.storage.snapshot.SnapshotManager; import com.cloud.storage.template.TemplateConstants; @@ -142,7 +143,6 @@ import com.cloud.utils.db.DB; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.vm.VirtualMachine; -import org.apache.commons.lang3.StringUtils; @Component public class VolumeServiceImpl implements VolumeService { @@ -206,8 +206,6 @@ public class VolumeServiceImpl implements VolumeService { @Inject private PassphraseDao passphraseDao; - private final static String SNAPSHOT_ID = "SNAPSHOT_ID"; - public VolumeServiceImpl() { } diff --git a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java index 9100ee5e34b..3c0f81d0bc1 100644 --- a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java +++ b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java @@ -38,9 +38,13 @@ import javax.naming.ConfigurationException; import org.apache.cloudstack.api.ApiCommandResourceType; import org.apache.cloudstack.api.ApiErrorCode; import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotDataFactory; import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotInfo; import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotService; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeService; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; import org.apache.cloudstack.framework.jobs.AsyncJob; @@ -65,7 +69,12 @@ import org.apache.log4j.MDC; import org.apache.log4j.NDC; import com.cloud.cluster.ClusterManagerListener; +import com.cloud.network.Network; +import com.cloud.network.dao.NetworkDao; +import com.cloud.network.dao.NetworkVO; import com.cloud.storage.Snapshot; +import com.cloud.storage.Volume; +import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.dao.SnapshotDao; import com.cloud.storage.dao.SnapshotDetailsDao; import com.cloud.storage.dao.SnapshotDetailsVO; @@ -93,7 +102,11 @@ import com.cloud.utils.db.TransactionCallbackNoReturn; import com.cloud.utils.db.TransactionStatus; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.utils.exception.ExceptionUtil; +import com.cloud.utils.fsm.NoTransitionException; import com.cloud.utils.mgmt.JmxUtil; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine; +import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.dao.VMInstanceDao; public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, ClusterManagerListener, Configurable { @@ -148,6 +161,15 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, @Inject private SnapshotDetailsDao _snapshotDetailsDao; + @Inject + private VolumeDataFactory volFactory; + @Inject + private VirtualMachineManager virtualMachineManager; + @Inject + private NetworkDao networkDao; + @Inject + private NetworkOrchestrationService networkOrchestrationService; + private volatile long _executionRunNumber = 1; private final ScheduledExecutorService _heartbeatScheduler = Executors.newScheduledThreadPool(1, new NamedThreadFactory("AsyncJobMgr-Heartbeat")); @@ -1089,6 +1111,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, if (s_logger.isDebugEnabled()) { s_logger.debug("Cancel left-over job-" + job.getId()); } + cleanupResources(job); job.setStatus(JobInfo.Status.FAILED); job.setResultCode(ApiErrorCode.INTERNAL_ERROR.getHttpCode()); job.setResult("job cancelled because of management server restart or shutdown"); @@ -1101,26 +1124,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, s_logger.debug("Purge queue item for cancelled job-" + job.getId()); } _queueMgr.purgeAsyncJobQueueItemId(job.getId()); - if (ApiCommandResourceType.Volume.toString().equals(job.getInstanceType())) { - - try { - _volumeDetailsDao.removeDetail(job.getInstanceId(), "SNAPSHOT_ID"); - _volsDao.remove(job.getInstanceId()); - } catch (Exception e) { - s_logger.error("Unexpected exception while removing concurrent request meta data :" + e.getLocalizedMessage()); - } - } - } - final List snapshotList = _snapshotDetailsDao.findDetails(AsyncJob.Constants.MS_ID, Long.toString(msid), false); - for (final SnapshotDetailsVO snapshotDetailsVO : snapshotList) { - SnapshotInfo snapshot = snapshotFactory.getSnapshotOnPrimaryStore(snapshotDetailsVO.getResourceId()); - if (snapshot == null) { - _snapshotDetailsDao.remove(snapshotDetailsVO.getId()); - continue; - } - snapshotSrv.processEventOnSnapshotObject(snapshot, Snapshot.Event.OperationFailed); - _snapshotDetailsDao.removeDetail(snapshotDetailsVO.getResourceId(), AsyncJob.Constants.MS_ID); } + cleanupFailedSnapshotsCreatedWithDefaultStrategy(msid); } }); } catch (Throwable e) { @@ -1128,6 +1133,106 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, } } + /* + Cleanup Resources in transition state and move them to appropriate state + This will allow other operation on the resource, instead of being stuck in transition state + */ + protected boolean cleanupResources(AsyncJobVO job) { + try { + ApiCommandResourceType resourceType = ApiCommandResourceType.fromString(job.getInstanceType()); + if (resourceType == null) { + s_logger.warn("Unknown ResourceType. Skip Cleanup: " + job.getInstanceType()); + return true; + } + switch (resourceType) { + case Volume: + return cleanupVolume(job.getInstanceId()); + case VirtualMachine: + return cleanupVirtualMachine(job.getInstanceId()); + case Network: + return cleanupNetwork(job.getInstanceId()); + } + } catch (Exception e) { + s_logger.warn("Error while cleaning up resource: [" + job.getInstanceType().toString() + "] with Id: " + job.getInstanceId(), e); + return false; + } + return true; + } + + private boolean cleanupVolume(final long volumeId) { + VolumeInfo vol = volFactory.getVolume(volumeId); + if (vol == null) { + s_logger.warn("Volume not found. Skip Cleanup. VolumeId: " + volumeId); + return true; + } + if (vol.getState().isTransitional()) { + s_logger.debug("Cleaning up volume with Id: " + volumeId); + boolean status = vol.stateTransit(Volume.Event.OperationFailed); + cleanupFailedVolumesCreatedFromSnapshots(volumeId); + return status; + } + s_logger.debug("Volume not in transition state. Skip cleanup. VolumeId: " + volumeId); + return true; + } + + private boolean cleanupVirtualMachine(final long vmId) throws Exception { + VMInstanceVO vmInstanceVO = _vmInstanceDao.findById(vmId); + if (vmInstanceVO == null) { + s_logger.warn("Instance not found. Skip Cleanup. InstanceId: " + vmId); + return true; + } + if (vmInstanceVO.getState().isTransitional()) { + s_logger.debug("Cleaning up Instance with Id: " + vmId); + return virtualMachineManager.stateTransitTo(vmInstanceVO, VirtualMachine.Event.OperationFailed, vmInstanceVO.getHostId()); + } + s_logger.debug("Instance not in transition state. Skip cleanup. InstanceId: " + vmId); + return true; + } + + private boolean cleanupNetwork(final long networkId) throws Exception { + NetworkVO networkVO = networkDao.findById(networkId); + if (networkVO == null) { + s_logger.warn("Network not found. Skip Cleanup. NetworkId: " + networkId); + return true; + } + if (Network.State.Implementing.equals(networkVO.getState())) { + try { + s_logger.debug("Cleaning up Network with Id: " + networkId); + return networkOrchestrationService.stateTransitTo(networkVO, Network.Event.OperationFailed); + } catch (final NoTransitionException e) { + networkVO.setState(Network.State.Shutdown); + networkDao.update(networkVO.getId(), networkVO); + } + } + s_logger.debug("Network not in transition state. Skip cleanup. NetworkId: " + networkId); + return true; + } + + private void cleanupFailedVolumesCreatedFromSnapshots(final long volumeId) { + try { + VolumeDetailVO volumeDetail = _volumeDetailsDao.findDetail(volumeId, VolumeService.SNAPSHOT_ID); + if (volumeDetail != null) { + _volumeDetailsDao.removeDetail(volumeId, VolumeService.SNAPSHOT_ID); + _volsDao.remove(volumeId); + } + } catch (Exception e) { + s_logger.error("Unexpected exception while removing concurrent request meta data :" + e.getLocalizedMessage()); + } + } + + private void cleanupFailedSnapshotsCreatedWithDefaultStrategy(final long msid) { + final List snapshotList = _snapshotDetailsDao.findDetails(AsyncJob.Constants.MS_ID, Long.toString(msid), false); + for (final SnapshotDetailsVO snapshotDetailsVO : snapshotList) { + SnapshotInfo snapshot = snapshotFactory.getSnapshotOnPrimaryStore(snapshotDetailsVO.getResourceId()); + if (snapshot == null) { + _snapshotDetailsDao.remove(snapshotDetailsVO.getId()); + continue; + } + snapshotSrv.processEventOnSnapshotObject(snapshot, Snapshot.Event.OperationFailed); + _snapshotDetailsDao.removeDetail(snapshotDetailsVO.getResourceId(), AsyncJob.Constants.MS_ID); + } + } + @Override public void onManagementNodeJoined(List nodeList, long selfNodeId) { } diff --git a/framework/jobs/src/test/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImplTest.java b/framework/jobs/src/test/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImplTest.java new file mode 100644 index 00000000000..0be5dbc01cb --- /dev/null +++ b/framework/jobs/src/test/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImplTest.java @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.framework.jobs.impl; + +import com.cloud.network.Network; +import com.cloud.network.dao.NetworkDao; +import com.cloud.network.dao.NetworkVO; +import com.cloud.storage.Volume; +import com.cloud.utils.fsm.NoTransitionException; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine; +import com.cloud.vm.VirtualMachineManager; +import com.cloud.vm.dao.VMInstanceDao; +import org.apache.cloudstack.api.ApiCommandResourceType; +import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.Spy; +import org.mockito.junit.MockitoJUnitRunner; + +import static org.mockito.Mockito.when; + +@RunWith(MockitoJUnitRunner.class) +public class AsyncJobManagerImplTest { + @Spy + @InjectMocks + AsyncJobManagerImpl asyncJobManager; + @Mock + VolumeDataFactory volFactory; + @Mock + VMInstanceDao vmInstanceDao; + @Mock + VirtualMachineManager virtualMachineManager; + @Mock + NetworkDao networkDao; + @Mock + NetworkOrchestrationService networkOrchestrationService; + + @Test + public void testCleanupVolumeResource() { + AsyncJobVO job = new AsyncJobVO(); + job.setInstanceType(ApiCommandResourceType.Volume.toString()); + job.setInstanceId(1L); + VolumeInfo volumeInfo = Mockito.mock(VolumeInfo.class); + when(volFactory.getVolume(Mockito.anyLong())).thenReturn(volumeInfo); + when(volumeInfo.getState()).thenReturn(Volume.State.Attaching); + asyncJobManager.cleanupResources(job); + Mockito.verify(volumeInfo, Mockito.times(1)).stateTransit(Volume.Event.OperationFailed); + } + + @Test + public void testCleanupVmResource() throws NoTransitionException { + AsyncJobVO job = new AsyncJobVO(); + job.setInstanceType(ApiCommandResourceType.VirtualMachine.toString()); + job.setInstanceId(1L); + VMInstanceVO vmInstanceVO = Mockito.mock(VMInstanceVO.class); + when(vmInstanceDao.findById(Mockito.anyLong())).thenReturn(vmInstanceVO); + when(vmInstanceVO.getState()).thenReturn(VirtualMachine.State.Starting); + when(vmInstanceVO.getHostId()).thenReturn(1L); + asyncJobManager.cleanupResources(job); + Mockito.verify(virtualMachineManager, Mockito.times(1)).stateTransitTo(vmInstanceVO, VirtualMachine.Event.OperationFailed, 1L); + } + + @Test + public void testCleanupNetworkResource() throws NoTransitionException { + AsyncJobVO job = new AsyncJobVO(); + job.setInstanceType(ApiCommandResourceType.Network.toString()); + job.setInstanceId(1L); + NetworkVO networkVO = Mockito.mock(NetworkVO.class); + when(networkDao.findById(Mockito.anyLong())).thenReturn(networkVO); + when(networkVO.getState()).thenReturn(Network.State.Implementing); + asyncJobManager.cleanupResources(job); + Mockito.verify(networkOrchestrationService, Mockito.times(1)).stateTransitTo(networkVO, + Network.Event.OperationFailed); + } +} diff --git a/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java b/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java index 04556c49ab2..288211c4330 100644 --- a/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java +++ b/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java @@ -26,6 +26,7 @@ import javax.naming.ConfigurationException; import com.cloud.dc.DataCenter; import com.cloud.network.PublicIpQuarantine; +import com.cloud.utils.fsm.NoTransitionException; import org.apache.cloudstack.acl.ControlledEntity.ACLType; import org.apache.cloudstack.api.command.admin.address.ReleasePodIpCmdByAdmin; import org.apache.cloudstack.api.command.admin.network.DedicateGuestVlanRangeCmd; @@ -824,6 +825,11 @@ public class MockNetworkManagerImpl extends ManagerBase implements NetworkOrches return null; } + @Override + public boolean stateTransitTo(Network network, Network.Event e) throws NoTransitionException { + return true; + } + @Override public boolean isNetworkInlineMode(Network network) { // TODO Auto-generated method stub From 080f171c6d70ca04a93d2fc49838e488d490cf8c Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Fri, 19 Jan 2024 10:48:08 -0500 Subject: [PATCH 06/11] NSX: Cleanup NSX resources during k8s cluster cleanup (#8528) --- .../cluster/KubernetesClusterHelper.java | 1 + .../cluster/KubernetesClusterHelperImpl.java | 13 +++++++++ .../dao/KubernetesClusterVmMapDao.java | 2 ++ .../dao/KubernetesClusterVmMapDaoImpl.java | 12 ++++++++ .../apache/cloudstack/service/NsxElement.java | 21 ++++++++------ .../java/com/cloud/vm/UserVmManagerImpl.java | 28 +++++++++++++++---- .../spring-server-core-managers-context.xml | 5 ++-- 7 files changed, 66 insertions(+), 16 deletions(-) diff --git a/api/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelper.java b/api/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelper.java index e445e50f82c..e160227749d 100644 --- a/api/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelper.java +++ b/api/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelper.java @@ -22,4 +22,5 @@ import org.apache.cloudstack.acl.ControlledEntity; public interface KubernetesClusterHelper extends Adapter { ControlledEntity findByUuid(String uuid); + ControlledEntity findByVmId(long vmId); } diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelperImpl.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelperImpl.java index 0ef916ab959..60bd81c7c5a 100644 --- a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelperImpl.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterHelperImpl.java @@ -17,6 +17,7 @@ package com.cloud.kubernetes.cluster; import com.cloud.kubernetes.cluster.dao.KubernetesClusterDao; +import com.cloud.kubernetes.cluster.dao.KubernetesClusterVmMapDao; import com.cloud.utils.component.AdapterBase; import org.apache.cloudstack.acl.ControlledEntity; import org.apache.cloudstack.framework.config.ConfigKey; @@ -24,18 +25,30 @@ import org.apache.cloudstack.framework.config.Configurable; import org.springframework.stereotype.Component; import javax.inject.Inject; +import java.util.Objects; @Component public class KubernetesClusterHelperImpl extends AdapterBase implements KubernetesClusterHelper, Configurable { @Inject private KubernetesClusterDao kubernetesClusterDao; + @Inject + private KubernetesClusterVmMapDao kubernetesClusterVmMapDao; @Override public ControlledEntity findByUuid(String uuid) { return kubernetesClusterDao.findByUuid(uuid); } + @Override + public ControlledEntity findByVmId(long vmId) { + KubernetesClusterVmMapVO clusterVmMapVO = kubernetesClusterVmMapDao.getClusterMapFromVmId(vmId); + if (Objects.isNull(clusterVmMapVO)) { + return null; + } + return kubernetesClusterDao.findById(clusterVmMapVO.getClusterId()); + } + @Override public String getConfigComponentName() { return KubernetesClusterHelper.class.getSimpleName(); diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDao.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDao.java index 688a611ac99..45c0b79485c 100644 --- a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDao.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDao.java @@ -23,6 +23,8 @@ import java.util.List; public interface KubernetesClusterVmMapDao extends GenericDao { public List listByClusterId(long clusterId); + + public KubernetesClusterVmMapVO getClusterMapFromVmId(long vmId); public List listByClusterIdAndVmIdsIn(long clusterId, List vmIds); int removeByClusterIdAndVmIdsIn(long clusterId, List vmIds); diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDaoImpl.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDaoImpl.java index b9f2ec917b2..0d90a4cdaca 100644 --- a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDaoImpl.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/dao/KubernetesClusterVmMapDaoImpl.java @@ -31,12 +31,17 @@ import com.cloud.utils.db.SearchCriteria; public class KubernetesClusterVmMapDaoImpl extends GenericDaoBase implements KubernetesClusterVmMapDao { private final SearchBuilder clusterIdSearch; + private final SearchBuilder vmIdSearch; public KubernetesClusterVmMapDaoImpl() { clusterIdSearch = createSearchBuilder(); clusterIdSearch.and("clusterId", clusterIdSearch.entity().getClusterId(), SearchCriteria.Op.EQ); clusterIdSearch.and("vmIdsIN", clusterIdSearch.entity().getVmId(), SearchCriteria.Op.IN); clusterIdSearch.done(); + + vmIdSearch = createSearchBuilder(); + vmIdSearch.and("vmId", vmIdSearch.entity().getVmId(), SearchCriteria.Op.EQ); + vmIdSearch.done(); } @Override @@ -47,6 +52,13 @@ public class KubernetesClusterVmMapDaoImpl extends GenericDaoBase sc = vmIdSearch.create(); + sc.setParameters("vmId", vmId); + return findOneBy(sc); + } + @Override public List listByClusterIdAndVmIdsIn(long clusterId, List vmIds) { SearchCriteria sc = clusterIdSearch.create(); diff --git a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxElement.java b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxElement.java index 0986c367cf9..571e68496d6 100644 --- a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxElement.java +++ b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxElement.java @@ -505,10 +505,12 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns if (!canHandle(network, Network.Service.PortForwarding)) { return false; } + boolean result = true; for (PortForwardingRule rule : rules) { IPAddressVO publicIp = ApiDBUtils.findIpAddressById(rule.getSourceIpAddressId()); UserVm vm = ApiDBUtils.findUserVmById(rule.getVirtualMachineId()); - if (vm == null || networkModel.getNicInNetwork(vm.getId(), network.getId()) == null) { + if ((vm == null && (rule.getState() != FirewallRule.State.Revoke)) || + (vm != null && networkModel.getNicInNetwork(vm.getId(), network.getId()) == null)) { continue; } NsxOpObject nsxObject = getNsxOpObject(network); @@ -523,8 +525,8 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns .setNetworkResourceId(nsxObject.getNetworkResourceId()) .setNetworkResourceName(nsxObject.getNetworkResourceName()) .setVpcResource(nsxObject.isVpcResource()) - .setVmId(vm.getId()) - .setVmIp(vm.getPrivateIpAddress()) + .setVmId(Objects.nonNull(vm) ? vm.getId() : 0) + .setVmIp(Objects.nonNull(vm) ? vm.getPrivateIpAddress() : null) .setPublicIp(publicIp.getAddress().addr()) .setPrivatePort(privatePort) .setPublicPort(publicPort) @@ -532,12 +534,12 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns .setProtocol(rule.getProtocol().toUpperCase(Locale.ROOT)) .build(); if (rule.getState() == FirewallRule.State.Add) { - return nsxService.createPortForwardRule(networkRule); + result &= nsxService.createPortForwardRule(networkRule); } else if (rule.getState() == FirewallRule.State.Revoke) { - return nsxService.deletePortForwardRule(networkRule); + result &= nsxService.deletePortForwardRule(networkRule); } } - return true; + return result; } public Pair getVpcOrNetwork(Long vpcId, long networkId) { @@ -613,6 +615,7 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns @Override public boolean applyLBRules(Network network, List rules) throws ResourceUnavailableException { + boolean result = true; for (LoadBalancingRule loadBalancingRule : rules) { if (loadBalancingRule.getState() == FirewallRule.State.Active) { continue; @@ -638,12 +641,12 @@ public class NsxElement extends AdapterBase implements DhcpServiceProvider, Dns .setAlgorithm(loadBalancingRule.getAlgorithm()) .build(); if (loadBalancingRule.getState() == FirewallRule.State.Add) { - return nsxService.createLbRule(networkRule); + result &= nsxService.createLbRule(networkRule); } else if (loadBalancingRule.getState() == FirewallRule.State.Revoke) { - return nsxService.deleteLbRule(networkRule); + result &= nsxService.deleteLbRule(networkRule); } } - return true; + return result; } @Override diff --git a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java index 2927db638ab..844e0380757 100644 --- a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java +++ b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java @@ -51,6 +51,9 @@ import javax.naming.ConfigurationException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; +import com.cloud.kubernetes.cluster.KubernetesClusterHelper; +import com.cloud.network.dao.NsxProviderDao; +import com.cloud.network.element.NsxProviderVO; import org.apache.cloudstack.acl.ControlledEntity; import org.apache.cloudstack.acl.ControlledEntity.ACLType; import org.apache.cloudstack.acl.SecurityChecker.AccessType; @@ -589,6 +592,8 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir @Inject VMScheduleManager vmScheduleManager; + @Inject + NsxProviderDao nsxProviderDao; private ScheduledExecutorService _executor = null; private ScheduledExecutorService _vmIpFetchExecutor = null; @@ -597,6 +602,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir private boolean _dailyOrHourly = false; private int capacityReleaseInterval; private ExecutorService _vmIpFetchThreadExecutor; + private List kubernetesClusterHelpers; private String _instance; @@ -610,6 +616,14 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir private static final int NUM_OF_2K_BLOCKS = 512; private static final int MAX_HTTP_POST_LENGTH = NUM_OF_2K_BLOCKS * MAX_USER_DATA_LENGTH_BYTES; + public List getKubernetesClusterHelpers() { + return kubernetesClusterHelpers; + } + + public void setKubernetesClusterHelpers(final List kubernetesClusterHelpers) { + this.kubernetesClusterHelpers = kubernetesClusterHelpers; + } + @Inject private OrchestrationService _orchSrvc; @@ -2528,11 +2542,15 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir } // cleanup port forwarding rules - if (_rulesMgr.revokePortForwardingRulesForVm(vmId)) { - s_logger.debug("Port forwarding rules are removed successfully as a part of vm id=" + vmId + " expunge"); - } else { - success = false; - s_logger.warn("Fail to remove port forwarding rules as a part of vm id=" + vmId + " expunge"); + VMInstanceVO vmInstanceVO = _vmInstanceDao.findById(vmId); + NsxProviderVO nsx = nsxProviderDao.findByZoneId(vmInstanceVO.getDataCenterId()); + if (Objects.isNull(nsx) || Objects.isNull(kubernetesClusterHelpers.get(0).findByVmId(vmId))) { + if (_rulesMgr.revokePortForwardingRulesForVm(vmId)) { + s_logger.debug("Port forwarding rules are removed successfully as a part of vm id=" + vmId + " expunge"); + } else { + success = false; + s_logger.warn("Fail to remove port forwarding rules as a part of vm id=" + vmId + " expunge"); + } } // cleanup load balancer rules diff --git a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 7227264e229..9656d544fbb 100644 --- a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -106,8 +106,9 @@ - - + + + Date: Fri, 19 Jan 2024 12:53:23 -0500 Subject: [PATCH 07/11] fix test failure --- .../java/org/apache/cloudstack/service/NsxElementTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/service/NsxElementTest.java b/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/service/NsxElementTest.java index ae3143e32b8..39fb2e06120 100644 --- a/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/service/NsxElementTest.java +++ b/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/service/NsxElementTest.java @@ -273,8 +273,13 @@ public class NsxElementTest { 5L, 2L, 15L); rule.setState(FirewallRule.State.Revoke); Network.Service service = new Network.Service("service1", new Network.Capability("capability")); - + VpcVO vpcVO = Mockito.mock(VpcVO.class); + when(vpcDao.findById(1L)).thenReturn(vpcVO); + when(vpcVO.getDomainId()).thenReturn(2L); + IPAddressVO ipAddress = new IPAddressVO(new Ip("10.1.13.10"), 1L, 1L, 1L,false); + when(ApiDBUtils.findIpAddressById(anyLong())).thenReturn(ipAddress); when(nsxElement.canHandle(networkVO, service)).thenReturn(true); + when(nsxService.deletePortForwardRule(any(NsxNetworkRule.class))).thenReturn(true); assertTrue(nsxElement.applyPFRules(networkVO, List.of(rule))); } From f01bb5d44008ec0c542c19864aceadfb3d7f58de Mon Sep 17 00:00:00 2001 From: Nicolas Vazquez Date: Fri, 19 Jan 2024 16:59:05 -0300 Subject: [PATCH 08/11] NSX: Improve segment deletion process (#8538) --- .../cloudstack/resource/NsxResource.java | 35 ++---------- .../cloudstack/service/NsxApiClient.java | 55 +++++++++++++++++-- .../cloudstack/service/NsxServiceImpl.java | 2 +- .../cloudstack/resource/NsxResourceTest.java | 28 ++-------- 4 files changed, 62 insertions(+), 58 deletions(-) diff --git a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/resource/NsxResource.java b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/resource/NsxResource.java index a90266f7164..655bd823325 100644 --- a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/resource/NsxResource.java +++ b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/resource/NsxResource.java @@ -30,9 +30,7 @@ import com.cloud.utils.exception.CloudRuntimeException; import com.vmware.nsx.model.TransportZone; import com.vmware.nsx.model.TransportZoneListResult; -import com.vmware.nsx_policy.model.EnforcementPointListResult; import com.vmware.nsx_policy.model.Segment; -import com.vmware.nsx_policy.model.SiteListResult; import org.apache.cloudstack.NsxAnswer; import org.apache.cloudstack.StartupNsxCommand; import org.apache.cloudstack.agent.api.CreateNsxDhcpRelayConfigCommand; @@ -320,33 +318,17 @@ public class NsxResource implements ServerResource { private Answer executeRequest(CreateNsxSegmentCommand cmd) { try { - SiteListResult sites = nsxApiClient.getSites(); - String errorMsg; - String networkName = cmd.getNetworkName(); - if (CollectionUtils.isEmpty(sites.getResults())) { - errorMsg = String.format("Failed to create network: %s as no sites are found in the linked NSX infrastructure", networkName); - LOGGER.error(errorMsg); - return new NsxAnswer(cmd, new CloudRuntimeException(errorMsg)); - } - String siteId = sites.getResults().get(0).getId(); - - EnforcementPointListResult epList = nsxApiClient.getEnforcementPoints(siteId); - if (CollectionUtils.isEmpty(epList.getResults())) { - errorMsg = String.format("Failed to create network: %s as no enforcement points are found in the linked NSX infrastructure", networkName); - LOGGER.error(errorMsg); - return new NsxAnswer(cmd, new CloudRuntimeException(errorMsg)); - } - String enforcementPointPath = epList.getResults().get(0).getPath(); - + String siteId = nsxApiClient.getDefaultSiteId(); + String enforcementPointPath = nsxApiClient.getDefaultEnforcementPointPath(siteId); TransportZoneListResult transportZoneListResult = nsxApiClient.getTransportZones(); if (CollectionUtils.isEmpty(transportZoneListResult.getResults())) { - errorMsg = String.format("Failed to create network: %s as no transport zones were found in the linked NSX infrastructure", networkName); + String errorMsg = String.format("Failed to create network: %s as no transport zones were found in the linked NSX infrastructure", cmd.getNetworkName()); LOGGER.error(errorMsg); return new NsxAnswer(cmd, new CloudRuntimeException(errorMsg)); } List transportZones = transportZoneListResult.getResults().stream().filter(tz -> tz.getDisplayName().equals(transportZone)).collect(Collectors.toList()); if (CollectionUtils.isEmpty(transportZones)) { - errorMsg = String.format("Failed to create network: %s as no transport zone of name %s was found in the linked NSX infrastructure", networkName, transportZone); + String errorMsg = String.format("Failed to create network: %s as no transport zone of name %s was found in the linked NSX infrastructure", cmd.getNetworkName(), transportZone); LOGGER.error(errorMsg); return new NsxAnswer(cmd, new CloudRuntimeException(errorMsg)); } @@ -371,14 +353,9 @@ public class NsxResource implements ServerResource { String segmentName = NsxControllerUtils.getNsxSegmentId(cmd.getDomainId(), cmd.getAccountId(), cmd.getZoneId(), cmd.getVpcId(), cmd.getNetworkId()); try { - Thread.sleep(30 * 1000L); nsxApiClient.deleteSegment(cmd.getZoneId(), cmd.getDomainId(), cmd.getAccountId(), cmd.getVpcId(), cmd.getNetworkId(), segmentName); - } catch (InterruptedException | ThreadDeath e) { - LOGGER.error("Thread interrupted", e); - Thread.currentThread().interrupt(); - return new NsxAnswer(cmd, new CloudRuntimeException(e.getMessage())); } catch (Exception e) { - LOGGER.error(String.format("Failed to delete NSX segment: %s", segmentName)); + LOGGER.error(String.format("Failed to delete NSX segment %s: %s", segmentName, e.getMessage())); return new NsxAnswer(cmd, new CloudRuntimeException(e.getMessage())); } return new NsxAnswer(cmd, true, null); @@ -485,7 +462,7 @@ public class NsxResource implements ServerResource { try { nsxApiClient.deleteDistributedFirewallRules(segmentName, rules); } catch (Exception e) { - LOGGER.error(String.format("Failed to create NSX distributed firewall %s: %s", segmentName, e.getMessage()), e); + LOGGER.error(String.format("Failed to delete NSX distributed firewall %s: %s", segmentName, e.getMessage()), e); return new NsxAnswer(cmd, new CloudRuntimeException(e.getMessage())); } return new NsxAnswer(cmd, true, null); diff --git a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxApiClient.java b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxApiClient.java index 4356c97d754..20df24f8976 100644 --- a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxApiClient.java +++ b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxApiClient.java @@ -31,6 +31,7 @@ import com.vmware.nsx_policy.infra.Sites; import com.vmware.nsx_policy.infra.Tier1s; import com.vmware.nsx_policy.infra.domains.Groups; import com.vmware.nsx_policy.infra.domains.SecurityPolicies; +import com.vmware.nsx_policy.infra.domains.groups.members.SegmentPorts; import com.vmware.nsx_policy.infra.domains.security_policies.Rules; import com.vmware.nsx_policy.infra.sites.EnforcementPoints; import com.vmware.nsx_policy.infra.tier_0s.LocaleServices; @@ -51,6 +52,7 @@ import com.vmware.nsx_policy.model.LBVirtualServer; import com.vmware.nsx_policy.model.LBVirtualServerListResult; import com.vmware.nsx_policy.model.LocaleServicesListResult; import com.vmware.nsx_policy.model.PathExpression; +import com.vmware.nsx_policy.model.PolicyGroupMembersListResult; import com.vmware.nsx_policy.model.PolicyNatRule; import com.vmware.nsx_policy.model.PolicyNatRuleListResult; import com.vmware.nsx_policy.model.Rule; @@ -343,7 +345,17 @@ public class NsxApiClient { } - public SiteListResult getSites() { + public String getDefaultSiteId() { + SiteListResult sites = getSites(); + if (CollectionUtils.isEmpty(sites.getResults())) { + String errorMsg = "No sites are found in the linked NSX infrastructure"; + LOGGER.error(errorMsg); + throw new CloudRuntimeException(errorMsg); + } + return sites.getResults().get(0).getId(); + } + + protected SiteListResult getSites() { try { Sites sites = (Sites) nsxService.apply(Sites.class); return sites.list(null, false, null, null, null, null); @@ -352,7 +364,17 @@ public class NsxApiClient { } } - public EnforcementPointListResult getEnforcementPoints(String siteId) { + public String getDefaultEnforcementPointPath(String siteId) { + EnforcementPointListResult epList = getEnforcementPoints(siteId); + if (CollectionUtils.isEmpty(epList.getResults())) { + String errorMsg = String.format("No enforcement points are found in the linked NSX infrastructure for site ID %s", siteId); + LOGGER.error(errorMsg); + throw new CloudRuntimeException(errorMsg); + } + return epList.getResults().get(0).getPath(); + } + + protected EnforcementPointListResult getEnforcementPoints(String siteId) { try { EnforcementPoints enforcementPoints = (EnforcementPoints) nsxService.apply(EnforcementPoints.class); return enforcementPoints.list(siteId, null, false, null, null, null, null); @@ -397,11 +419,8 @@ public class NsxApiClient { public void deleteSegment(long zoneId, long domainId, long accountId, Long vpcId, long networkId, String segmentName) { try { - Segments segmentService = (Segments) nsxService.apply(Segments.class); removeSegmentDistributedFirewallRules(segmentName); - removeGroupForSegment(segmentName); - LOGGER.debug(String.format("Removing the segment with ID %s", segmentName)); - segmentService.delete(segmentName); + removeSegment(segmentName); DhcpRelayConfigs dhcpRelayConfig = (DhcpRelayConfigs) nsxService.apply(DhcpRelayConfigs.class); String dhcpRelayConfigId = NsxControllerUtils.getNsxDhcpRelayConfigId(zoneId, domainId, accountId, vpcId, networkId); LOGGER.debug(String.format("Removing the DHCP relay config with ID %s", dhcpRelayConfigId)); @@ -414,6 +433,30 @@ public class NsxApiClient { } } + protected void removeSegment(String segmentName) { + LOGGER.debug(String.format("Removing the segment with ID %s", segmentName)); + Segments segmentService = (Segments) nsxService.apply(Segments.class); + Segment segment = segmentService.get(segmentName); + if (segment == null) { + LOGGER.error(String.format("The segment with ID %s is not found, skipping removal", segmentName)); + return; + } + String siteId = getDefaultSiteId(); + String enforcementPointPath = getDefaultEnforcementPointPath(siteId); + SegmentPorts segmentPortsService = (SegmentPorts) nsxService.apply(SegmentPorts.class); + PolicyGroupMembersListResult segmentPortsList = segmentPortsService.list(DEFAULT_DOMAIN, segmentName, null, enforcementPointPath, + false, null, 50L, false, null); + if (segmentPortsList.getResultCount() == 0L) { + LOGGER.debug(String.format("Removing the segment with ID %s", segmentName)); + removeGroupForSegment(segmentName); + segmentService.delete(segmentName); + } else { + String msg = String.format("Cannot remove the NSX segment %s because there are still %s port group(s) attached to it", segmentName, segmentPortsList.getResultCount()); + LOGGER.debug(msg); + throw new CloudRuntimeException(msg); + } + } + public void createStaticNatRule(String vpcName, String tier1GatewayName, String ruleName, String publicIp, String vmIp) { try { diff --git a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxServiceImpl.java b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxServiceImpl.java index 481b68f9840..d53946ccf7e 100644 --- a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxServiceImpl.java +++ b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxServiceImpl.java @@ -109,7 +109,7 @@ public class NsxServiceImpl implements NsxService { network.getVpcId(), vpcName, network.getId(), network.getName()); NsxAnswer result = nsxControllerUtils.sendNsxCommand(deleteNsxSegmentCommand, network.getDataCenterId()); if (!result.getResult()) { - String msg = String.format("Could not remove the NSX segment for network %s", network.getName()); + String msg = String.format("Could not remove the NSX segment for network %s: %s", network.getName(), result.getDetails()); LOGGER.error(msg); throw new CloudRuntimeException(msg); } diff --git a/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/resource/NsxResourceTest.java b/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/resource/NsxResourceTest.java index b730348e0a0..ee4f4fb64c2 100644 --- a/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/resource/NsxResourceTest.java +++ b/plugins/network-elements/nsx/src/test/java/org/apache/cloudstack/resource/NsxResourceTest.java @@ -21,9 +21,7 @@ import com.cloud.utils.exception.CloudRuntimeException; import com.vmware.nsx.model.TransportZone; import com.vmware.nsx.model.TransportZoneListResult; import com.vmware.nsx_policy.model.EnforcementPoint; -import com.vmware.nsx_policy.model.EnforcementPointListResult; import com.vmware.nsx_policy.model.Site; -import com.vmware.nsx_policy.model.SiteListResult; import junit.framework.Assert; import org.apache.cloudstack.NsxAnswer; import org.apache.cloudstack.agent.api.CreateNsxDistributedFirewallRulesCommand; @@ -74,10 +72,6 @@ public class NsxResourceTest { NsxResource nsxResource; AutoCloseable closeable; @Mock - EnforcementPointListResult enforcementPointListResult; - @Mock - SiteListResult siteListResult; - @Mock TransportZoneListResult transportZoneListResult; private static final String transportZone = "Overlay"; @@ -177,13 +171,9 @@ public class NsxResourceTest { NsxCommand command = new CreateNsxSegmentCommand(domainId, accountId, zoneId, 2L, "VPC01", 3L, "Web", "10.10.10.1", "10.10.10.0/24"); - when(nsxApi.getSites()).thenReturn(siteListResult); - when(siteListResult.getResults()).thenReturn(siteList); - when(siteList.get(0).getId()).thenReturn("site1"); + when(nsxApi.getDefaultSiteId()).thenReturn("site1"); - when(nsxApi.getEnforcementPoints(anyString())).thenReturn(enforcementPointListResult); - when(enforcementPointListResult.getResults()).thenReturn(enforcementPointList); - when(enforcementPointList.get(0).getPath()).thenReturn("enforcementPointPath"); + when(nsxApi.getDefaultEnforcementPointPath(anyString())).thenReturn("enforcementPointPath"); when(nsxApi.getTransportZones()).thenReturn(transportZoneListResult); when(transportZoneListResult.getResults()).thenReturn(transportZoneList); @@ -194,7 +184,7 @@ public class NsxResourceTest { @Test public void testCreateNsxSegmentEmptySites() { - when(nsxApi.getSites()).thenReturn(null); + when(nsxApi.getDefaultSiteId()).thenReturn(null); CreateNsxSegmentCommand command = Mockito.mock(CreateNsxSegmentCommand.class); NsxAnswer answer = (NsxAnswer) nsxResource.executeRequest(command); assertFalse(answer.getResult()); @@ -203,11 +193,8 @@ public class NsxResourceTest { @Test public void testCreateNsxSegmentEmptyEnforcementPoints() { Site site = mock(Site.class); - List siteList = List.of(site); - when(nsxApi.getSites()).thenReturn(siteListResult); - when(siteListResult.getResults()).thenReturn(siteList); - when(siteList.get(0).getId()).thenReturn("site1"); - when(nsxApi.getEnforcementPoints(anyString())).thenReturn(null); + when(nsxApi.getDefaultSiteId()).thenReturn("site1"); + when(nsxApi.getDefaultEnforcementPointPath(anyString())).thenReturn(null); CreateNsxSegmentCommand command = Mockito.mock(CreateNsxSegmentCommand.class); NsxAnswer answer = (NsxAnswer) nsxResource.executeRequest(command); assertFalse(answer.getResult()); @@ -216,10 +203,7 @@ public class NsxResourceTest { @Test public void testCreateNsxSegmentEmptyTransportZones() { Site site = mock(Site.class); - List siteList = List.of(site); - when(nsxApi.getSites()).thenReturn(siteListResult); - when(siteListResult.getResults()).thenReturn(siteList); - when(siteList.get(0).getId()).thenReturn("site1"); + when(nsxApi.getDefaultSiteId()).thenReturn("site1"); CreateNsxSegmentCommand command = Mockito.mock(CreateNsxSegmentCommand.class); NsxAnswer answer = (NsxAnswer) nsxResource.executeRequest(command); assertFalse(answer.getResult()); From 19ae12a05ac03d0849a5cd94c0ec7ef03811fe69 Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Sun, 21 Jan 2024 20:18:05 -0500 Subject: [PATCH 09/11] NSX: Add passive monitor for NSX LB to test whether a server is available (#8533) * NSX: Add passive monitor for NSX LB to test whether a server is available * Add active monitors too * fix build failure --- .../cloudstack/resource/NsxResource.java | 2 +- .../cloudstack/service/NsxApiClient.java | 53 +++++++++++++++++-- .../cloudstack/utils/NsxControllerUtils.java | 4 ++ 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/resource/NsxResource.java b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/resource/NsxResource.java index 655bd823325..ec4de6b3ef7 100644 --- a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/resource/NsxResource.java +++ b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/resource/NsxResource.java @@ -421,7 +421,7 @@ public class NsxResource implements ServerResource { String ruleName = NsxControllerUtils.getLoadBalancerRuleName(tier1GatewayName, cmd.getLbId()); try { nsxApiClient.createAndAddNsxLbVirtualServer(tier1GatewayName, cmd.getLbId(), cmd.getPublicIp(), cmd.getPublicPort(), - cmd.getMemberList(), cmd.getAlgorithm(), cmd.getProtocol()); + cmd.getMemberList(), cmd.getAlgorithm(), cmd.getProtocol(), cmd.getPrivatePort()); } catch (Exception e) { LOGGER.error(String.format("Failed to add NSX load balancer rule %s for network: %s", ruleName, cmd.getNetworkResourceName())); return new NsxAnswer(cmd, new CloudRuntimeException(e.getMessage())); diff --git a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxApiClient.java b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxApiClient.java index 20df24f8976..d4ac0e7a7e2 100644 --- a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxApiClient.java +++ b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/service/NsxApiClient.java @@ -22,6 +22,7 @@ import com.vmware.nsx.model.TransportZone; import com.vmware.nsx.model.TransportZoneListResult; import com.vmware.nsx_policy.infra.DhcpRelayConfigs; import com.vmware.nsx_policy.infra.LbAppProfiles; +import com.vmware.nsx_policy.infra.LbMonitorProfiles; import com.vmware.nsx_policy.infra.LbPools; import com.vmware.nsx_policy.infra.LbServices; import com.vmware.nsx_policy.infra.LbVirtualServers; @@ -44,10 +45,13 @@ import com.vmware.nsx_policy.model.GroupListResult; import com.vmware.nsx_policy.model.ICMPTypeServiceEntry; import com.vmware.nsx_policy.model.L4PortSetServiceEntry; import com.vmware.nsx_policy.model.LBAppProfileListResult; +import com.vmware.nsx_policy.model.LBMonitorProfileListResult; import com.vmware.nsx_policy.model.LBPool; import com.vmware.nsx_policy.model.LBPoolListResult; import com.vmware.nsx_policy.model.LBPoolMember; import com.vmware.nsx_policy.model.LBService; +import com.vmware.nsx_policy.model.LBTcpMonitorProfile; +import com.vmware.nsx_policy.model.LBUdpMonitorProfile; import com.vmware.nsx_policy.model.LBVirtualServer; import com.vmware.nsx_policy.model.LBVirtualServerListResult; import com.vmware.nsx_policy.model.LocaleServicesListResult; @@ -95,6 +99,7 @@ import static org.apache.cloudstack.utils.NsxControllerUtils.getVirtualServerNam import static org.apache.cloudstack.utils.NsxControllerUtils.getServiceEntryName; import static org.apache.cloudstack.utils.NsxControllerUtils.getLoadBalancerName; import static org.apache.cloudstack.utils.NsxControllerUtils.getLoadBalancerAlgorithm; +import static org.apache.cloudstack.utils.NsxControllerUtils.getActiveMonitorProfileName; public class NsxApiClient { @@ -113,6 +118,10 @@ public class NsxApiClient { protected static final String SEGMENTS_PATH = "/infra/segments"; protected static final String DEFAULT_DOMAIN = "default"; protected static final String GROUPS_PATH_PREFIX = "/infra/domains/default/groups"; + // TODO: Pass as global / zone-level setting? + protected static final String NSX_LB_PASSIVE_MONITOR = "/infra/lb-monitor-profiles/default-passive-lb-monitor"; + protected static final String TCP_MONITOR_PROFILE = "LBTcpMonitorProfile"; + protected static final String UDP_MONITOR_PROFILE = "LBUdpMonitorProfile"; private enum PoolAllocation { ROUTING, LB_SMALL, LB_MEDIUM, LB_LARGE, LB_XLARGE } @@ -549,8 +558,10 @@ public class NsxApiClient { } return members; } - public void createNsxLbServerPool(List memberList, String tier1GatewayName, String lbServerPoolName, String algorithm) { + public void createNsxLbServerPool(List memberList, String tier1GatewayName, String lbServerPoolName, + String algorithm, String privatePort, String protocol) { try { + String activeMonitorPath = getLbActiveMonitorPath(lbServerPoolName, privatePort, protocol); List members = getLbPoolMembers(memberList, tier1GatewayName); LbPools lbPools = (LbPools) nsxService.apply(LbPools.class); LBPool lbPool = new LBPool.Builder() @@ -558,6 +569,8 @@ public class NsxApiClient { .setDisplayName(lbServerPoolName) .setAlgorithm(getLoadBalancerAlgorithm(algorithm)) .setMembers(members) + .setPassiveMonitorPath(NSX_LB_PASSIVE_MONITOR) + .setActiveMonitorPaths(List.of(activeMonitorPath)) .build(); lbPools.patch(lbServerPoolName, lbPool); } catch (Error error) { @@ -568,6 +581,32 @@ public class NsxApiClient { } } + private String getLbActiveMonitorPath(String lbServerPoolName, String port, String protocol) { + LbMonitorProfiles lbActiveMonitor = (LbMonitorProfiles) nsxService.apply(LbMonitorProfiles.class); + String lbMonitorProfileId = getActiveMonitorProfileName(lbServerPoolName, port, protocol); + if ("TCP".equals(protocol.toUpperCase(Locale.ROOT))) { + LBTcpMonitorProfile lbTcpMonitorProfile = new LBTcpMonitorProfile.Builder(TCP_MONITOR_PROFILE) + .setDisplayName(lbMonitorProfileId) + .setMonitorPort(Long.parseLong(port)) + .build(); + lbActiveMonitor.patch(lbMonitorProfileId, lbTcpMonitorProfile); + } else if ("UDP".equals(protocol.toUpperCase(Locale.ROOT))) { + LBUdpMonitorProfile lbUdpMonitorProfile = new LBUdpMonitorProfile.Builder(UDP_MONITOR_PROFILE) + .setDisplayName(lbMonitorProfileId) + .setMonitorPort(Long.parseLong(port)) + .build(); + lbActiveMonitor.patch(lbMonitorProfileId, lbUdpMonitorProfile); + } + + LBMonitorProfileListResult listResult = listLBActiveMonitors(lbActiveMonitor); + Optional monitorProfile = listResult.getResults().stream().filter(profile -> profile._getDataValue().getField("id").toString().equals(lbMonitorProfileId)).findFirst(); + return monitorProfile.map(structure -> structure._getDataValue().getField("path").toString()).orElse(null); + } + + LBMonitorProfileListResult listLBActiveMonitors(LbMonitorProfiles lbActiveMonitor) { + return lbActiveMonitor.list(null, false, null, null, null, null); + } + public void createNsxLoadBalancer(String tier1GatewayName) { try { String lbName = getLoadBalancerName(tier1GatewayName); @@ -593,10 +632,10 @@ public class NsxApiClient { } public void createAndAddNsxLbVirtualServer(String tier1GatewayName, long lbId, String publicIp, String publicPort, - List memberList, String algorithm, String protocol) { + List memberList, String algorithm, String protocol, String privatePort) { try { String lbServerPoolName = getServerPoolName(tier1GatewayName, lbId); - createNsxLbServerPool(memberList, tier1GatewayName, lbServerPoolName, algorithm); + createNsxLbServerPool(memberList, tier1GatewayName, lbServerPoolName, algorithm, privatePort, protocol); createNsxLoadBalancer(tier1GatewayName); String lbVirtualServerName = getVirtualServerName(tier1GatewayName, lbId); @@ -632,6 +671,14 @@ public class NsxApiClient { String lbServerPoolName = getServerPoolName(tier1GatewayName, lbId); lbPools.delete(lbServerPoolName, false); + // delete associated LB Active monitor profile + LbMonitorProfiles lbActiveMonitor = (LbMonitorProfiles) nsxService.apply(LbMonitorProfiles.class); + LBMonitorProfileListResult listResult = listLBActiveMonitors(lbActiveMonitor); + List profileIds = listResult.getResults().stream().filter(profile -> profile._getDataValue().getField("id").toString().contains(lbServerPoolName)) + .map(profile -> profile._getDataValue().getField("id").toString()).collect(Collectors.toList()); + for(String profileId : profileIds) { + lbActiveMonitor.delete(profileId, true); + } // Delete load balancer LBVirtualServerListResult lbVsListResult = lbVirtualServers.list(null, null, null, null, null, null); LBPoolListResult lbPoolListResult = lbPools.list(null, null, null, null, null, null); diff --git a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/utils/NsxControllerUtils.java b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/utils/NsxControllerUtils.java index 7a4d5cfcc8b..0dd3c8ba1e4 100644 --- a/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/utils/NsxControllerUtils.java +++ b/plugins/network-elements/nsx/src/main/java/org/apache/cloudstack/utils/NsxControllerUtils.java @@ -122,6 +122,10 @@ public class NsxControllerUtils { return getLoadBalancerRuleName(tier1GatewayName, lbId) + "-SP"; } + public static String getActiveMonitorProfileName(String lbServerPoolName, String port, String protocol) { + return lbServerPoolName + "-" + protocol + "-" + port + "-AM"; + } + public static String getVirtualServerName(String tier1GatewayName, long lbId) { return getLoadBalancerRuleName(tier1GatewayName, lbId) + "-VS"; } From 19250403e645c76f60b17aa4aeb4dc915f5ca206 Mon Sep 17 00:00:00 2001 From: Abhishek Kumar Date: Mon, 22 Jan 2024 10:26:40 +0530 Subject: [PATCH 10/11] ui: fix create k8s cluster multiple listing (#8539) Fixes #8536 Signed-off-by: Abhishek Kumar --- .../views/compute/CreateKubernetesCluster.vue | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/ui/src/views/compute/CreateKubernetesCluster.vue b/ui/src/views/compute/CreateKubernetesCluster.vue index 1240531ddd7..ca1e424cca4 100644 --- a/ui/src/views/compute/CreateKubernetesCluster.vue +++ b/ui/src/views/compute/CreateKubernetesCluster.vue @@ -259,18 +259,12 @@ export default { this.apiParams = this.$getApiParams('createKubernetesCluster') }, created () { - this.networks = [ - { - id: null, - name: '' - } - ] - this.keyPairs = [ - { - id: null, - name: '' - } - ] + this.emptyEntry = { + id: null, + name: '' + } + this.networks = [this.emptyEntry] + this.keyPairs = [this.emptyEntry] this.initForm() this.fetchData() }, @@ -322,7 +316,6 @@ export default { }, fetchData () { this.fetchZoneData() - this.fetchNetworkData() this.fetchKeyPairData() }, isValidValueForKey (obj, key) { @@ -417,14 +410,16 @@ export default { params.zoneid = this.selectedZone.id } this.networkLoading = true + this.networks = [] api('listNetworks', params).then(json => { var listNetworks = json.listnetworksresponse.network if (this.arrayHasItems(listNetworks)) { listNetworks = listNetworks.filter(n => n.type !== 'L2') - this.networks = this.networks.concat(listNetworks) + this.networks = listNetworks } }).finally(() => { this.networkLoading = false + this.networks = [this.emptyEntry].concat(this.networks) if (this.arrayHasItems(this.networks)) { this.form.networkid = 0 } From e518f1933a91292b46a5bde0814df56a07703d32 Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Mon, 22 Jan 2024 18:06:35 -0500 Subject: [PATCH 11/11] NSX: Add check for ICMP code / type for NSX zones (#8542) --- .../network/firewall/FirewallManagerImpl.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java b/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java index b08df5a3d1b..a816a70cdf3 100644 --- a/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java +++ b/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java @@ -22,12 +22,18 @@ import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Objects; import java.util.Set; import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.dc.DataCenter; +import com.cloud.network.dao.NsxProviderDao; +import com.cloud.network.element.NsxProviderVO; +import com.cloud.utils.db.EntityManager; import org.apache.cloudstack.api.command.user.firewall.IListFirewallRulesCmd; import org.apache.cloudstack.api.command.user.ipv6.ListIpv6FirewallRulesCmd; import org.apache.cloudstack.context.CallContext; @@ -137,6 +143,10 @@ public class FirewallManagerImpl extends ManagerBase implements FirewallService, NetworkDao _networkDao; @Inject VpcManager _vpcMgr; + @Inject + EntityManager entityManager; + @Inject + NsxProviderDao nsxProviderDao; List _firewallElements; List _pfElements; @@ -689,6 +699,9 @@ public class FirewallManagerImpl extends ManagerBase implements FirewallService, } for (FirewallRuleVO rule : rules) { + // validate rule - for NSX + long networkId = rule.getNetworkId(); + validateNsxConstraints(networkId, rule.getProtocol(), rule.getIcmpType(), rule.getIcmpCode()); // load cidrs if any rule.setSourceCidrList(_firewallCidrsDao.getSourceCidrs(rule.getId())); rule.setDestinationCidrsList(_firewallDcidrsDao.getDestCidrs(rule.getId())); @@ -710,6 +723,20 @@ public class FirewallManagerImpl extends ManagerBase implements FirewallService, return true; } + private void validateNsxConstraints(long networkId, String protocol, Integer icpmType, Integer icmpCode) { + final Network network = entityManager.findById(Network.class, networkId); + final DataCenter dc = entityManager.findById(DataCenter.class, network.getDataCenterId()); + final NsxProviderVO nsxProvider = nsxProviderDao.findByZoneId(dc.getId()); + if (Objects.isNull(nsxProvider)) { + return; + } + if (NetUtils.ICMP_PROTO.equals(protocol.toLowerCase(Locale.ROOT)) && (icpmType == -1 || icmpCode == -1)) { + String errorMsg = "Passing -1 for ICMP type is not supported for NSX enabled zones"; + s_logger.error(errorMsg); + throw new InvalidParameterValueException(errorMsg); + } + } + @Override public boolean applyDefaultEgressFirewallRule(Long networkId, boolean defaultPolicy, boolean add) throws ResourceUnavailableException {