From 723bf1445f1aaa2e0ff1196884509541e351c6e2 Mon Sep 17 00:00:00 2001 From: Eugenio Grosso Date: Thu, 23 Apr 2026 12:21:31 +0000 Subject: [PATCH] kvm/flasharray: address review feedback on NVMe-TCP PR Apply the review comments from the first round on #13061: * FlashArrayAdapter.snapshot() and both getSnapshot() entry points now wrap the returned FlashArrayVolume in withAddressType(). Without this, snapshots taken against an NVMe-TCP pool had the constructor-default AddressType.FIBERWWN and ProviderSnapshot.getAddress() emitted an FC style WWN instead of the NVMe EUI-128, which the adaptive driver then persisted as the snapshot path. Verified end-to-end against Purity 6.7.7: a fresh NVMe-TCP snapshot now lands with install_path starting 006c... , matching the source volume's EUI (previously it was 6-24a9370...). * FlashArrayAdapter.attach() - retry path after 'Connection already exists' no longer requires a hostgroup-scoped match for NVMe-TCP. If hostgroup is not configured, or the existing connection is host-scoped, fall back to matching by host name, same as the Fibre Channel branch. Also normalize the 'volume lun is not found' message when no connection list is returned. * FlashArrayAdapter.attach() - initial 'Volume attach did not return lun information' exception message now mentions both lun (FC) and nsid (NVMe-TCP) so the error is not misleading on NVMe deployments. * FlashArrayAdapter.getVolumeByAddress() - validate the EUI-128 length before slicing. A short/malformed address used to throw StringIndexOutOfBoundsException deep inside getFlashArrayItem and be swallowed as 'not found'; now a clear RuntimeException is raised with the expected vs actual length. * FlashArrayVolume.getAddress() - same defensive check when building an EUI-128 from the FlashArray volume serial; if the serial is shorter than 24 hex chars, fail with a clear message instead of SIOOBE. * MultipathNVMeOFAdapterBase.connectPhysicalDisk() - Integer.parseInt of the STORAGE_POOL_DISK_WAIT detail is now guarded; a non-numeric value falls back to the default rather than aborting the connect. * MultipathNVMeOFAdapterBase.rescanAllControllers() - honour the boolean return from Process.waitFor(). If an nvme ns-rescan invocation does not complete in NS_RESCAN_TIMEOUT_SECS we destroyForcibly() it, so hung nvme-cli processes do not accumulate while the namespace poll loop retries. * NVMeTCPAdapter - rename LOGGER_NVMETCP to LOGGER to match the naming convention used in the other KVM adapters. Signed-off-by: Eugenio Grosso --- .../storage/MultipathNVMeOFAdapterBase.java | 18 ++++++++- .../kvm/storage/NVMeTCPAdapter.java | 4 +- .../adapter/flasharray/FlashArrayAdapter.java | 40 +++++++++++++++---- .../adapter/flasharray/FlashArrayVolume.java | 6 +++ 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/MultipathNVMeOFAdapterBase.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/MultipathNVMeOFAdapterBase.java index 59f1abd3bec..e37906cde2e 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/MultipathNVMeOFAdapterBase.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/MultipathNVMeOFAdapterBase.java @@ -185,7 +185,13 @@ public abstract class MultipathNVMeOFAdapterBase implements StorageAdaptor { if (details != null && details.containsKey(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString())) { String waitTime = details.get(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString()); if (StringUtils.isNotEmpty(waitTime)) { - waitSecs = Integer.parseInt(waitTime); + try { + waitSecs = Integer.parseInt(waitTime); + } catch (NumberFormatException e) { + LOGGER.warn("Ignoring non-numeric " + com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString() + + "=[" + waitTime + "] on pool " + pool.getUuid() + ", falling back to default " + + DEFAULT_DISK_WAIT_SECS + "s"); + } } } return waitForNamespace(address, pool, waitSecs); @@ -234,7 +240,15 @@ public abstract class MultipathNVMeOFAdapterBase implements StorageAdaptor { for (File ctrl : ctrls) { Process p = new ProcessBuilder("nvme", "ns-rescan", "/dev/" + ctrl.getName()) .redirectErrorStream(true).start(); - p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS); + if (!p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS)) { + // Kill runaway nvme-cli invocations so they do not pile + // up under the JVM on every poll iteration while we + // are still waiting for the namespace to appear. + LOGGER.debug("nvme ns-rescan /dev/" + ctrl.getName() + + " did not complete within " + NS_RESCAN_TIMEOUT_SECS + + "s; terminating"); + p.destroyForcibly(); + } } } catch (Exception e) { LOGGER.debug("nvme ns-rescan attempt failed: " + e.getMessage()); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/NVMeTCPAdapter.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/NVMeTCPAdapter.java index 187b3abbbae..596f5d7bb9c 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/NVMeTCPAdapter.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/NVMeTCPAdapter.java @@ -28,10 +28,10 @@ import org.apache.logging.log4j.Logger; * {@link KVMStoragePoolManager} can find it via reflection. */ public class NVMeTCPAdapter extends MultipathNVMeOFAdapterBase { - private static final Logger LOGGER_NVMETCP = LogManager.getLogger(NVMeTCPAdapter.class); + private static final Logger LOGGER = LogManager.getLogger(NVMeTCPAdapter.class); public NVMeTCPAdapter() { - LOGGER_NVMETCP.info("Loaded NVMeTCPAdapter for StorageLayer"); + LOGGER.info("Loaded NVMeTCPAdapter for StorageLayer"); } @Override diff --git a/plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayAdapter.java b/plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayAdapter.java index f76d64bd2dc..3a58d9f806d 100644 --- a/plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayAdapter.java +++ b/plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayAdapter.java @@ -160,7 +160,8 @@ public class FlashArrayAdapter implements ProviderAdapter { } if (list == null || list.getItems() == null || list.getItems().size() == 0) { - throw new RuntimeException("Volume attach did not return lun information"); + throw new RuntimeException("Volume attach did not return connection information " + + "(expected lun for Fibre Channel or nsid for NVMe-TCP)"); } FlashArrayConnection connection = (FlashArrayConnection) this.getFlashArrayItem(list); @@ -186,10 +187,22 @@ public class FlashArrayAdapter implements ProviderAdapter { if (list != null && list.getItems() != null) { for (FlashArrayConnection conn : list.getItems()) { if (AddressType.NVMETCP.equals(volumeAddressType)) { - if (conn.getHostGroup() != null && conn.getHostGroup().getName() != null + // Prefer a hostgroup-scoped match when a hostgroup is configured + // on the pool; otherwise fall through to matching the connection + // by host like the Fibre Channel branch below. Covers both + // transport=nvme-tcp deployments with and without hostgroup=. + if (hostgroup != null && conn.getHostGroup() != null + && conn.getHostGroup().getName() != null && conn.getHostGroup().getName().equals(hostgroup)) { return conn.getNsid() != null ? "" + conn.getNsid() : "1"; } + if (conn.getHost() != null && conn.getHost().getName() != null + && (conn.getHost().getName().equals(hostname) + || (hostname.indexOf('.') > 0 + && conn.getHost().getName() + .equals(hostname.substring(0, hostname.indexOf('.')))))) { + return conn.getNsid() != null ? "" + conn.getNsid() : "1"; + } } else if (conn.getHost() != null && conn.getHost().getName() != null && (conn.getHost().getName().equals(hostname) || conn.getHost().getName().equals(hostname.substring(0, hostname.indexOf('.')))) && conn.getLun() != null) { @@ -198,7 +211,7 @@ public class FlashArrayAdapter implements ProviderAdapter { } throw new RuntimeException("Volume connection identifier (lun/nsid) not found in existing connection"); } else { - throw new RuntimeException("Volume lun is not found in existing connection"); + throw new RuntimeException("Volume connection is not found in existing connection list"); } } else { throw e; @@ -291,6 +304,11 @@ public class FlashArrayAdapter implements ProviderAdapter { // Reverse the EUI-128 layout: serial = eui[2:16] + eui[22:32], after // stripping the optional "eui." prefix that appears in udev paths. String eui = address.startsWith("eui.") ? address.substring(4) : address; + if (eui == null || eui.length() != 32) { + throw new RuntimeException("Invalid NVMe-TCP EUI-128 address [" + + address + "]: expected 32 hex characters, got " + + (eui == null ? "null" : String.valueOf(eui.length()))); + } serial = (eui.substring(2, 16) + eui.substring(22)).toUpperCase(); } else { throw new RuntimeException( @@ -346,8 +364,11 @@ public class FlashArrayAdapter implements ProviderAdapter { "/volume-snapshots?source_names=" + sourceDataObject.getExternalName(), null, new TypeReference>() { }); - - return (FlashArrayVolume) getFlashArrayItem(list); + // Stamp the pool's volume address type so ProviderSnapshot.getAddress() + // emits an NVMe EUI-128 on NVMe-TCP pools. Without this, the adaptive + // driver persists the snapshot with an FC-style WWN and subsequent + // revert/list operations cannot locate the namespace. + return withAddressType((FlashArrayVolume) getFlashArrayItem(list)); } /** @@ -390,7 +411,12 @@ public class FlashArrayAdapter implements ProviderAdapter { "/volume-snapshots?names=" + dataObject.getExternalName(), new TypeReference>() { }); - return (FlashArrayVolume) getFlashArrayItem(list); + // Stamp the pool's volume address type so ProviderSnapshot.getAddress() + // emits an NVMe EUI-128 on NVMe-TCP pools instead of the FIBERWWN + // default. Without this, the adaptive driver persists the snapshot + // path with an FC-style WWN and revert/list fails to locate the + // namespace on the host. + return withAddressType((FlashArrayVolume) getFlashArrayItem(list)); } @Override @@ -813,7 +839,7 @@ public class FlashArrayAdapter implements ProviderAdapter { FlashArrayList list = GET("/volume-snapshots?names=" + snapshotName, new TypeReference>() { }); - return (FlashArrayVolume) getFlashArrayItem(list); + return withAddressType((FlashArrayVolume) getFlashArrayItem(list)); } private FlashArrayVolume withAddressType(FlashArrayVolume vol) { diff --git a/plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayVolume.java b/plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayVolume.java index 45d73586daa..8343283d995 100644 --- a/plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayVolume.java +++ b/plugins/storage/volume/flasharray/src/main/java/org/apache/cloudstack/storage/datastore/adapter/flasharray/FlashArrayVolume.java @@ -116,6 +116,12 @@ public class FlashArrayVolume implements ProviderSnapshot { // 00 + serial[0:14] + + serial[14:24] // This is the value the Linux kernel exposes as // /dev/disk/by-id/nvme-eui. + if (serial.length() < 24) { + throw new RuntimeException("FlashArray serial [" + serial + + "] is too short to build an NVMe EUI-128 address " + + "(expected at least 24 hex characters, got " + + serial.length() + ")"); + } return ("00" + serial.substring(0, 14) + PURE_OUI_EUI + serial.substring(14)).toLowerCase(); } return ("6" + PURE_OUI + serial).toLowerCase();