kvm/flasharray: address review feedback on NVMe-TCP PR

Apply the review comments from the first round on #13061:

* FlashArrayAdapter.snapshot() and both getSnapshot() entry points now
  wrap the returned FlashArrayVolume in withAddressType(). Without this,
  snapshots taken against an NVMe-TCP pool had the constructor-default
  AddressType.FIBERWWN, so ProviderSnapshot.getAddress() emitted an
  FC-style WWN instead of the NVMe EUI-128, which the adaptive driver then
  persisted as the snapshot path. Verified end-to-end against Purity 6.7.7:
  a fresh NVMe-TCP snapshot now lands with install_path starting 006c...,
  matching the source volume's EUI (previously it was 6-24a9370...).

* FlashArrayAdapter.attach() - retry path after 'Connection already
  exists' no longer requires a hostgroup-scoped match for NVMe-TCP. If
  hostgroup is not configured, or the existing connection is host-scoped,
  fall back to matching by host name, same as the Fibre Channel branch.
  Also reword the 'Volume lun is not found in existing connection'
  message, raised when no connection list is returned, so that it is
  transport-neutral instead of referring only to a lun.

* FlashArrayAdapter.attach() - initial 'Volume attach did not return lun
  information' exception message now mentions both lun (FC) and nsid
  (NVMe-TCP) so the error is not misleading on NVMe deployments.

* FlashArrayAdapter.getVolumeByAddress() - validate the EUI-128 length
  before slicing. A short/malformed address used to throw
  StringIndexOutOfBoundsException deep inside getFlashArrayItem and be
  swallowed as 'not found'; now a clear RuntimeException is raised with
  the expected vs actual length.

* FlashArrayVolume.getAddress() - same defensive check when building an
  EUI-128 from the FlashArray volume serial; if the serial is shorter
  than 24 hex chars, fail with a clear message instead of a
  StringIndexOutOfBoundsException.

* MultipathNVMeOFAdapterBase.connectPhysicalDisk() - Integer.parseInt of
  the STORAGE_POOL_DISK_WAIT detail is now guarded; a non-numeric value
  falls back to the default rather than aborting the connect.

* MultipathNVMeOFAdapterBase.rescanAllControllers() - honour the boolean
  return from Process.waitFor(). If an nvme ns-rescan invocation does
  not complete in NS_RESCAN_TIMEOUT_SECS we destroyForcibly() it, so
  hung nvme-cli processes do not accumulate while the namespace poll
  loop retries.

* NVMeTCPAdapter - rename LOGGER_NVMETCP to LOGGER to match the naming
  convention used in the other KVM adapters.

Signed-off-by: Eugenio Grosso <eugenio.grosso@gmail.com>
This commit is contained in:
Eugenio Grosso 2026-04-23 12:21:31 +00:00
parent c0cdfa41da
commit 723bf1445f
4 changed files with 57 additions and 11 deletions

View File

@ -185,7 +185,13 @@ public abstract class MultipathNVMeOFAdapterBase implements StorageAdaptor {
if (details != null && details.containsKey(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString())) {
String waitTime = details.get(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString());
if (StringUtils.isNotEmpty(waitTime)) {
waitSecs = Integer.parseInt(waitTime);
try {
waitSecs = Integer.parseInt(waitTime);
} catch (NumberFormatException e) {
LOGGER.warn("Ignoring non-numeric " + com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString()
+ "=[" + waitTime + "] on pool " + pool.getUuid() + ", falling back to default "
+ DEFAULT_DISK_WAIT_SECS + "s");
}
}
}
return waitForNamespace(address, pool, waitSecs);
@ -234,7 +240,15 @@ public abstract class MultipathNVMeOFAdapterBase implements StorageAdaptor {
for (File ctrl : ctrls) {
Process p = new ProcessBuilder("nvme", "ns-rescan", "/dev/" + ctrl.getName())
.redirectErrorStream(true).start();
p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS);
if (!p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS)) {
// Kill runaway nvme-cli invocations so they do not pile
// up under the JVM on every poll iteration while we
// are still waiting for the namespace to appear.
LOGGER.debug("nvme ns-rescan /dev/" + ctrl.getName()
+ " did not complete within " + NS_RESCAN_TIMEOUT_SECS
+ "s; terminating");
p.destroyForcibly();
}
}
} catch (Exception e) {
LOGGER.debug("nvme ns-rescan attempt failed: " + e.getMessage());

View File

@ -28,10 +28,10 @@ import org.apache.logging.log4j.Logger;
* {@link KVMStoragePoolManager} can find it via reflection.
*/
public class NVMeTCPAdapter extends MultipathNVMeOFAdapterBase {
private static final Logger LOGGER_NVMETCP = LogManager.getLogger(NVMeTCPAdapter.class);
private static final Logger LOGGER = LogManager.getLogger(NVMeTCPAdapter.class);
public NVMeTCPAdapter() {
LOGGER_NVMETCP.info("Loaded NVMeTCPAdapter for StorageLayer");
LOGGER.info("Loaded NVMeTCPAdapter for StorageLayer");
}
@Override

View File

@ -160,7 +160,8 @@ public class FlashArrayAdapter implements ProviderAdapter {
}
if (list == null || list.getItems() == null || list.getItems().size() == 0) {
throw new RuntimeException("Volume attach did not return lun information");
throw new RuntimeException("Volume attach did not return connection information "
+ "(expected lun for Fibre Channel or nsid for NVMe-TCP)");
}
FlashArrayConnection connection = (FlashArrayConnection) this.getFlashArrayItem(list);
@ -186,10 +187,22 @@ public class FlashArrayAdapter implements ProviderAdapter {
if (list != null && list.getItems() != null) {
for (FlashArrayConnection conn : list.getItems()) {
if (AddressType.NVMETCP.equals(volumeAddressType)) {
if (conn.getHostGroup() != null && conn.getHostGroup().getName() != null
// Prefer a hostgroup-scoped match when a hostgroup is configured
// on the pool; otherwise fall through to matching the connection
// by host like the Fibre Channel branch below. Covers both
// transport=nvme-tcp deployments with and without hostgroup=.
if (hostgroup != null && conn.getHostGroup() != null
&& conn.getHostGroup().getName() != null
&& conn.getHostGroup().getName().equals(hostgroup)) {
return conn.getNsid() != null ? "" + conn.getNsid() : "1";
}
if (conn.getHost() != null && conn.getHost().getName() != null
&& (conn.getHost().getName().equals(hostname)
|| (hostname.indexOf('.') > 0
&& conn.getHost().getName()
.equals(hostname.substring(0, hostname.indexOf('.')))))) {
return conn.getNsid() != null ? "" + conn.getNsid() : "1";
}
} else if (conn.getHost() != null && conn.getHost().getName() != null &&
(conn.getHost().getName().equals(hostname) || conn.getHost().getName().equals(hostname.substring(0, hostname.indexOf('.')))) &&
conn.getLun() != null) {
@ -198,7 +211,7 @@ public class FlashArrayAdapter implements ProviderAdapter {
}
throw new RuntimeException("Volume connection identifier (lun/nsid) not found in existing connection");
} else {
throw new RuntimeException("Volume lun is not found in existing connection");
throw new RuntimeException("Volume connection is not found in existing connection list");
}
} else {
throw e;
@ -291,6 +304,11 @@ public class FlashArrayAdapter implements ProviderAdapter {
// Reverse the EUI-128 layout: serial = eui[2:16] + eui[22:32], after
// stripping the optional "eui." prefix that appears in udev paths.
String eui = address.startsWith("eui.") ? address.substring(4) : address;
if (eui == null || eui.length() != 32) {
throw new RuntimeException("Invalid NVMe-TCP EUI-128 address ["
+ address + "]: expected 32 hex characters, got "
+ (eui == null ? "null" : String.valueOf(eui.length())));
}
serial = (eui.substring(2, 16) + eui.substring(22)).toUpperCase();
} else {
throw new RuntimeException(
@ -346,8 +364,11 @@ public class FlashArrayAdapter implements ProviderAdapter {
"/volume-snapshots?source_names=" + sourceDataObject.getExternalName(), null,
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
});
return (FlashArrayVolume) getFlashArrayItem(list);
// Stamp the pool's volume address type so ProviderSnapshot.getAddress()
// emits an NVMe EUI-128 on NVMe-TCP pools. Without this, the adaptive
// driver persists the snapshot with an FC-style WWN and subsequent
// revert/list operations cannot locate the namespace.
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
}
/**
@ -390,7 +411,12 @@ public class FlashArrayAdapter implements ProviderAdapter {
"/volume-snapshots?names=" + dataObject.getExternalName(),
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
});
return (FlashArrayVolume) getFlashArrayItem(list);
// Stamp the pool's volume address type so ProviderSnapshot.getAddress()
// emits an NVMe EUI-128 on NVMe-TCP pools instead of the FIBERWWN
// default. Without this, the adaptive driver persists the snapshot
// path with an FC-style WWN and revert/list fails to locate the
// namespace on the host.
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
}
@Override
@ -813,7 +839,7 @@ public class FlashArrayAdapter implements ProviderAdapter {
FlashArrayList<FlashArrayVolume> list = GET("/volume-snapshots?names=" + snapshotName,
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
});
return (FlashArrayVolume) getFlashArrayItem(list);
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
}
private FlashArrayVolume withAddressType(FlashArrayVolume vol) {

View File

@ -116,6 +116,12 @@ public class FlashArrayVolume implements ProviderSnapshot {
// 00 + serial[0:14] + <Pure OUI (24a937)> + serial[14:24]
// This is the value the Linux kernel exposes as
// /dev/disk/by-id/nvme-eui.<result>
if (serial.length() < 24) {
throw new RuntimeException("FlashArray serial [" + serial
+ "] is too short to build an NVMe EUI-128 address "
+ "(expected at least 24 hex characters, got "
+ serial.length() + ")");
}
return ("00" + serial.substring(0, 14) + PURE_OUI_EUI + serial.substring(14)).toLowerCase();
}
return ("6" + PURE_OUI + serial).toLowerCase();