mirror of https://github.com/apache/cloudstack.git
kvm/flasharray: address review feedback on NVMe-TCP PR
Apply the review comments from the first round on #13061: * FlashArrayAdapter.snapshot() and both getSnapshot() entry points now wrap the returned FlashArrayVolume in withAddressType(). Without this, snapshots taken against an NVMe-TCP pool had the constructor-default AddressType.FIBERWWN and ProviderSnapshot.getAddress() emitted an FC style WWN instead of the NVMe EUI-128, which the adaptive driver then persisted as the snapshot path. Verified end-to-end against Purity 6.7.7: a fresh NVMe-TCP snapshot now lands with install_path starting 006c... , matching the source volume's EUI (previously it was 6-24a9370...). * FlashArrayAdapter.attach() - retry path after 'Connection already exists' no longer requires a hostgroup-scoped match for NVMe-TCP. If hostgroup is not configured, or the existing connection is host-scoped, fall back to matching by host name, same as the Fibre Channel branch. Also normalize the 'volume lun is not found' message when no connection list is returned. * FlashArrayAdapter.attach() - initial 'Volume attach did not return lun information' exception message now mentions both lun (FC) and nsid (NVMe-TCP) so the error is not misleading on NVMe deployments. * FlashArrayAdapter.getVolumeByAddress() - validate the EUI-128 length before slicing. A short/malformed address used to throw StringIndexOutOfBoundsException deep inside getFlashArrayItem and be swallowed as 'not found'; now a clear RuntimeException is raised with the expected vs actual length. * FlashArrayVolume.getAddress() - same defensive check when building an EUI-128 from the FlashArray volume serial; if the serial is shorter than 24 hex chars, fail with a clear message instead of SIOOBE. * MultipathNVMeOFAdapterBase.connectPhysicalDisk() - Integer.parseInt of the STORAGE_POOL_DISK_WAIT detail is now guarded; a non-numeric value falls back to the default rather than aborting the connect. * MultipathNVMeOFAdapterBase.rescanAllControllers() - honour the boolean return from Process.waitFor(). If an nvme ns-rescan invocation does not complete in NS_RESCAN_TIMEOUT_SECS we destroyForcibly() it, so hung nvme-cli processes do not accumulate while the namespace poll loop retries. * NVMeTCPAdapter - rename LOGGER_NVMETCP to LOGGER to match the naming convention used in the other KVM adapters. Signed-off-by: Eugenio Grosso <eugenio.grosso@gmail.com>
This commit is contained in:
parent
c0cdfa41da
commit
723bf1445f
|
|
@ -185,7 +185,13 @@ public abstract class MultipathNVMeOFAdapterBase implements StorageAdaptor {
|
|||
if (details != null && details.containsKey(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString())) {
|
||||
String waitTime = details.get(com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString());
|
||||
if (StringUtils.isNotEmpty(waitTime)) {
|
||||
waitSecs = Integer.parseInt(waitTime);
|
||||
try {
|
||||
waitSecs = Integer.parseInt(waitTime);
|
||||
} catch (NumberFormatException e) {
|
||||
LOGGER.warn("Ignoring non-numeric " + com.cloud.storage.StorageManager.STORAGE_POOL_DISK_WAIT.toString()
|
||||
+ "=[" + waitTime + "] on pool " + pool.getUuid() + ", falling back to default "
|
||||
+ DEFAULT_DISK_WAIT_SECS + "s");
|
||||
}
|
||||
}
|
||||
}
|
||||
return waitForNamespace(address, pool, waitSecs);
|
||||
|
|
@ -234,7 +240,15 @@ public abstract class MultipathNVMeOFAdapterBase implements StorageAdaptor {
|
|||
for (File ctrl : ctrls) {
|
||||
Process p = new ProcessBuilder("nvme", "ns-rescan", "/dev/" + ctrl.getName())
|
||||
.redirectErrorStream(true).start();
|
||||
p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS);
|
||||
if (!p.waitFor(NS_RESCAN_TIMEOUT_SECS, TimeUnit.SECONDS)) {
|
||||
// Kill runaway nvme-cli invocations so they do not pile
|
||||
// up under the JVM on every poll iteration while we
|
||||
// are still waiting for the namespace to appear.
|
||||
LOGGER.debug("nvme ns-rescan /dev/" + ctrl.getName()
|
||||
+ " did not complete within " + NS_RESCAN_TIMEOUT_SECS
|
||||
+ "s; terminating");
|
||||
p.destroyForcibly();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOGGER.debug("nvme ns-rescan attempt failed: " + e.getMessage());
|
||||
|
|
|
|||
|
|
@ -28,10 +28,10 @@ import org.apache.logging.log4j.Logger;
|
|||
* {@link KVMStoragePoolManager} can find it via reflection.
|
||||
*/
|
||||
public class NVMeTCPAdapter extends MultipathNVMeOFAdapterBase {
|
||||
private static final Logger LOGGER_NVMETCP = LogManager.getLogger(NVMeTCPAdapter.class);
|
||||
private static final Logger LOGGER = LogManager.getLogger(NVMeTCPAdapter.class);
|
||||
|
||||
public NVMeTCPAdapter() {
|
||||
LOGGER_NVMETCP.info("Loaded NVMeTCPAdapter for StorageLayer");
|
||||
LOGGER.info("Loaded NVMeTCPAdapter for StorageLayer");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -160,7 +160,8 @@ public class FlashArrayAdapter implements ProviderAdapter {
|
|||
}
|
||||
|
||||
if (list == null || list.getItems() == null || list.getItems().size() == 0) {
|
||||
throw new RuntimeException("Volume attach did not return lun information");
|
||||
throw new RuntimeException("Volume attach did not return connection information "
|
||||
+ "(expected lun for Fibre Channel or nsid for NVMe-TCP)");
|
||||
}
|
||||
|
||||
FlashArrayConnection connection = (FlashArrayConnection) this.getFlashArrayItem(list);
|
||||
|
|
@ -186,10 +187,22 @@ public class FlashArrayAdapter implements ProviderAdapter {
|
|||
if (list != null && list.getItems() != null) {
|
||||
for (FlashArrayConnection conn : list.getItems()) {
|
||||
if (AddressType.NVMETCP.equals(volumeAddressType)) {
|
||||
if (conn.getHostGroup() != null && conn.getHostGroup().getName() != null
|
||||
// Prefer a hostgroup-scoped match when a hostgroup is configured
|
||||
// on the pool; otherwise fall through to matching the connection
|
||||
// by host like the Fibre Channel branch below. Covers both
|
||||
// transport=nvme-tcp deployments with and without hostgroup=.
|
||||
if (hostgroup != null && conn.getHostGroup() != null
|
||||
&& conn.getHostGroup().getName() != null
|
||||
&& conn.getHostGroup().getName().equals(hostgroup)) {
|
||||
return conn.getNsid() != null ? "" + conn.getNsid() : "1";
|
||||
}
|
||||
if (conn.getHost() != null && conn.getHost().getName() != null
|
||||
&& (conn.getHost().getName().equals(hostname)
|
||||
|| (hostname.indexOf('.') > 0
|
||||
&& conn.getHost().getName()
|
||||
.equals(hostname.substring(0, hostname.indexOf('.')))))) {
|
||||
return conn.getNsid() != null ? "" + conn.getNsid() : "1";
|
||||
}
|
||||
} else if (conn.getHost() != null && conn.getHost().getName() != null &&
|
||||
(conn.getHost().getName().equals(hostname) || conn.getHost().getName().equals(hostname.substring(0, hostname.indexOf('.')))) &&
|
||||
conn.getLun() != null) {
|
||||
|
|
@ -198,7 +211,7 @@ public class FlashArrayAdapter implements ProviderAdapter {
|
|||
}
|
||||
throw new RuntimeException("Volume connection identifier (lun/nsid) not found in existing connection");
|
||||
} else {
|
||||
throw new RuntimeException("Volume lun is not found in existing connection");
|
||||
throw new RuntimeException("Volume connection is not found in existing connection list");
|
||||
}
|
||||
} else {
|
||||
throw e;
|
||||
|
|
@ -291,6 +304,11 @@ public class FlashArrayAdapter implements ProviderAdapter {
|
|||
// Reverse the EUI-128 layout: serial = eui[2:16] + eui[22:32], after
|
||||
// stripping the optional "eui." prefix that appears in udev paths.
|
||||
String eui = address.startsWith("eui.") ? address.substring(4) : address;
|
||||
if (eui == null || eui.length() != 32) {
|
||||
throw new RuntimeException("Invalid NVMe-TCP EUI-128 address ["
|
||||
+ address + "]: expected 32 hex characters, got "
|
||||
+ (eui == null ? "null" : String.valueOf(eui.length())));
|
||||
}
|
||||
serial = (eui.substring(2, 16) + eui.substring(22)).toUpperCase();
|
||||
} else {
|
||||
throw new RuntimeException(
|
||||
|
|
@ -346,8 +364,11 @@ public class FlashArrayAdapter implements ProviderAdapter {
|
|||
"/volume-snapshots?source_names=" + sourceDataObject.getExternalName(), null,
|
||||
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
|
||||
});
|
||||
|
||||
return (FlashArrayVolume) getFlashArrayItem(list);
|
||||
// Stamp the pool's volume address type so ProviderSnapshot.getAddress()
|
||||
// emits an NVMe EUI-128 on NVMe-TCP pools. Without this, the adaptive
|
||||
// driver persists the snapshot with an FC-style WWN and subsequent
|
||||
// revert/list operations cannot locate the namespace.
|
||||
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -390,7 +411,12 @@ public class FlashArrayAdapter implements ProviderAdapter {
|
|||
"/volume-snapshots?names=" + dataObject.getExternalName(),
|
||||
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
|
||||
});
|
||||
return (FlashArrayVolume) getFlashArrayItem(list);
|
||||
// Stamp the pool's volume address type so ProviderSnapshot.getAddress()
|
||||
// emits an NVMe EUI-128 on NVMe-TCP pools instead of the FIBERWWN
|
||||
// default. Without this, the adaptive driver persists the snapshot
|
||||
// path with an FC-style WWN and revert/list fails to locate the
|
||||
// namespace on the host.
|
||||
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -813,7 +839,7 @@ public class FlashArrayAdapter implements ProviderAdapter {
|
|||
FlashArrayList<FlashArrayVolume> list = GET("/volume-snapshots?names=" + snapshotName,
|
||||
new TypeReference<FlashArrayList<FlashArrayVolume>>() {
|
||||
});
|
||||
return (FlashArrayVolume) getFlashArrayItem(list);
|
||||
return withAddressType((FlashArrayVolume) getFlashArrayItem(list));
|
||||
}
|
||||
|
||||
private FlashArrayVolume withAddressType(FlashArrayVolume vol) {
|
||||
|
|
|
|||
|
|
@ -116,6 +116,12 @@ public class FlashArrayVolume implements ProviderSnapshot {
|
|||
// 00 + serial[0:14] + <Pure OUI (24a937)> + serial[14:24]
|
||||
// This is the value the Linux kernel exposes as
|
||||
// /dev/disk/by-id/nvme-eui.<result>
|
||||
if (serial.length() < 24) {
|
||||
throw new RuntimeException("FlashArray serial [" + serial
|
||||
+ "] is too short to build an NVMe EUI-128 address "
|
||||
+ "(expected at least 24 hex characters, got "
|
||||
+ serial.length() + ")");
|
||||
}
|
||||
return ("00" + serial.substring(0, 14) + PURE_OUI_EUI + serial.substring(14)).toLowerCase();
|
||||
}
|
||||
return ("6" + PURE_OUI + serial).toLowerCase();
|
||||
|
|
|
|||
Loading…
Reference in New Issue