From 058007e7ecc49b303acfedd7235b511ce7d6fa7c Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Fri, 27 Mar 2026 14:08:20 -0400 Subject: [PATCH] fix lockhost on creation of volumes from snap and fix bitmap issue when migrating a vol with incremental snap --- .../motion/AncientDataMotionStrategy.java | 20 ++ .../endpoint/DefaultEndPointSelector.java | 21 +- .../kvm/storage/KVMStorageProcessor.java | 210 ++++++++++++++++++ 3 files changed, 246 insertions(+), 5 deletions(-) diff --git a/engine/storage/datamotion/src/main/java/org/apache/cloudstack/storage/motion/AncientDataMotionStrategy.java b/engine/storage/datamotion/src/main/java/org/apache/cloudstack/storage/motion/AncientDataMotionStrategy.java index 8145158dfa4..e3b60bc625a 100644 --- a/engine/storage/datamotion/src/main/java/org/apache/cloudstack/storage/motion/AncientDataMotionStrategy.java +++ b/engine/storage/datamotion/src/main/java/org/apache/cloudstack/storage/motion/AncientDataMotionStrategy.java @@ -27,6 +27,7 @@ import java.util.Objects; import javax.inject.Inject; import com.cloud.agent.api.to.DiskTO; +import com.cloud.storage.ClvmLockManager; import com.cloud.storage.Storage; import org.apache.cloudstack.engine.subsystem.api.storage.ClusterScope; import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult; @@ -108,6 +109,8 @@ public class AncientDataMotionStrategy implements DataMotionStrategy { StorageCacheManager cacheMgr; @Inject VolumeDataStoreDao volumeDataStoreDao; + @Inject + ClvmLockManager clvmLockManager; @Inject StorageManager storageManager; @@ -309,6 +312,8 @@ public class AncientDataMotionStrategy implements DataMotionStrategy { ep = selector.select(srcData, volObj); } + updateLockHostForVolume(ep, volObj); + CopyCommand cmd = new CopyCommand(srcData.getTO(), addFullCloneAndDiskprovisiongStrictnessFlagOnVMwareDest(volObj.getTO()), _createVolumeFromSnapshotWait, VirtualMachineManager.ExecuteInSequence.value()); Answer answer = null; @@ -331,6 +336,21 @@ public class AncientDataMotionStrategy implements DataMotionStrategy { } } + private void updateLockHostForVolume(EndPoint ep, DataObject volObj) { + if (ep != null && volObj instanceof VolumeInfo) { + VolumeInfo volumeInfo = (VolumeInfo) volObj; + StoragePool destPool = (StoragePool) volObj.getDataStore(); + if (destPool != null && ClvmLockManager.isClvmPoolType(destPool.getPoolType())) { + Long hostId = ep.getId(); + Long existingHostId = clvmLockManager.getClvmLockHostId(volumeInfo.getId(), volumeInfo.getUuid()); + if (existingHostId == null) { + clvmLockManager.setClvmLockHostId(volumeInfo.getId(), hostId); + logger.debug("Set lock host ID {} for CLVM volume {} being created from snapshot", hostId, volumeInfo.getId()); + } + } + } + } + protected Answer cloneVolume(DataObject template, DataObject volume) { CopyCommand cmd = new CopyCommand(template.getTO(), addFullCloneAndDiskprovisiongStrictnessFlagOnVMwareDest(volume.getTO()), 0, VirtualMachineManager.ExecuteInSequence.value()); try { diff --git a/engine/storage/src/main/java/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java b/engine/storage/src/main/java/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java index fc2f263ee85..de06d22de5f 100644 --- a/engine/storage/src/main/java/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java +++ b/engine/storage/src/main/java/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java @@ -449,11 +449,22 @@ public class DefaultEndPointSelector implements EndPointSelector { DataStore store = object.getDataStore(); // This ensures volumes are created on the correct host with exclusive locks - if (object instanceof VolumeInfo && store.getRole() == DataStoreRole.Primary) { - VolumeInfo volInfo = (VolumeInfo) object; - EndPoint clvmEndpoint = selectClvmEndpointIfApplicable(volInfo, "volume creation"); - if (clvmEndpoint != null) { - return clvmEndpoint; + String operation = ""; + if (DataStoreRole.Primary == store.getRole()) { + VolumeInfo volume = null; + if (object instanceof VolumeInfo) { + volume = (VolumeInfo) object; + operation = "volume creation"; + } else if (object instanceof SnapshotInfo) { + volume = ((SnapshotInfo) object).getBaseVolume(); + operation = "snapshot creation"; + } + + if (volume != null) { + EndPoint clvmEndpoint = selectClvmEndpointIfApplicable(volume, operation); + if (clvmEndpoint != null) { + return clvmEndpoint; + } } } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java index a4d55dc6a7d..59e598082a2 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java @@ -2119,6 +2119,13 @@ public class KVMStorageProcessor implements StorageProcessor { logger.debug("Taking incremental volume snapshot of volume [{}]. Snapshot will be copied to [{}].", volumeObjectTo, ObjectUtils.defaultIfNull(secondaryPool, primaryPool)); try { + // For CLVM_NG incremental snapshots, validate bitmap before proceeding + SnapshotObjectTO bitmapValidationResult = validateClvmNgBitmapAndFallbackIfNeeded(snapshotObjectTO, primaryPool, + secondaryPool, secondaryPoolUrl, snapshotName, volumeObjectTo, conn, wait); + if (bitmapValidationResult != null) { + return bitmapValidationResult; + } + String vmName = String.format("DUMMY-VM-%s", snapshotName); String vmXml = getVmXml(primaryPool, volumeObjectTo, vmName); @@ -2309,6 +2316,209 @@ public class KVMStorageProcessor implements StorageProcessor { return immediateParentPath.substring(immediateParentPath.lastIndexOf(File.separator) + 1); } + /** + * Checks if a QEMU bitmap exists in the volume and is usable (not in-use or corrupted). + * This is important after lock migration where bitmaps may be left in "in-use" state. + * + * @param pool The storage pool containing the volume + * @param volume The volume to check + * @param bitmapName The name of the bitmap to check + * @return true if bitmap exists and is usable, false if missing, in-use, or corrupted + */ + protected boolean isBitmapUsable(KVMStoragePool pool, VolumeObjectTO volume, String bitmapName) { + try { + String volumePath = pool.getLocalPathFor(volume.getPath()); + + String command = String.format("qemu-img info --output=json -U %s", volumePath); + String jsonOutput = Script.runSimpleBashScriptWithFullResult(command, 30); + + if (jsonOutput == null || jsonOutput.trim().isEmpty()) { + logger.warn("Failed to get qemu-img info for volume [{}]", volumePath); + return false; + } + + logger.debug("qemu-img info output for volume [{}]: {}", volumePath, jsonOutput); + + try { + ObjectMapper mapper = new ObjectMapper(); + JsonNode root = mapper.readTree(jsonOutput); + + JsonNode formatSpecific = root.path("format-specific"); + if (formatSpecific.isMissingNode()) { + logger.debug("No format-specific data found for volume [{}], bitmap check skipped", volumePath); + return false; + } + + JsonNode data = formatSpecific.path("data"); + JsonNode bitmaps = data.path("bitmaps"); + + if (bitmaps.isMissingNode() || !bitmaps.isArray()) { + logger.debug("No bitmaps found in volume [{}]", volumePath); + return false; + } + + for (JsonNode bitmap : bitmaps) { + String name = bitmap.path("name").asText(); + if (bitmapName.equals(name)) { + JsonNode flags = bitmap.path("flags"); + if (flags.isArray()) { + for (JsonNode flag : flags) { + String flagValue = flag.asText(); + if ("in-use".equals(flagValue)) { + logger.warn("Bitmap [{}] in volume [{}] is marked as 'in-use' and cannot be used for incremental snapshot", + bitmapName, volumePath); + return false; + } + } + } + logger.debug("Bitmap [{}] found in volume [{}] and is usable", bitmapName, volumePath); + return true; + } + } + + logger.warn("Bitmap [{}] not found in volume [{}]", bitmapName, volumePath); + return false; + + } catch (JsonProcessingException e) { + logger.error("Failed to parse qemu-img JSON output for volume [{}]: {}", volumePath, e.getMessage(), e); + return false; + } + + } catch (Exception e) { + logger.error("Error checking bitmap [{}] for volume [{}]: {}", bitmapName, volume, e.getMessage(), e); + return false; + } + } + + /** + * Removes a broken or unusable bitmap from a volume. + * Called before falling back to full snapshot to keep volume metadata clean. + * + * @param pool Storage pool containing the volume + * @param volume Volume with broken bitmap + * @param bitmapName Name of bitmap to remove + */ + private void cleanupBrokenBitmap(KVMStoragePool pool, VolumeObjectTO volume, String bitmapName) { + try { + String volumePath = pool.getLocalPathFor(volume.getPath()); + + logger.info("Removing broken bitmap [{}] from volume [{}] before taking full snapshot", + bitmapName, volumePath); + + QemuImgFile volumeFile = new QemuImgFile(volumePath, PhysicalDiskFormat.QCOW2); + QemuImg qemuImg = new QemuImg(0); + + try { + qemuImg.bitmap(QemuImg.BitmapOperation.Remove, volumeFile, bitmapName); + logger.info("Successfully removed broken bitmap [{}] from volume [{}]", + bitmapName, volume.getPath()); + } catch (QemuImgException e) { + logger.warn("Failed to remove broken bitmap [{}] from volume [{}]: {}. " + + "Proceeding with fallback anyway.", + bitmapName, volume.getPath(), e.getMessage()); + } + + } catch (Exception e) { + logger.warn("Exception while cleaning up broken bitmap [{}] for volume [{}]: {}. " + + "Proceeding with fallback anyway.", + bitmapName, volume.getPath(), e.getMessage()); + } + } + + /** + * Validates QEMU bitmap for CLVM_NG incremental snapshots and falls back to full snapshot if needed. + * This method checks if the bitmap from the parent checkpoint is usable. If the bitmap is corrupted, + * in-use, or missing, it cleans up the broken bitmap and falls back to taking a full snapshot with + * a new checkpoint to restart the incremental chain. + * + * @param snapshotObjectTO Snapshot being created + * @param primaryPool Primary storage pool + * @param secondaryPool Secondary storage pool for backup + * @param secondaryPoolUrl Secondary pool URL + * @param snapshotName Name of the snapshot + * @param volumeObjectTo Volume being snapshotted + * @param conn Libvirt connection + * @param wait Timeout for operations + * @return SnapshotObjectTO if fallback to full snapshot occurred, null if validation passed + * @throws LibvirtException if libvirt operations fail + */ + private SnapshotObjectTO validateClvmNgBitmapAndFallbackIfNeeded(SnapshotObjectTO snapshotObjectTO, + KVMStoragePool primaryPool, + KVMStoragePool secondaryPool, + String secondaryPoolUrl, + String snapshotName, + VolumeObjectTO volumeObjectTo, + Connect conn, + int wait) throws LibvirtException { + if (primaryPool.getType() != StoragePoolType.CLVM_NG || snapshotObjectTO.getParentSnapshotPath() == null) { + return null; + } + + String[] parents = snapshotObjectTO.getParents(); + if (parents == null || parents.length == 0) { + return null; + } + + String parentCheckpointName = getParentCheckpointName(parents); + logger.debug("Validating bitmap [{}] for CLVM_NG volume [{}] before taking incremental snapshot", + parentCheckpointName, volumeObjectTo.getPath()); + + if (!isBitmapUsable(primaryPool, volumeObjectTo, parentCheckpointName)) { + logger.warn("Bitmap [{}] is not usable for volume [{}]. Falling back to full snapshot with new checkpoint.", + parentCheckpointName, volumeObjectTo.getPath()); + cleanupBrokenBitmap(primaryPool, volumeObjectTo, parentCheckpointName); + return takeFullVolumeSnapshotOfStoppedVmForIncremental(snapshotObjectTO, primaryPool, secondaryPool, + secondaryPoolUrl, snapshotName, volumeObjectTo, conn, wait); + } + + logger.debug("Bitmap [{}] is valid and usable for incremental snapshot", parentCheckpointName); + return null; + } + + /** + * Takes a full snapshot of a stopped VM and creates a new checkpoint to restart the incremental chain. + * This is used as a fallback when incremental snapshot fails due to bitmap issues. + */ + private SnapshotObjectTO takeFullVolumeSnapshotOfStoppedVmForIncremental(SnapshotObjectTO snapshotObjectTO, + KVMStoragePool primaryPool, + KVMStoragePool secondaryPool, + String secondaryPoolUrl, + String snapshotName, + VolumeObjectTO volumeObjectTo, + Connect conn, + int wait) throws LibvirtException { + resource.validateLibvirtAndQemuVersionForIncrementalSnapshots(); + Domain vm = null; + logger.info("Taking full volume snapshot (with new checkpoint) of volume [{}] to restart incremental chain. " + + "Snapshot will be copied to [{}].", volumeObjectTo, ObjectUtils.defaultIfNull(secondaryPool, primaryPool)); + try { + String vmName = String.format("DUMMY-VM-%s", snapshotName); + + String vmXml = getVmXml(primaryPool, volumeObjectTo, vmName); + + logger.debug("Creating dummy VM with volume [{}] to take a full snapshot with checkpoint.", volumeObjectTo); + resource.startVM(conn, vmName, vmXml, Domain.CreateFlags.PAUSED); + + vm = resource.getDomain(conn, vmName); + + SnapshotObjectTO fullSnapshotTO = new SnapshotObjectTO(); + fullSnapshotTO.setPath(snapshotObjectTO.getPath()); + fullSnapshotTO.setVolume(snapshotObjectTO.getVolume()); + fullSnapshotTO.setParentSnapshotPath(null); // No parent - forces full snapshot + + return takeIncrementalVolumeSnapshotOfRunningVm(fullSnapshotTO, primaryPool, secondaryPool, + secondaryPoolUrl, snapshotName, volumeObjectTo, vm, conn, wait); + } catch (InternalErrorException | LibvirtException | CloudRuntimeException e) { + logger.error("Failed to take full volume snapshot with checkpoint for volume [{}] due to {}.", + volumeObjectTo, e.getMessage(), e); + throw new CloudRuntimeException(e); + } finally { + if (vm != null) { + vm.destroy(); + } + } + } + private Path createFileAndWrite(String content, String dir, String fileName) { File dirFile = new File(dir); if (!dirFile.exists()) {