fix lockhost on creation of volumes from snap and fix bitmap issue when migrating a vol with incremental snap

This commit is contained in:
Pearl Dsilva 2026-03-27 14:08:20 -04:00
parent 237f074054
commit 058007e7ec
3 changed files with 246 additions and 5 deletions

View File

@ -27,6 +27,7 @@ import java.util.Objects;
import javax.inject.Inject;
import com.cloud.agent.api.to.DiskTO;
import com.cloud.storage.ClvmLockManager;
import com.cloud.storage.Storage;
import org.apache.cloudstack.engine.subsystem.api.storage.ClusterScope;
import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult;
@ -108,6 +109,8 @@ public class AncientDataMotionStrategy implements DataMotionStrategy {
StorageCacheManager cacheMgr;
@Inject
VolumeDataStoreDao volumeDataStoreDao;
@Inject
ClvmLockManager clvmLockManager;
@Inject
StorageManager storageManager;
@ -309,6 +312,8 @@ public class AncientDataMotionStrategy implements DataMotionStrategy {
ep = selector.select(srcData, volObj);
}
updateLockHostForVolume(ep, volObj);
CopyCommand cmd = new CopyCommand(srcData.getTO(), addFullCloneAndDiskprovisiongStrictnessFlagOnVMwareDest(volObj.getTO()), _createVolumeFromSnapshotWait, VirtualMachineManager.ExecuteInSequence.value());
Answer answer = null;
@ -331,6 +336,21 @@ public class AncientDataMotionStrategy implements DataMotionStrategy {
}
}
private void updateLockHostForVolume(EndPoint ep, DataObject volObj) {
if (ep != null && volObj instanceof VolumeInfo) {
VolumeInfo volumeInfo = (VolumeInfo) volObj;
StoragePool destPool = (StoragePool) volObj.getDataStore();
if (destPool != null && ClvmLockManager.isClvmPoolType(destPool.getPoolType())) {
Long hostId = ep.getId();
Long existingHostId = clvmLockManager.getClvmLockHostId(volumeInfo.getId(), volumeInfo.getUuid());
if (existingHostId == null) {
clvmLockManager.setClvmLockHostId(volumeInfo.getId(), hostId);
logger.debug("Set lock host ID {} for CLVM volume {} being created from snapshot", hostId, volumeInfo.getId());
}
}
}
}
protected Answer cloneVolume(DataObject template, DataObject volume) {
CopyCommand cmd = new CopyCommand(template.getTO(), addFullCloneAndDiskprovisiongStrictnessFlagOnVMwareDest(volume.getTO()), 0, VirtualMachineManager.ExecuteInSequence.value());
try {

View File

@ -449,11 +449,22 @@ public class DefaultEndPointSelector implements EndPointSelector {
DataStore store = object.getDataStore();
// This ensures volumes are created on the correct host with exclusive locks
if (object instanceof VolumeInfo && store.getRole() == DataStoreRole.Primary) {
VolumeInfo volInfo = (VolumeInfo) object;
EndPoint clvmEndpoint = selectClvmEndpointIfApplicable(volInfo, "volume creation");
if (clvmEndpoint != null) {
return clvmEndpoint;
String operation = "";
if (DataStoreRole.Primary == store.getRole()) {
VolumeInfo volume = null;
if (object instanceof VolumeInfo) {
volume = (VolumeInfo) object;
operation = "volume creation";
} else if (object instanceof SnapshotInfo) {
volume = ((SnapshotInfo) object).getBaseVolume();
operation = "snapshot creation";
}
if (volume != null) {
EndPoint clvmEndpoint = selectClvmEndpointIfApplicable(volume, operation);
if (clvmEndpoint != null) {
return clvmEndpoint;
}
}
}

View File

@ -2119,6 +2119,13 @@ public class KVMStorageProcessor implements StorageProcessor {
logger.debug("Taking incremental volume snapshot of volume [{}]. Snapshot will be copied to [{}].", volumeObjectTo,
ObjectUtils.defaultIfNull(secondaryPool, primaryPool));
try {
// For CLVM_NG incremental snapshots, validate bitmap before proceeding
SnapshotObjectTO bitmapValidationResult = validateClvmNgBitmapAndFallbackIfNeeded(snapshotObjectTO, primaryPool,
secondaryPool, secondaryPoolUrl, snapshotName, volumeObjectTo, conn, wait);
if (bitmapValidationResult != null) {
return bitmapValidationResult;
}
String vmName = String.format("DUMMY-VM-%s", snapshotName);
String vmXml = getVmXml(primaryPool, volumeObjectTo, vmName);
@ -2309,6 +2316,209 @@ public class KVMStorageProcessor implements StorageProcessor {
return immediateParentPath.substring(immediateParentPath.lastIndexOf(File.separator) + 1);
}
/**
* Checks if a QEMU bitmap exists in the volume and is usable (not in-use or corrupted).
* This is important after lock migration where bitmaps may be left in "in-use" state.
*
* @param pool The storage pool containing the volume
* @param volume The volume to check
* @param bitmapName The name of the bitmap to check
* @return true if bitmap exists and is usable, false if missing, in-use, or corrupted
*/
protected boolean isBitmapUsable(KVMStoragePool pool, VolumeObjectTO volume, String bitmapName) {
try {
String volumePath = pool.getLocalPathFor(volume.getPath());
String command = String.format("qemu-img info --output=json -U %s", volumePath);
String jsonOutput = Script.runSimpleBashScriptWithFullResult(command, 30);
if (jsonOutput == null || jsonOutput.trim().isEmpty()) {
logger.warn("Failed to get qemu-img info for volume [{}]", volumePath);
return false;
}
logger.debug("qemu-img info output for volume [{}]: {}", volumePath, jsonOutput);
try {
ObjectMapper mapper = new ObjectMapper();
JsonNode root = mapper.readTree(jsonOutput);
JsonNode formatSpecific = root.path("format-specific");
if (formatSpecific.isMissingNode()) {
logger.debug("No format-specific data found for volume [{}], bitmap check skipped", volumePath);
return false;
}
JsonNode data = formatSpecific.path("data");
JsonNode bitmaps = data.path("bitmaps");
if (bitmaps.isMissingNode() || !bitmaps.isArray()) {
logger.debug("No bitmaps found in volume [{}]", volumePath);
return false;
}
for (JsonNode bitmap : bitmaps) {
String name = bitmap.path("name").asText();
if (bitmapName.equals(name)) {
JsonNode flags = bitmap.path("flags");
if (flags.isArray()) {
for (JsonNode flag : flags) {
String flagValue = flag.asText();
if ("in-use".equals(flagValue)) {
logger.warn("Bitmap [{}] in volume [{}] is marked as 'in-use' and cannot be used for incremental snapshot",
bitmapName, volumePath);
return false;
}
}
}
logger.debug("Bitmap [{}] found in volume [{}] and is usable", bitmapName, volumePath);
return true;
}
}
logger.warn("Bitmap [{}] not found in volume [{}]", bitmapName, volumePath);
return false;
} catch (JsonProcessingException e) {
logger.error("Failed to parse qemu-img JSON output for volume [{}]: {}", volumePath, e.getMessage(), e);
return false;
}
} catch (Exception e) {
logger.error("Error checking bitmap [{}] for volume [{}]: {}", bitmapName, volume, e.getMessage(), e);
return false;
}
}
/**
* Removes a broken or unusable bitmap from a volume.
* Called before falling back to full snapshot to keep volume metadata clean.
*
* @param pool Storage pool containing the volume
* @param volume Volume with broken bitmap
* @param bitmapName Name of bitmap to remove
*/
private void cleanupBrokenBitmap(KVMStoragePool pool, VolumeObjectTO volume, String bitmapName) {
try {
String volumePath = pool.getLocalPathFor(volume.getPath());
logger.info("Removing broken bitmap [{}] from volume [{}] before taking full snapshot",
bitmapName, volumePath);
QemuImgFile volumeFile = new QemuImgFile(volumePath, PhysicalDiskFormat.QCOW2);
QemuImg qemuImg = new QemuImg(0);
try {
qemuImg.bitmap(QemuImg.BitmapOperation.Remove, volumeFile, bitmapName);
logger.info("Successfully removed broken bitmap [{}] from volume [{}]",
bitmapName, volume.getPath());
} catch (QemuImgException e) {
logger.warn("Failed to remove broken bitmap [{}] from volume [{}]: {}. " +
"Proceeding with fallback anyway.",
bitmapName, volume.getPath(), e.getMessage());
}
} catch (Exception e) {
logger.warn("Exception while cleaning up broken bitmap [{}] for volume [{}]: {}. " +
"Proceeding with fallback anyway.",
bitmapName, volume.getPath(), e.getMessage());
}
}
/**
* Validates QEMU bitmap for CLVM_NG incremental snapshots and falls back to full snapshot if needed.
* This method checks if the bitmap from the parent checkpoint is usable. If the bitmap is corrupted,
* in-use, or missing, it cleans up the broken bitmap and falls back to taking a full snapshot with
* a new checkpoint to restart the incremental chain.
*
* @param snapshotObjectTO Snapshot being created
* @param primaryPool Primary storage pool
* @param secondaryPool Secondary storage pool for backup
* @param secondaryPoolUrl Secondary pool URL
* @param snapshotName Name of the snapshot
* @param volumeObjectTo Volume being snapshotted
* @param conn Libvirt connection
* @param wait Timeout for operations
* @return SnapshotObjectTO if fallback to full snapshot occurred, null if validation passed
* @throws LibvirtException if libvirt operations fail
*/
private SnapshotObjectTO validateClvmNgBitmapAndFallbackIfNeeded(SnapshotObjectTO snapshotObjectTO,
KVMStoragePool primaryPool,
KVMStoragePool secondaryPool,
String secondaryPoolUrl,
String snapshotName,
VolumeObjectTO volumeObjectTo,
Connect conn,
int wait) throws LibvirtException {
if (primaryPool.getType() != StoragePoolType.CLVM_NG || snapshotObjectTO.getParentSnapshotPath() == null) {
return null;
}
String[] parents = snapshotObjectTO.getParents();
if (parents == null || parents.length == 0) {
return null;
}
String parentCheckpointName = getParentCheckpointName(parents);
logger.debug("Validating bitmap [{}] for CLVM_NG volume [{}] before taking incremental snapshot",
parentCheckpointName, volumeObjectTo.getPath());
if (!isBitmapUsable(primaryPool, volumeObjectTo, parentCheckpointName)) {
logger.warn("Bitmap [{}] is not usable for volume [{}]. Falling back to full snapshot with new checkpoint.",
parentCheckpointName, volumeObjectTo.getPath());
cleanupBrokenBitmap(primaryPool, volumeObjectTo, parentCheckpointName);
return takeFullVolumeSnapshotOfStoppedVmForIncremental(snapshotObjectTO, primaryPool, secondaryPool,
secondaryPoolUrl, snapshotName, volumeObjectTo, conn, wait);
}
logger.debug("Bitmap [{}] is valid and usable for incremental snapshot", parentCheckpointName);
return null;
}
/**
* Takes a full snapshot of a stopped VM and creates a new checkpoint to restart the incremental chain.
* This is used as a fallback when incremental snapshot fails due to bitmap issues.
*/
private SnapshotObjectTO takeFullVolumeSnapshotOfStoppedVmForIncremental(SnapshotObjectTO snapshotObjectTO,
KVMStoragePool primaryPool,
KVMStoragePool secondaryPool,
String secondaryPoolUrl,
String snapshotName,
VolumeObjectTO volumeObjectTo,
Connect conn,
int wait) throws LibvirtException {
resource.validateLibvirtAndQemuVersionForIncrementalSnapshots();
Domain vm = null;
logger.info("Taking full volume snapshot (with new checkpoint) of volume [{}] to restart incremental chain. " +
"Snapshot will be copied to [{}].", volumeObjectTo, ObjectUtils.defaultIfNull(secondaryPool, primaryPool));
try {
String vmName = String.format("DUMMY-VM-%s", snapshotName);
String vmXml = getVmXml(primaryPool, volumeObjectTo, vmName);
logger.debug("Creating dummy VM with volume [{}] to take a full snapshot with checkpoint.", volumeObjectTo);
resource.startVM(conn, vmName, vmXml, Domain.CreateFlags.PAUSED);
vm = resource.getDomain(conn, vmName);
SnapshotObjectTO fullSnapshotTO = new SnapshotObjectTO();
fullSnapshotTO.setPath(snapshotObjectTO.getPath());
fullSnapshotTO.setVolume(snapshotObjectTO.getVolume());
fullSnapshotTO.setParentSnapshotPath(null); // No parent - forces full snapshot
return takeIncrementalVolumeSnapshotOfRunningVm(fullSnapshotTO, primaryPool, secondaryPool,
secondaryPoolUrl, snapshotName, volumeObjectTo, vm, conn, wait);
} catch (InternalErrorException | LibvirtException | CloudRuntimeException e) {
logger.error("Failed to take full volume snapshot with checkpoint for volume [{}] due to {}.",
volumeObjectTo, e.getMessage(), e);
throw new CloudRuntimeException(e);
} finally {
if (vm != null) {
vm.destroy();
}
}
}
private Path createFileAndWrite(String content, String dir, String fileName) {
File dirFile = new File(dir);
if (!dirFile.exists()) {