From 43e2f7504aad25a8d5ec46288d1dd86527ad079a Mon Sep 17 00:00:00 2001 From: James Peru Date: Mon, 27 Apr 2026 19:10:46 +0300 Subject: [PATCH] feat(backup): on-demand bitmap recreation for incremental NAS backup CloudStack rebuilds the libvirt domain XML on every VM start, which means persistent QEMU dirty bitmaps don't survive a stop/start cycle. Rather than hooking into the VM start lifecycle (intrusive across the orchestration layer), this commit handles the missing bitmap *lazily* at the next backup attempt: nasbackup.sh - When -M incremental is requested, the script first checks `virsh checkpoint-list` for the parent bitmap. If absent, it recreates the checkpoint on the running domain so libvirt accepts the reference. The next incremental will be larger than usual (it captures all writes since recreate, not since the previous incremental) but is correct; subsequent ones return to normal size. - On recreation, emits BITMAP_RECREATED=$BITMAP_PARENT on stdout for the orchestrator to record. BackupAnswer + bitmapRecreated field surfaced from the agent. LibvirtTakeBackupCommandWrapper - Strips BITMAP_RECREATED= line from stdout before size parsing. - Sets answer.setBitmapRecreated(...). NASBackupChainKeys + BITMAP_RECREATED key for backup_details. NASBackupProvider - When the agent reports a recreated bitmap, persists it under backup_details and logs an info-level message so operators can correlate larger-than-usual incrementals with VM restarts. This satisfies the bitmap-loss-on-VM-restart concern from the RFC review without touching VirtualMachineManager / StartCommand / agent lifecycle. 
Refs: apache/cloudstack#12899 --- .../cloudstack/backup/BackupAnswer.java | 13 +++++++++++ .../cloudstack/backup/NASBackupChainKeys.java | 3 +++ .../cloudstack/backup/NASBackupProvider.java | 6 +++++ .../LibvirtTakeBackupCommandWrapper.java | 6 +++++ scripts/vm/hypervisor/kvm/nasbackup.sh | 23 +++++++++++++++++++ 5 files changed, 51 insertions(+) diff --git a/core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java b/core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java index 7882b1fa0a3..9e8282b16a8 100644 --- a/core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java +++ b/core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java @@ -35,6 +35,11 @@ public class BackupAnswer extends Answer { // Set when an incremental was requested but the agent had to fall back to a full // (e.g. VM was stopped). Provider should record this backup as type=full. private Boolean incrementalFallback; + // Set when the agent had to recreate the parent bitmap before this incremental + // (e.g. CloudStack rebuilt the domain XML on the previous VM start, losing bitmaps). + // The first incremental after a recreate is larger than usual; subsequent + // incrementals return to normal size. Informational — recorded in backup_details. 
+ private String bitmapRecreated; public BackupAnswer(final Command command, final boolean success, final String details) { super(command, success, details); @@ -90,4 +95,12 @@ public class BackupAnswer extends Answer { public void setIncrementalFallback(Boolean incrementalFallback) { this.incrementalFallback = incrementalFallback; } + + public String getBitmapRecreated() { + return bitmapRecreated; + } + + public void setBitmapRecreated(String bitmapRecreated) { + this.bitmapRecreated = bitmapRecreated; + } } diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupChainKeys.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupChainKeys.java index 542844e19bf..a3e81188911 100644 --- a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupChainKeys.java +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupChainKeys.java @@ -42,6 +42,9 @@ public final class NASBackupChainKeys { public static final String TYPE_FULL = "full"; public static final String TYPE_INCREMENTAL = "incremental"; + /** Set to the bitmap name when this incremental had to recreate its parent bitmap on the host (informational; this incremental is larger than usual). 
*/ + public static final String BITMAP_RECREATED = "nas.bitmap_recreated"; + private NASBackupChainKeys() { } } diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java index c8c56a65dce..d4f95dfd048 100644 --- a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java @@ -434,6 +434,12 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co backupVO.setBackedUpVolumes(backupManager.createVolumeInfoFromVolumes(volumes)); if (backupDao.update(backupVO.getId(), backupVO)) { persistChainMetadata(backupVO, effective, answer.getBitmapCreated()); + if (answer.getBitmapRecreated() != null) { + backupDetailsDao.persist(new BackupDetailVO(backupVO.getId(), + NASBackupChainKeys.BITMAP_RECREATED, answer.getBitmapRecreated(), true)); + logger.info("NAS incremental for VM {} recreated parent bitmap {} (likely VM was restarted since last backup)", + vm.getInstanceName(), answer.getBitmapRecreated()); + } return new Pair<>(true, backupVO); } else { throw new CloudRuntimeException("Failed to update backup"); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java index 8427242fc1c..3654116869c 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java @@ -117,6 +117,7 @@ public class LibvirtTakeBackupCommandWrapper extends CommandWrapper/dev/null | grep -qx "$BITMAP_PARENT"; then + cat > $dest/recreate-checkpoint.xml 
<$BITMAP_PARENT +$(virsh -c qemu:///system domblklist "$VM" --details 2>/dev/null | awk '$2=="disk"{printf "\n", $3}') + +XML + if ! virsh -c qemu:///system checkpoint-create "$VM" --xmlfile $dest/recreate-checkpoint.xml > /dev/null 2>&1; then + echo "Failed to recreate parent bitmap $BITMAP_PARENT for $VM" + cleanup + exit 1 + fi + # Marker for the orchestrator: this incremental is larger because the bitmap was rebuilt. + echo "BITMAP_RECREATED=$BITMAP_PARENT" + rm -f $dest/recreate-checkpoint.xml + fi + fi + # Build backup XML (and matching checkpoint XML when applicable). name="root" echo "" > $dest/backup.xml