James Peru Mmbono 2026-05-12 12:39:38 +05:30 committed by GitHub
commit 0f151532cb
12 changed files with 1219 additions and 32 deletions

View File

@@ -29,6 +29,17 @@ public class BackupAnswer extends Answer {
private Long virtualSize;
private Map<String, String> volumes;
Boolean needsCleanup;
// Set by the NAS backup provider after a checkpoint/bitmap was created during this backup.
// The provider persists it in backup_details under NASBackupChainKeys.BITMAP_NAME.
private String bitmapCreated;
// Set when an incremental was requested but the agent had to fall back to a full
// (e.g. VM was stopped). Provider should record this backup as type=full.
private Boolean incrementalFallback;
// Set when the agent had to recreate the parent bitmap before this incremental
// (e.g. CloudStack rebuilt the domain XML on the previous VM start, losing bitmaps).
// The first incremental after a recreate is larger than usual; subsequent
// incrementals return to normal size. Informational; recorded in backup_details.
private String bitmapRecreated;
public BackupAnswer(final Command command, final boolean success, final String details) {
super(command, success, details);
@@ -68,4 +79,28 @@ public class BackupAnswer extends Answer {
public void setNeedsCleanup(Boolean needsCleanup) {
this.needsCleanup = needsCleanup;
}
public String getBitmapCreated() {
return bitmapCreated;
}
public void setBitmapCreated(String bitmapCreated) {
this.bitmapCreated = bitmapCreated;
}
public Boolean getIncrementalFallback() {
return incrementalFallback != null && incrementalFallback;
}
public void setIncrementalFallback(Boolean incrementalFallback) {
this.incrementalFallback = incrementalFallback;
}
public String getBitmapRecreated() {
return bitmapRecreated;
}
public void setBitmapRecreated(String bitmapRecreated) {
this.bitmapRecreated = bitmapRecreated;
}
}

View File

@@ -0,0 +1,73 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package org.apache.cloudstack.backup;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.LogLevel;
/**
* Tells the KVM agent to rebase a NAS backup qcow2 onto a new backing parent. Used by the
* NAS backup provider during chain repair when a middle incremental is being deleted: the
* immediate child must absorb the soon-to-be-deleted parent's blocks and then re-link to
* the grandparent. Both target and new-backing paths are NAS-mount-relative.
*/
public class RebaseBackupCommand extends Command {
private String targetPath; // mount-relative path of the qcow2 to repoint
private String newBackingPath; // mount-relative path of the new backing parent
private String backupRepoType;
private String backupRepoAddress;
@LogLevel(LogLevel.Log4jLevel.Off)
private String mountOptions;
public RebaseBackupCommand(String targetPath, String newBackingPath,
String backupRepoType, String backupRepoAddress, String mountOptions) {
super();
this.targetPath = targetPath;
this.newBackingPath = newBackingPath;
this.backupRepoType = backupRepoType;
this.backupRepoAddress = backupRepoAddress;
this.mountOptions = mountOptions;
}
public String getTargetPath() {
return targetPath;
}
public String getNewBackingPath() {
return newBackingPath;
}
public String getBackupRepoType() {
return backupRepoType;
}
public String getBackupRepoAddress() {
return backupRepoAddress;
}
public String getMountOptions() {
return mountOptions == null ? "" : mountOptions;
}
@Override
public boolean executeInSequence() {
return true;
}
}

View File

@@ -36,6 +36,12 @@ public class TakeBackupCommand extends Command {
@LogLevel(LogLevel.Log4jLevel.Off)
private String mountOptions;
// Incremental backup fields (NAS provider; null/empty for legacy full-only callers).
private String mode; // "full" or "incremental"; null => legacy behaviour (script default)
private String bitmapNew; // Checkpoint/bitmap name to create with this backup (timestamp-based)
private String bitmapParent; // Incremental: parent bitmap to read changes since
private String parentPath; // Incremental: parent backup file path on the mounted NAS (for qemu-img rebase)
public TakeBackupCommand(String vmName, String backupPath) {
super();
this.vmName = vmName;
@@ -106,6 +112,38 @@ public class TakeBackupCommand extends Command {
this.quiesce = quiesce;
}
public String getMode() {
return mode;
}
public void setMode(String mode) {
this.mode = mode;
}
public String getBitmapNew() {
return bitmapNew;
}
public void setBitmapNew(String bitmapNew) {
this.bitmapNew = bitmapNew;
}
public String getBitmapParent() {
return bitmapParent;
}
public void setBitmapParent(String bitmapParent) {
this.bitmapParent = bitmapParent;
}
public String getParentPath() {
return parentPath;
}
public void setParentPath(String parentPath) {
this.parentPath = parentPath;
}
@Override
public boolean executeInSequence() {
return true;

View File

@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.backup;
/**
* Keys used by the NAS backup provider when storing incremental-chain metadata
* in the existing {@code backup_details} key/value table. Stored here (not on
* the {@code backups} table) so other providers do not need a schema change to
* support their own incremental implementations.
*/
public final class NASBackupChainKeys {
/** UUID of the parent backup (full or previous incremental). Empty for full backups. */
public static final String PARENT_BACKUP_ID = "nas.parent_backup_id";
/** QEMU dirty-bitmap name created by this backup, used as the {@code <incremental>} reference for the next one. */
public static final String BITMAP_NAME = "nas.bitmap_name";
/** Identifier shared by every backup in the same chain (the full anchors a chain; its incrementals inherit the id). */
public static final String CHAIN_ID = "nas.chain_id";
/** Position within the chain: 0 for the full, 1 for the first incremental, and so on. */
public static final String CHAIN_POSITION = "nas.chain_position";
/** Backup type marker: {@value #TYPE_FULL} or {@value #TYPE_INCREMENTAL}. Mirrors {@code backups.type} for fast lookup without a join. */
public static final String TYPE = "nas.type";
public static final String TYPE_FULL = "full";
public static final String TYPE_INCREMENTAL = "incremental";
/** Set to the bitmap name when this incremental had to recreate its parent bitmap on the host (informational; this incremental is larger than usual). */
public static final String BITMAP_RECREATED = "nas.bitmap_recreated";
private NASBackupChainKeys() {
}
}
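For orientation, a chain's rows in backup_details are plain name/value pairs. A hypothetical inspection query, assuming CloudStack's usual *_details table layout; the table and column names are an assumption, not part of this diff:

# Hypothetical: inspect chain metadata. Table/column names are assumed from
# CloudStack's usual *_details convention, not taken from this patch.
mysql -u cloud -p cloud -e "SELECT backup_id, name, value FROM backup_details WHERE name LIKE 'nas.%' ORDER BY backup_id, name;"
# Illustrative shape for a FULL -> INC1 chain:
#   10  nas.bitmap_name       backup-1711586400
#   10  nas.chain_id          6f1c2d3e-...  (random UUID minted by the full)
#   10  nas.chain_position    0
#   10  nas.type              full
#   11  nas.bitmap_name       backup-1711590000
#   11  nas.chain_id          6f1c2d3e-...  (inherited from the full)
#   11  nas.chain_position    1
#   11  nas.parent_backup_id  <uuid of backup 10>
#   11  nas.type              incremental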

View File

@@ -48,6 +48,7 @@ import com.cloud.vm.snapshot.dao.VMSnapshotDetailsDao;
import org.apache.cloudstack.backup.dao.BackupDao;
import org.apache.cloudstack.backup.dao.BackupDetailsDao;
import org.apache.cloudstack.backup.dao.BackupRepositoryDao;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStore;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
@@ -85,6 +86,16 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co
true,
BackupFrameworkEnabled.key());
ConfigKey<Integer> NASBackupFullEvery = new ConfigKey<>("Advanced", Integer.class,
"nas.backup.full.every",
"10",
"Take a full NAS backup every Nth backup; remaining backups in between are incremental. " +
"Counts backups, not days, so it works for hourly, daily, and ad-hoc schedules. " +
"Set to 1 to disable incrementals (every backup is full).",
true,
ConfigKey.Scope.Zone,
BackupFrameworkEnabled.key());
@Inject
private BackupDao backupDao;
@@ -130,6 +141,9 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co
@Inject
private DiskOfferingDao diskOfferingDao;
@Inject
private BackupDetailsDao backupDetailsDao;
private Long getClusterIdFromRootVolume(VirtualMachine vm) {
VolumeVO rootVolume = volumeDao.getInstanceRootVolume(vm.getId());
StoragePoolVO rootDiskPool = primaryDataStoreDao.findById(rootVolume.getPoolId());
@@ -168,6 +182,168 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co
return resourceManager.findOneRandomRunningHostByHypervisor(Hypervisor.HypervisorType.KVM, vm.getDataCenterId());
}
/**
* Returned by {@link #decideChain(VirtualMachine)} to describe the next backup's place in
* the chain: full vs incremental, the bitmap name to create, and (for incrementals) the
* parent bitmap and parent file path.
*/
static final class ChainDecision {
final String mode; // "full" or "incremental"
final String bitmapNew;
final String bitmapParent; // null for full
final String parentPath; // null for full
final String chainId; // chain identifier this backup belongs to
final int chainPosition; // 0 for full, N for the Nth incremental in the chain
private ChainDecision(String mode, String bitmapNew, String bitmapParent, String parentPath,
String chainId, int chainPosition) {
this.mode = mode;
this.bitmapNew = bitmapNew;
this.bitmapParent = bitmapParent;
this.parentPath = parentPath;
this.chainId = chainId;
this.chainPosition = chainPosition;
}
static ChainDecision fullStart(String bitmapName) {
return new ChainDecision(NASBackupChainKeys.TYPE_FULL, bitmapName, null, null,
UUID.randomUUID().toString(), 0);
}
static ChainDecision incremental(String bitmapNew, String bitmapParent, String parentPath,
String chainId, int chainPosition) {
return new ChainDecision(NASBackupChainKeys.TYPE_INCREMENTAL, bitmapNew, bitmapParent,
parentPath, chainId, chainPosition);
}
boolean isIncremental() {
return NASBackupChainKeys.TYPE_INCREMENTAL.equals(mode);
}
}
/**
* Decides whether the next backup for {@code vm} should be a fresh full or an incremental
* appended to the existing chain. Stopped VMs are always full (libvirt {@code backup-begin}
* requires a running QEMU process). The {@code nas.backup.full.every} ConfigKey controls
* how many backups (full + incrementals) form one chain before a new full is forced.
*/
protected ChainDecision decideChain(VirtualMachine vm) {
final String newBitmap = "backup-" + System.currentTimeMillis() / 1000L;
// Stopped VMs cannot do incrementals; the script will also fall back, but we make the
// decision here so we register the right type up-front.
if (VirtualMachine.State.Stopped.equals(vm.getState())) {
return ChainDecision.fullStart(newBitmap);
}
Integer fullEvery = NASBackupFullEvery.valueIn(vm.getDataCenterId());
if (fullEvery == null || fullEvery <= 1) {
// Disabled or every-backup-is-full mode.
return ChainDecision.fullStart(newBitmap);
}
// Walk this VM's backups newest to oldest and find the most recent BackedUp backup that has a
// bitmap stored. If we don't find one, this is the first backup in a chain: start with a full.
List<Backup> history = backupDao.listByVmId(vm.getDataCenterId(), vm.getId());
if (history == null || history.isEmpty()) {
return ChainDecision.fullStart(newBitmap);
}
history.sort(Comparator.comparing(Backup::getDate).reversed());
Backup parent = null;
String parentBitmap = null;
String parentChainId = null;
int parentChainPosition = -1;
for (Backup b : history) {
if (!Backup.Status.BackedUp.equals(b.getStatus())) {
continue;
}
String bm = readDetail(b, NASBackupChainKeys.BITMAP_NAME);
if (bm == null) {
continue;
}
parent = b;
parentBitmap = bm;
parentChainId = readDetail(b, NASBackupChainKeys.CHAIN_ID);
String posStr = readDetail(b, NASBackupChainKeys.CHAIN_POSITION);
try {
parentChainPosition = posStr == null ? 0 : Integer.parseInt(posStr);
} catch (NumberFormatException e) {
parentChainPosition = 0;
}
break;
}
if (parent == null || parentBitmap == null || parentChainId == null) {
return ChainDecision.fullStart(newBitmap);
}
// Force a fresh full when the chain has reached the configured length.
if (parentChainPosition + 1 >= fullEvery) {
return ChainDecision.fullStart(newBitmap);
}
// The script needs the parent backup's on-NAS file path so it can rebase the new
// qcow2 onto it. The path is stored relative to the NAS mount point; the script
// resolves it inside its mount session.
String parentPath = composeParentBackupPath(parent);
return ChainDecision.incremental(newBitmap, parentBitmap, parentPath,
parentChainId, parentChainPosition + 1);
}
private String readDetail(Backup backup, String key) {
BackupDetailVO d = backupDetailsDao.findDetail(backup.getId(), key);
return d == null ? null : d.getValue();
}
/**
* Compose the on-NAS path of a parent backup's root-disk qcow2. Relative to the NAS mount,
* matches the layout written by {@code nasbackup.sh} ({@code <backupPath>/root.<volUuid>.qcow2}).
*/
private String composeParentBackupPath(Backup parent) {
// backupPath is stored as externalId by createBackupObject, e.g. "i-2-1234-VM/2026.04.27.13.45.00".
// The script keys its backup files on the root volume's UUID.
VolumeVO rootVolume = volumeDao.getInstanceRootVolume(parent.getVmId());
String volUuid = rootVolume == null ? "root" : rootVolume.getUuid();
return parent.getExternalId() + "/root." + volUuid + ".qcow2";
}
/**
* Persist chain metadata under backup_details. Stored here (not on the backups table) so
* other providers can implement their own chain semantics without schema changes.
*/
private void persistChainMetadata(Backup backup, ChainDecision decision, String bitmapFromAgent) {
// Prefer the bitmap name confirmed by the agent (BITMAP_CREATED= line). Fall back to
// what we asked it to create; they should match.
String bitmap = bitmapFromAgent != null ? bitmapFromAgent : decision.bitmapNew;
if (bitmap != null) {
backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.BITMAP_NAME, bitmap, true));
}
backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.CHAIN_ID, decision.chainId, true));
backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.CHAIN_POSITION,
String.valueOf(decision.chainPosition), true));
backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.TYPE, decision.mode, true));
if (decision.isIncremental()) {
// Resolve the parent backup's UUID so restore can walk the chain by id, not by path.
String parentUuid = lookupParentBackupUuid(backup.getVmId(), decision.bitmapParent);
if (parentUuid != null) {
backupDetailsDao.persist(new BackupDetailVO(backup.getId(), NASBackupChainKeys.PARENT_BACKUP_ID, parentUuid, true));
}
}
}
private String lookupParentBackupUuid(long vmId, String parentBitmap) {
if (parentBitmap == null) {
return null;
}
for (Backup b : backupDao.listByVmId(null, vmId)) {
String bm = readDetail(b, NASBackupChainKeys.BITMAP_NAME);
if (parentBitmap.equals(bm)) {
return b.getUuid();
}
}
return null;
}
protected Host getVMHypervisorHostForBackup(VirtualMachine vm) {
Long hostId = vm.getHostId();
if (hostId == null && VirtualMachine.State.Running.equals(vm.getState())) {
@@ -205,12 +381,20 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co
final String backupPath = String.format("%s/%s", vm.getInstanceName(),
new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(creationDate));
BackupVO backupVO = createBackupObject(vm, backupPath);
// Decide full vs incremental for this backup. Stopped VMs are always full
// (libvirt backup-begin requires a running QEMU process).
ChainDecision decision = decideChain(vm);
BackupVO backupVO = createBackupObject(vm, backupPath, decision.isIncremental() ? "INCREMENTAL" : "FULL");
TakeBackupCommand command = new TakeBackupCommand(vm.getInstanceName(), backupPath);
command.setBackupRepoType(backupRepository.getType());
command.setBackupRepoAddress(backupRepository.getAddress());
command.setMountOptions(backupRepository.getMountOptions());
command.setQuiesce(quiesceVM);
command.setMode(decision.mode);
command.setBitmapNew(decision.bitmapNew);
command.setBitmapParent(decision.bitmapParent);
command.setParentPath(decision.parentPath);
if (VirtualMachine.State.Stopped.equals(vm.getState())) {
List<VolumeVO> vmVolumes = volumeDao.findByInstance(vm.getId());
@@ -239,9 +423,23 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co
backupVO.setDate(new Date());
backupVO.setSize(answer.getSize());
backupVO.setStatus(Backup.Status.BackedUp);
// If the agent fell back to full (stopped VM mid-incremental cycle), record this
// backup as a full and start a new chain.
ChainDecision effective = decision;
if (answer.getIncrementalFallback()) {
effective = ChainDecision.fullStart(decision.bitmapNew);
backupVO.setType("FULL");
}
List<Volume> volumes = new ArrayList<>(volumeDao.findByInstance(vm.getId()));
backupVO.setBackedUpVolumes(backupManager.createVolumeInfoFromVolumes(volumes));
if (backupDao.update(backupVO.getId(), backupVO)) {
persistChainMetadata(backupVO, effective, answer.getBitmapCreated());
if (answer.getBitmapRecreated() != null) {
backupDetailsDao.persist(new BackupDetailVO(backupVO.getId(),
NASBackupChainKeys.BITMAP_RECREATED, answer.getBitmapRecreated(), true));
logger.info("NAS incremental for VM {} recreated parent bitmap {} (likely VM was restarted since last backup)",
vm.getInstanceName(), answer.getBitmapRecreated());
}
return new Pair<>(true, backupVO);
} else {
throw new CloudRuntimeException("Failed to update backup");
@@ -260,11 +458,11 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co
}
}
private BackupVO createBackupObject(VirtualMachine vm, String backupPath) {
private BackupVO createBackupObject(VirtualMachine vm, String backupPath, String type) {
BackupVO backup = new BackupVO();
backup.setVmId(vm.getId());
backup.setExternalId(backupPath);
backup.setType("FULL");
backup.setType(type);
backup.setDate(new Date());
long virtualSize = 0L;
for (final Volume volume: volumeDao.findByInstance(vm.getId())) {
@@ -495,24 +693,244 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co
throw new CloudRuntimeException(String.format("Unable to find a running KVM host in zone %d to delete backup %s", backup.getZoneId(), backup.getUuid()));
}
DeleteBackupCommand command = new DeleteBackupCommand(backup.getExternalId(), backupRepository.getType(),
backupRepository.getAddress(), backupRepository.getMountOptions());
// Repair the chain (if any) before removing the backup file. For chained backups,
// children that point at this backup must be re-pointed at this backup's parent
// (with their blocks merged via qemu-img rebase). For a full at the head of a chain
// with surviving children, refuse unless forced; `forced=true` then deletes the
// full plus every descendant.
ChainRepairPlan plan = computeChainRepair(backup, forced);
if (!plan.proceed) {
throw new CloudRuntimeException(plan.reason);
}
BackupAnswer answer;
// Issue rebase commands for each child that needs re-pointing (ordered so each rebase
// operates on a chain that still resolves: children first if there are nested ones).
for (RebaseStep step : plan.rebaseSteps) {
RebaseBackupCommand rebase = new RebaseBackupCommand(step.targetMountRelativePath,
step.newBackingMountRelativePath, backupRepository.getType(),
backupRepository.getAddress(), backupRepository.getMountOptions());
BackupAnswer rebaseAnswer;
try {
rebaseAnswer = (BackupAnswer) agentManager.send(host.getId(), rebase);
} catch (AgentUnavailableException e) {
throw new CloudRuntimeException("Unable to contact backend control plane to repair backup chain");
} catch (OperationTimedoutException e) {
throw new CloudRuntimeException("Backup chain repair (rebase) timed out, please try again");
}
if (rebaseAnswer == null || !rebaseAnswer.getResult()) {
throw new CloudRuntimeException(String.format(
"Backup chain repair failed: rebase of %s onto %s returned %s",
step.targetMountRelativePath, step.newBackingMountRelativePath,
rebaseAnswer == null ? "no answer" : rebaseAnswer.getDetails()));
}
// Update the rebased child's parent reference + position in backup_details.
BackupDetailVO parentDetail = backupDetailsDao.findDetail(step.childBackupId, NASBackupChainKeys.PARENT_BACKUP_ID);
if (parentDetail != null) {
parentDetail.setValue(step.newParentUuid == null ? "" : step.newParentUuid);
backupDetailsDao.update(parentDetail.getId(), parentDetail);
} else if (step.newParentUuid != null) {
backupDetailsDao.persist(new BackupDetailVO(step.childBackupId,
NASBackupChainKeys.PARENT_BACKUP_ID, step.newParentUuid, true));
}
BackupDetailVO posDetail = backupDetailsDao.findDetail(step.childBackupId, NASBackupChainKeys.CHAIN_POSITION);
if (posDetail != null) {
posDetail.setValue(String.valueOf(step.newChainPosition));
backupDetailsDao.update(posDetail.getId(), posDetail);
}
}
// Now delete this backup's files. For a forced delete of a full with descendants we
// also delete all descendants' files (newest first so each rm targets a leaf).
for (Backup victim : plan.toDelete) {
DeleteBackupCommand command = new DeleteBackupCommand(victim.getExternalId(), backupRepository.getType(),
backupRepository.getAddress(), backupRepository.getMountOptions());
BackupAnswer answer;
try {
answer = (BackupAnswer) agentManager.send(host.getId(), command);
} catch (AgentUnavailableException e) {
throw new CloudRuntimeException("Unable to contact backend control plane to initiate backup");
} catch (OperationTimedoutException e) {
throw new CloudRuntimeException("Operation to delete backup timed out, please try again");
}
if (answer == null || !answer.getResult()) {
logger.warn("Failed to delete backup file for {} ({}); leaving DB row intact", victim.getUuid(), victim.getExternalId());
return false;
}
backupDao.remove(victim.getId());
}
// Shift chain_position down by 1 for any survivors deeper in the chain than the
// backup we just removed (their direct parent reference is unchanged, but their
// numeric position needs to stay consistent so future full-every cadence math works).
if (plan.shiftPositionsBelow != null) {
for (Backup b : backupDao.listByVmId(null, backup.getVmId())) {
if (!plan.shiftPositionsBelow.chainId.equals(readDetail(b, NASBackupChainKeys.CHAIN_ID))) {
continue;
}
int pos = chainPosition(b);
if (pos > plan.shiftPositionsBelow.afterPosition && pos != Integer.MAX_VALUE) {
BackupDetailVO posDetail = backupDetailsDao.findDetail(b.getId(), NASBackupChainKeys.CHAIN_POSITION);
if (posDetail != null) {
posDetail.setValue(String.valueOf(pos - 1));
backupDetailsDao.update(posDetail.getId(), posDetail);
}
}
}
}
return true;
}
private static final class PositionShift {
final String chainId;
final int afterPosition; // shift positions strictly greater than this by -1
PositionShift(String chainId, int afterPosition) {
this.chainId = chainId;
this.afterPosition = afterPosition;
}
}
/**
* Result of {@link #computeChainRepair}: whether to proceed, what to rebase, what to delete.
*/
private static final class ChainRepairPlan {
final boolean proceed;
final String reason;
final List<RebaseStep> rebaseSteps;
final List<Backup> toDelete;
final PositionShift shiftPositionsBelow;
private ChainRepairPlan(boolean proceed, String reason, List<RebaseStep> rebaseSteps, List<Backup> toDelete,
PositionShift shiftPositionsBelow) {
this.proceed = proceed;
this.reason = reason;
this.rebaseSteps = rebaseSteps;
this.toDelete = toDelete;
this.shiftPositionsBelow = shiftPositionsBelow;
}
static ChainRepairPlan refuse(String reason) {
return new ChainRepairPlan(false, reason, Collections.emptyList(), Collections.emptyList(), null);
}
static ChainRepairPlan proceed(List<RebaseStep> rebaseSteps, List<Backup> toDelete) {
return new ChainRepairPlan(true, null, rebaseSteps, toDelete, null);
}
static ChainRepairPlan proceed(List<RebaseStep> rebaseSteps, List<Backup> toDelete, PositionShift shift) {
return new ChainRepairPlan(true, null, rebaseSteps, toDelete, shift);
}
}
private static final class RebaseStep {
final long childBackupId;
final String targetMountRelativePath;
final String newBackingMountRelativePath;
final String newParentUuid; // null when the new parent backup's UUID is unavailable
final int newChainPosition;
RebaseStep(long childBackupId, String targetMountRelativePath, String newBackingMountRelativePath,
String newParentUuid, int newChainPosition) {
this.childBackupId = childBackupId;
this.targetMountRelativePath = targetMountRelativePath;
this.newBackingMountRelativePath = newBackingMountRelativePath;
this.newParentUuid = newParentUuid;
this.newChainPosition = newChainPosition;
}
}
/**
* Compute the chain-repair plan for deleting {@code backup}. Conservative semantics:
* - Backups outside any tracked chain (no NAS chain metadata) are deleted as-is.
* - A standalone backup with no children is deleted as-is.
* - A middle incremental: rebase its immediate child onto its own parent, then delete it.
* Descendants of that child are unaffected (their backing chain still resolves).
* - A full with surviving descendants: refuse unless {@code forced=true}; then delete
* full + every descendant (newest first).
*/
private ChainRepairPlan computeChainRepair(Backup backup, boolean forced) {
String chainId = readDetail(backup, NASBackupChainKeys.CHAIN_ID);
if (chainId == null) {
// Pre-incremental backups (or callers that never wrote chain metadata): plain single delete.
return ChainRepairPlan.proceed(Collections.emptyList(), Collections.singletonList(backup));
}
// Gather every backup in the same chain for this VM.
List<Backup> chain = new ArrayList<>();
for (Backup b : backupDao.listByVmId(null, backup.getVmId())) {
if (chainId.equals(readDetail(b, NASBackupChainKeys.CHAIN_ID))) {
chain.add(b);
}
}
chain.sort(Comparator.comparingInt(b -> chainPosition(b)));
int targetPos = chainPosition(backup);
boolean isFull = targetPos == 0;
List<Backup> descendants = chain.stream()
.filter(b -> chainPosition(b) > targetPos)
.collect(Collectors.toList());
if (isFull) {
if (descendants.isEmpty()) {
return ChainRepairPlan.proceed(Collections.emptyList(), Collections.singletonList(backup));
}
if (!forced) {
return ChainRepairPlan.refuse(String.format(
"Backup %s is the full anchor of a chain with %d incremental(s). Delete the incrementals first, " +
"or pass forced=true to remove the entire chain.",
backup.getUuid(), descendants.size()));
}
// Forced delete: remove descendants newest first, then the full.
List<Backup> victims = new ArrayList<>(descendants);
victims.sort(Comparator.comparingInt((Backup b) -> chainPosition(b)).reversed());
victims.add(backup);
return ChainRepairPlan.proceed(Collections.emptyList(), victims);
}
// Middle (or tail) incremental.
if (descendants.isEmpty()) {
// Tail: nothing to rebase, just delete.
return ChainRepairPlan.proceed(Collections.emptyList(), Collections.singletonList(backup));
}
// Middle: only the immediate child needs to absorb our blocks and rebase onto our parent.
Backup immediateChild = descendants.stream()
.min(Comparator.comparingInt(b -> chainPosition(b)))
.orElseThrow(() -> new CloudRuntimeException("Internal error: no immediate child found for chain repair"));
Backup ourParent = chain.stream()
.filter(b -> chainPosition(b) == targetPos - 1)
.findFirst()
.orElseThrow(() -> new CloudRuntimeException(String.format(
"Cannot delete %s: its parent (chain_position=%d) is missing from the chain",
backup.getUuid(), targetPos - 1)));
VolumeVO rootVolume = volumeDao.getInstanceRootVolume(backup.getVmId());
String volUuid = rootVolume == null ? "root" : rootVolume.getUuid();
String childPath = immediateChild.getExternalId() + "/root." + volUuid + ".qcow2";
String parentPath = ourParent.getExternalId() + "/root." + volUuid + ".qcow2";
RebaseStep step = new RebaseStep(immediateChild.getId(), childPath, parentPath,
ourParent.getUuid(), chainPosition(immediateChild) - 1);
// After we delete the middle backup, every descendant's numeric chain_position
// becomes stale (off by one). Their backing-file pointers don't need re-writing
// (only the immediate child changed parents) but their position metadata does.
return ChainRepairPlan.proceed(
Collections.singletonList(step),
Collections.singletonList(backup),
new PositionShift(chainId, targetPos));
}
private int chainPosition(Backup b) {
String s = readDetail(b, NASBackupChainKeys.CHAIN_POSITION);
if (s == null) {
return Integer.MAX_VALUE; // no metadata => sort to end
}
try {
return Integer.parseInt(s);
} catch (NumberFormatException e) {
return Integer.MAX_VALUE;
}
}
public void syncBackupMetrics(Long zoneId) {
@@ -629,7 +1047,8 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co
@Override
public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey[]{
NASBackupRestoreMountTimeout
NASBackupRestoreMountTimeout,
NASBackupFullEvery
};
}
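The rebase steps computed by computeChainRepair come down to plain qemu-img operations on the NAS mount. A minimal standalone sketch of the middle-delete repair, runnable anywhere a recent qemu-img is installed; file names are illustrative and not part of the patch:

# Build a three-link chain: full <- inc1 <- inc2
qemu-img create -f qcow2 full.qcow2 100M
qemu-img create -f qcow2 -b full.qcow2 -F qcow2 inc1.qcow2
qemu-img create -f qcow2 -b inc1.qcow2 -F qcow2 inc2.qcow2

# To delete inc1, first a SAFE rebase (no -u): qemu-img copies into inc2 any
# clusters that differ between the old chain (inc1 -> full) and the new
# backing (full), so inc2 absorbs inc1's blocks.
qemu-img rebase -b full.qcow2 -F qcow2 inc2.qcow2

# Only now is inc1 safe to remove; the chain still resolves.
rm inc1.qcow2
qemu-img info --output=json inc2.qcow2 | grep backing-filename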

View File

@@ -52,6 +52,7 @@ import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.dao.VMInstanceDao;
import org.apache.cloudstack.backup.dao.BackupDao;
import org.apache.cloudstack.backup.dao.BackupDetailsDao;
import org.apache.cloudstack.backup.dao.BackupRepositoryDao;
import org.apache.cloudstack.backup.dao.BackupOfferingDao;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
@@ -96,6 +97,9 @@ public class NASBackupProviderTest {
@Mock
private VMSnapshotDao vmSnapshotDaoMock;
@Mock
private BackupDetailsDao backupDetailsDao;
@Test
public void testDeleteBackup() throws OperationTimedoutException, AgentUnavailableException {
Long hostId = 1L;

View File

@@ -0,0 +1,59 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package com.cloud.hypervisor.kvm.resource.wrapper;
import com.cloud.agent.api.Answer;
import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource;
import com.cloud.resource.CommandWrapper;
import com.cloud.resource.ResourceWrapper;
import com.cloud.utils.Pair;
import com.cloud.utils.script.Script;
import org.apache.cloudstack.backup.BackupAnswer;
import org.apache.cloudstack.backup.RebaseBackupCommand;
import java.util.ArrayList;
import java.util.List;
@ResourceWrapper(handles = RebaseBackupCommand.class)
public class LibvirtRebaseBackupCommandWrapper extends CommandWrapper<RebaseBackupCommand, Answer, LibvirtComputingResource> {
@Override
public Answer execute(RebaseBackupCommand command, LibvirtComputingResource libvirtComputingResource) {
List<String[]> commands = new ArrayList<>();
commands.add(new String[]{
libvirtComputingResource.getNasBackupPath(),
"-o", "rebase",
"-t", command.getBackupRepoType(),
"-s", command.getBackupRepoAddress(),
"-m", command.getMountOptions(),
"--rebase-target", command.getTargetPath(),
"--rebase-new-backing", command.getNewBackingPath()
});
Pair<Integer, String> result = Script.executePipedCommands(commands, libvirtComputingResource.getCmdsTimeout());
logger.debug("Backup rebase result: {} , exit code: {}", result.second(), result.first());
if (result.first() != 0) {
logger.warn("Failed to rebase backup file {} onto {}: {}",
command.getTargetPath(), command.getNewBackingPath(), result.second());
return new BackupAnswer(command, false, result.second());
}
return new BackupAnswer(command, true, null);
}
}
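Concretely, the wrapper assembles an invocation along these lines; the script location comes from getNasBackupPath(), and the repository address, VM directory, and volume UUID here are illustrative:

nasbackup.sh -o rebase -t nfs -s 10.1.1.5:/export/backups -m "" \
    --rebase-target i-2-10-VM/2026.04.27.13.00.00/root.VOL-UUID.qcow2 \
    --rebase-new-backing i-2-10-VM/2026.04.27.12.00.00/root.VOL-UUID.qcow2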

View File

@@ -60,6 +60,15 @@ public class LibvirtRestoreBackupCommandWrapper extends CommandWrapper<RestoreBa
private static final String ATTACH_RBD_DISK_XML_COMMAND = " virsh attach-device %s /dev/stdin <<EOF%sEOF";
private static final String CURRRENT_DEVICE = "virsh domblklist --domain %s | tail -n 3 | head -n 1 | awk '{print $1}'";
private static final String RSYNC_COMMAND = "rsync -az %s %s";
// Flattens the backing-file chain into a single self-contained qcow2 written to the
// destination volume path. Used when the source backup is an incremental whose qcow2
// has a backing reference to its parent (chain set up by nasbackup.sh's qemu-img rebase).
private static final String QEMU_IMG_FLATTEN_COMMAND = "qemu-img convert -O qcow2 %s %s";
// Detects whether a qcow2 file references a parent in its backing-file metadata.
// Returns 0 (true) when a backing file is present, 1 when not. Uses --output=json
// so the test is robust to qemu-img version differences in human-readable output.
private static final String QEMU_IMG_HAS_BACKING_COMMAND =
"qemu-img info --output=json %s 2>/dev/null | grep -q '\"backing-filename\"'";
private String getVolumeUuidFromPath(String volumePath, PrimaryDataStoreTO volumePool) {
if (Storage.StoragePoolType.Linstor.equals(volumePool.getPoolType())) {
@@ -270,10 +279,27 @@ public class LibvirtRestoreBackupCommandWrapper extends CommandWrapper<RestoreBa
return replaceBlockDeviceWithBackup(storagePoolMgr, volumePool, volumePath, backupPath, timeout, createTargetVolume, size);
}
// For NAS-backed incremental backups, the source qcow2 has a backing-file
// reference to its parent (set by nasbackup.sh's qemu-img rebase). A plain
// rsync would copy only the differential blocks, leaving a volume that
// depends on a backing file the primary storage doesn't have. Flatten the
// chain via qemu-img convert, which follows the backing-file links and
// produces a single self-contained qcow2.
if (hasBackingChain(backupPath)) {
int flattenExit = Script.runSimpleBashScriptForExitValue(
String.format(QEMU_IMG_FLATTEN_COMMAND, backupPath, volumePath), timeout, false);
return flattenExit == 0;
}
int exitValue = Script.runSimpleBashScriptForExitValue(String.format(RSYNC_COMMAND, backupPath, volumePath), timeout, false);
return exitValue == 0;
}
private boolean hasBackingChain(String qcow2Path) {
return Script.runSimpleBashScriptForExitValue(
String.format(QEMU_IMG_HAS_BACKING_COMMAND, qcow2Path)) == 0;
}
private boolean replaceBlockDeviceWithBackup(KVMStoragePoolManager storagePoolMgr, PrimaryDataStoreTO volumePool, String volumePath, String backupPath, int timeout, boolean createTargetVolume, Long size) {
KVMStoragePool volumeStoragePool = storagePoolMgr.getStoragePool(volumePool.getPoolType(), volumePool.getUuid());
QemuImg qemu;
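The two qemu-img steps behind that restore-side branching, as a standalone sketch with illustrative paths:

BACKUP=/mnt/nas/i-2-10-VM/2026.04.27.13.00.00/root.VOL-UUID.qcow2  # illustrative
DEST=/var/lib/libvirt/images/restored.qcow2                        # illustrative

if qemu-img info --output=json "$BACKUP" 2>/dev/null | grep -q '"backing-filename"'; then
    # Incremental: convert follows the backing-file links and writes one
    # self-contained qcow2, so the restored volume has no NAS dependency.
    qemu-img convert -O qcow2 "$BACKUP" "$DEST"
else
    # Full backup: a plain copy suffices.
    rsync -az "$BACKUP" "$DEST"
fi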

View File

@@ -69,8 +69,7 @@ public class LibvirtTakeBackupCommandWrapper extends CommandWrapper<TakeBackupCo
}
}
List<String[]> commands = new ArrayList<>();
commands.add(new String[]{
List<String> argv = new ArrayList<>(Arrays.asList(
libvirtComputingResource.getNasBackupPath(),
"-o", "backup",
"-v", vmName,
@@ -80,7 +79,27 @@ public class LibvirtTakeBackupCommandWrapper extends CommandWrapper<TakeBackupCo
"-p", backupPath,
"-q", command.getQuiesce() != null && command.getQuiesce() ? "true" : "false",
"-d", diskPaths.isEmpty() ? "" : String.join(",", diskPaths)
});
));
// Incremental NAS backup args (only added when the orchestrator asked for full/inc mode).
if (command.getMode() != null && !command.getMode().isEmpty()) {
argv.add("-M");
argv.add(command.getMode());
}
if (command.getBitmapNew() != null && !command.getBitmapNew().isEmpty()) {
argv.add("--bitmap-new");
argv.add(command.getBitmapNew());
}
if (command.getBitmapParent() != null && !command.getBitmapParent().isEmpty()) {
argv.add("--bitmap-parent");
argv.add(command.getBitmapParent());
}
if (command.getParentPath() != null && !command.getParentPath().isEmpty()) {
argv.add("--parent-path");
argv.add(command.getParentPath());
}
List<String[]> commands = new ArrayList<>();
commands.add(argv.toArray(new String[0]));
Pair<Integer, String> result = Script.executePipedCommands(commands, timeout);
@@ -94,21 +113,52 @@ public class LibvirtTakeBackupCommandWrapper extends CommandWrapper<TakeBackupCo
return answer;
}
// Strip out our incremental marker lines before parsing size, so the legacy
// numeric-suffix parser keeps working.
String stdout = result.second().trim();
String bitmapCreated = null;
String bitmapRecreated = null;
boolean incrementalFallback = false;
StringBuilder filtered = new StringBuilder();
for (String line : stdout.split("\n")) {
String trimmed = line.trim();
if (trimmed.startsWith("BITMAP_CREATED=")) {
bitmapCreated = trimmed.substring("BITMAP_CREATED=".length());
continue;
}
if (trimmed.startsWith("BITMAP_RECREATED=")) {
bitmapRecreated = trimmed.substring("BITMAP_RECREATED=".length());
continue;
}
if (trimmed.startsWith("INCREMENTAL_FALLBACK=")) {
incrementalFallback = true;
continue;
}
if (filtered.length() > 0) {
filtered.append("\n");
}
filtered.append(line);
}
String numericOutput = filtered.toString().trim();
long backupSize = 0L;
if (CollectionUtils.isNullOrEmpty(diskPaths)) {
List<String> outputLines = Arrays.asList(result.second().trim().split("\n"));
List<String> outputLines = Arrays.asList(numericOutput.split("\n"));
if (!outputLines.isEmpty()) {
backupSize = Long.parseLong(outputLines.get(outputLines.size() - 1).trim());
}
} else {
String[] outputLines = result.second().trim().split("\n");
String[] outputLines = numericOutput.split("\n");
for(String line : outputLines) {
backupSize = backupSize + Long.parseLong(line.split(" ")[0].trim());
}
}
BackupAnswer answer = new BackupAnswer(command, true, result.second().trim());
BackupAnswer answer = new BackupAnswer(command, true, stdout);
answer.setSize(backupSize);
answer.setBitmapCreated(bitmapCreated);
answer.setBitmapRecreated(bitmapRecreated);
answer.setIncrementalFallback(incrementalFallback);
return answer;
}
}
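The stdout contract between nasbackup.sh and this wrapper can be exercised in isolation. A minimal sketch of the same filtering the parsing loop performs, with invented sample output:

# Invented sample stdout from an incremental run with one root disk.
out=$'BITMAP_CREATED=backup-1711586400\n1073741824'

# Strip the marker lines, as the wrapper does, before parsing the size lines.
numeric=$(grep -v -E '^(BITMAP_CREATED|BITMAP_RECREATED|INCREMENTAL_FALLBACK)=' <<< "$out")
bitmap=$(sed -n 's/^BITMAP_CREATED=//p' <<< "$out")

echo "size=$(tail -n 1 <<< "$numeric")"   # size=1073741824
echo "bitmap=$bitmap"                     # bitmap=backup-1711586400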

View File

@@ -407,6 +407,8 @@ public class LibvirtRestoreBackupCommandWrapperTest {
return 0; // File exists
} else if (command.contains("qemu-img check")) {
return 0; // File is valid
} else if (command.contains("qemu-img info") && command.contains("backing-filename")) {
return 1; // No backing chain: exercise the rsync path (full backups)
}
return 0; // Other commands success
});

View File

@@ -33,9 +33,18 @@ MOUNT_OPTS=""
BACKUP_DIR=""
DISK_PATHS=""
QUIESCE=""
# Incremental backup parameters (all optional; legacy callers omit them)
MODE="" # "full" or "incremental"; empty => legacy full-only behavior (no checkpoint created)
BITMAP_NEW="" # Bitmap/checkpoint name to create with this backup (e.g. "backup-1711586400")
BITMAP_PARENT="" # For incremental: parent bitmap name to read changes since
PARENT_PATH="" # For incremental: parent backup file path (used as backing for qemu-img rebase)
# Rebase operation parameters (used only with -o rebase, for chain repair on delete-middle)
REBASE_TARGET="" # The qcow2 file to repoint at a new backing (mount-relative path)
REBASE_NEW_BACKING="" # The new backing parent file (mount-relative path)
logFile="/var/log/cloudstack/agent/agent.log"
EXIT_CLEANUP_FAILED=20
EXIT_INCREMENTAL_UNSUPPORTED=21
log() {
[[ "$verb" -eq 1 ]] && builtin echo "$@"
@@ -113,20 +122,93 @@ backup_running_vm() {
mount_operation
mkdir -p "$dest" || { echo "Failed to create backup directory $dest"; exit 1; }
# Determine effective mode for this run.
# Legacy callers (no -M argument) get the original full-only behavior with no checkpoint.
local effective_mode="${MODE:-legacy-full}"
local make_checkpoint=0
case "$effective_mode" in
incremental)
if [[ -z "$BITMAP_PARENT" || -z "$BITMAP_NEW" || -z "$PARENT_PATH" ]]; then
echo "incremental mode requires --bitmap-parent, --bitmap-new, and --parent-path"
cleanup
exit 1
fi
make_checkpoint=1
;;
full)
if [[ -z "$BITMAP_NEW" ]]; then
echo "full mode requires --bitmap-new (the bitmap to create for the next incremental)"
cleanup
exit 1
fi
make_checkpoint=1
;;
legacy-full)
make_checkpoint=0
;;
*)
echo "Unknown mode: $effective_mode"
cleanup
exit 1
;;
esac
# When incremental, verify the parent bitmap still exists on the running domain.
# CloudStack rebuilds the libvirt domain XML on every VM start, so persistent bitmaps
# are lost across stop/start. If the parent is missing, recreate it as a fresh bitmap
# so libvirt accepts the <incremental> reference. The first backup after a recreate
# captures all writes since the recreate point — slightly larger than ideal, but correct.
if [[ "$effective_mode" == "incremental" ]]; then
if ! virsh -c qemu:///system checkpoint-list "$VM" --name 2>/dev/null | grep -qx "$BITMAP_PARENT"; then
cat > $dest/recreate-checkpoint.xml <<XML
<domaincheckpoint><name>$BITMAP_PARENT</name><disks>
$(virsh -c qemu:///system domblklist "$VM" --details 2>/dev/null | awk '$2=="disk"{printf "<disk name=\"%s\"/>\n", $3}')
</disks></domaincheckpoint>
XML
if ! virsh -c qemu:///system checkpoint-create "$VM" --xmlfile $dest/recreate-checkpoint.xml > /dev/null 2>&1; then
echo "Failed to recreate parent bitmap $BITMAP_PARENT for $VM"
cleanup
exit 1
fi
# Marker for the orchestrator: this incremental is larger because the bitmap was rebuilt.
echo "BITMAP_RECREATED=$BITMAP_PARENT"
rm -f $dest/recreate-checkpoint.xml
fi
fi
# Build backup XML (and matching checkpoint XML when applicable).
name="root"
echo "<domainbackup mode='push'><disks>" > $dest/backup.xml
echo "<domainbackup mode='push'>" > $dest/backup.xml
if [[ "$effective_mode" == "incremental" ]]; then
echo "<incremental>$BITMAP_PARENT</incremental>" >> $dest/backup.xml
fi
echo "<disks>" >> $dest/backup.xml
if [[ $make_checkpoint -eq 1 ]]; then
echo "<domaincheckpoint><name>$BITMAP_NEW</name><disks>" > $dest/checkpoint.xml
fi
while read -r disk fullpath; do
if [[ "$fullpath" == /dev/drbd/by-res/* ]]; then
volUuid=$(get_linstor_uuid_from_path "$fullpath")
else
volUuid="${fullpath##*/}"
fi
echo "<disk name='$disk' backup='yes' type='file' backupmode='full'><driver type='qcow2'/><target file='$dest/$name.$volUuid.qcow2' /></disk>" >> $dest/backup.xml
if [[ "$effective_mode" == "incremental" ]]; then
# Incremental disk entry — no backupmode attr, libvirt picks it up from <incremental>.
echo "<disk name='$disk' backup='yes' type='file'><driver type='qcow2'/><target file='$dest/$name.$volUuid.qcow2' /></disk>" >> $dest/backup.xml
else
echo "<disk name='$disk' backup='yes' type='file' backupmode='full'><driver type='qcow2'/><target file='$dest/$name.$volUuid.qcow2' /></disk>" >> $dest/backup.xml
fi
if [[ $make_checkpoint -eq 1 ]]; then
echo "<disk name='$disk'/>" >> $dest/checkpoint.xml
fi
name="datadisk"
done < <(
virsh -c qemu:///system domblklist "$VM" --details 2>/dev/null | awk '$2=="disk"{print $3, $4}'
)
echo "</disks></domainbackup>" >> $dest/backup.xml
if [[ $make_checkpoint -eq 1 ]]; then
echo "</disks></domaincheckpoint>" >> $dest/checkpoint.xml
fi
local thaw=0
if [[ ${QUIESCE} == "true" ]]; then
@@ -135,10 +217,16 @@ backup_running_vm() {
fi
fi
# Start push backup
# Start push backup, atomically registering the new checkpoint when applicable.
local backup_begin=0
if virsh -c qemu:///system backup-begin --domain $VM --backupxml $dest/backup.xml 2>&1 > /dev/null; then
backup_begin=1;
if [[ $make_checkpoint -eq 1 ]]; then
if virsh -c qemu:///system backup-begin --domain $VM --backupxml $dest/backup.xml --checkpointxml $dest/checkpoint.xml 2>&1 > /dev/null; then
backup_begin=1;
fi
else
if virsh -c qemu:///system backup-begin --domain $VM --backupxml $dest/backup.xml 2>&1 > /dev/null; then
backup_begin=1;
fi
fi
if [[ $thaw -eq 1 ]]; then
@@ -172,9 +260,37 @@ backup_running_vm() {
sleep 5
done
# Use qemu-img convert to sparsify linstor backups which get bloated due to virsh backup-begin.
# Sparsify behavior:
# - For LINSTOR backups (existing): qemu-img convert sparsifies the bloated output.
# - For INCREMENTAL: rebase the resulting thin qcow2 onto its parent so the chain is self-describing
# (so a future restore can flatten without external chain metadata).
name="root"
while read -r disk fullpath; do
if [[ "$effective_mode" == "incremental" ]]; then
volUuid="${fullpath##*/}"
if [[ "$fullpath" == /dev/drbd/by-res/* ]]; then
volUuid=$(get_linstor_uuid_from_path "$fullpath")
fi
# PARENT_PATH from the orchestrator is the parent backup's path relative to the
# NAS mount root (e.g. "i-2-X/2026.04.27.12.00.00/root.UUID.qcow2"). Convert it to
# a path relative to THIS new qcow2's directory so the backing reference resolves
# correctly the next time the NAS is mounted (mount points are ephemeral).
local parent_abs="$mount_point/$PARENT_PATH"
if [[ ! -f "$parent_abs" ]]; then
echo "Parent backup file does not exist on NAS: $parent_abs"
cleanup
exit 1
fi
local parent_rel
parent_rel=$(realpath --relative-to="$dest" "$parent_abs")
if ! qemu-img rebase -u -b "$parent_rel" -F qcow2 "$dest/$name.$volUuid.qcow2" >> "$logFile" 2> >(cat >&2); then
echo "qemu-img rebase failed for $dest/$name.$volUuid.qcow2 onto $parent_rel"
cleanup
exit 1
fi
name="datadisk"
continue
fi
if [[ "$fullpath" != /dev/drbd/by-res/* ]]; then
continue
fi
@@ -191,18 +307,30 @@ backup_running_vm() {
virsh -c qemu:///system domblklist "$VM" --details 2>/dev/null | awk '$2=="disk"{print $3, $4}'
)
rm -f $dest/backup.xml
rm -f $dest/backup.xml $dest/checkpoint.xml
sync
# Print statistics
virsh -c qemu:///system domjobinfo $VM --completed
du -sb $dest | cut -f1
if [[ -n "$BITMAP_NEW" ]]; then
# Echo the bitmap name on its own line so the Java caller can capture it for backup_details.
echo "BITMAP_CREATED=$BITMAP_NEW"
fi
umount $mount_point
rmdir $mount_point
}
backup_stopped_vm() {
# Stopped VMs cannot use libvirt's backup-begin (no QEMU process). Take a full
# backup via qemu-img convert. If the caller asked for incremental, fall back
# to full and signal the fallback so the orchestrator can record it as a full
# in the chain.
if [[ "$MODE" == "incremental" ]]; then
echo "INCREMENTAL_FALLBACK=full (VM stopped — incremental requires running VM)" >&2
fi
mount_operation
mkdir -p "$dest" || { echo "Failed to create backup directory $dest"; exit 1; }
@@ -238,6 +366,51 @@ delete_backup() {
rmdir $mount_point
}
# Rebase an existing backup qcow2 (e.g. a chain child) onto a new backing parent so the chain
# stays valid after a middle backup is deleted. Both --target and --new-backing are passed as
# paths relative to the NAS mount root; we resolve them under $mount_point and write the new
# backing reference relative to the target file's directory (mount points are ephemeral).
rebase_backup() {
mount_operation
if [[ -z "$REBASE_TARGET" || -z "$REBASE_NEW_BACKING" ]]; then
echo "rebase requires --rebase-target and --rebase-new-backing"
cleanup
exit 1
fi
local target_abs="$mount_point/$REBASE_TARGET"
local backing_abs="$mount_point/$REBASE_NEW_BACKING"
if [[ ! -f "$target_abs" ]]; then
echo "Rebase target file does not exist: $target_abs"
cleanup
exit 1
fi
if [[ ! -f "$backing_abs" ]]; then
echo "New backing file does not exist: $backing_abs"
cleanup
exit 1
fi
local target_dir
target_dir=$(dirname "$target_abs")
local backing_rel
backing_rel=$(realpath --relative-to="$target_dir" "$backing_abs")
# SAFE rebase (no -u): qemu-img reads blocks from the old chain and writes them into
# the target where the new chain doesn't cover them. This is the "merge into" semantic
# required when we're about to delete the old immediate parent — the target needs to
# absorb the to-be-deleted parent's blocks so the chain remains consistent against the
# new (further-back) backing.
if ! qemu-img rebase -b "$backing_rel" -F qcow2 "$target_abs" >> "$logFile" 2> >(cat >&2); then
echo "qemu-img rebase failed for $target_abs onto $backing_rel"
cleanup
exit 1
fi
sync
umount $mount_point
rmdir $mount_point
}
get_backup_stats() {
mount_operation
@@ -278,6 +451,13 @@ cleanup() {
function usage {
echo ""
echo "Usage: $0 -o <operation> -v|--vm <domain name> -t <storage type> -s <storage address> -m <mount options> -p <backup path> -d <disks path> -q|--quiesce <true|false>"
echo " [-M|--mode <full|incremental>] [--bitmap-new <name>] [--bitmap-parent <name>] [--parent-path <path>]"
echo ""
echo "Incremental backup options (running VMs only; requires QEMU >= 4.2 and libvirt >= 7.2):"
echo " -M|--mode full Take a full backup AND create a checkpoint (--bitmap-new required) for future incrementals."
echo " -M|--mode incremental Take an incremental backup since --bitmap-parent and create new checkpoint --bitmap-new."
echo " Requires --bitmap-parent, --bitmap-new, and --parent-path (parent backup file for rebase)."
echo " Without -M, behaves as legacy full-only backup with no checkpoint creation."
echo ""
exit 1
}
@@ -324,6 +504,36 @@ while [[ $# -gt 0 ]]; do
shift
shift
;;
-M|--mode)
MODE="$2"
shift
shift
;;
--bitmap-new)
BITMAP_NEW="$2"
shift
shift
;;
--bitmap-parent)
BITMAP_PARENT="$2"
shift
shift
;;
--parent-path)
PARENT_PATH="$2"
shift
shift
;;
--rebase-target)
REBASE_TARGET="$2"
shift
shift
;;
--rebase-new-backing)
REBASE_NEW_BACKING="$2"
shift
shift
;;
-h|--help)
usage
shift
@@ -347,6 +557,8 @@ if [ "$OP" = "backup" ]; then
fi
elif [ "$OP" = "delete" ]; then
delete_backup
elif [ "$OP" = "rebase" ]; then
rebase_backup
elif [ "$OP" = "stats" ]; then
get_backup_stats
fi
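Putting the new flags together, the orchestrator-driven invocations look roughly like this; addresses, VM names, timestamps, and paths are illustrative:

# First backup of a chain: full, plus a checkpoint for future incrementals.
./nasbackup.sh -o backup -v i-2-10-VM -t nfs -s 10.1.1.5:/export/backups \
    -p i-2-10-VM/2026.04.27.12.00.00 -q false \
    -M full --bitmap-new backup-1711586400

# Next backup: incremental since that checkpoint, chained onto the full's file.
./nasbackup.sh -o backup -v i-2-10-VM -t nfs -s 10.1.1.5:/export/backups \
    -p i-2-10-VM/2026.04.27.13.00.00 -q false \
    -M incremental --bitmap-parent backup-1711586400 --bitmap-new backup-1711590000 \
    --parent-path i-2-10-VM/2026.04.27.12.00.00/root.VOL-UUID.qcow2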

View File

@@ -265,3 +265,222 @@ class TestNASBackupAndRecovery(cloudstackTestCase):
self.assertEqual(backup_repository.crosszoneinstancecreation, True, "Cross-Zone Instance Creation could not be enabled on the backup repository")
self.vm_backup_create_vm_from_backup_int(template.id, [network.id])
# ------------------------------------------------------------------
# Incremental backup tests (RFC #12899 / PR #13074)
# ------------------------------------------------------------------
# These tests exercise the incremental NAS backup chain semantics:
# full -> incN cadence, restore-from-incremental, delete-middle chain
# repair, refuse-delete-full-with-children, and stopped-VM fallback.
#
# All tests set nas.backup.full.every to a small value (3) so a chain
# forms quickly without needing many backup iterations. They restore
# the original value at teardown.
def _set_full_every(self, value):
Configurations.update(self.apiclient, name='nas.backup.full.every',
value=str(value), zoneid=self.zone.id)
def _backup_type(self, backup):
# Backup objects expose `type`; for chained backups it's "INCREMENTAL", else "FULL".
return getattr(backup, 'type', 'FULL') or 'FULL'
@attr(tags=["advanced", "backup"], required_hardware="true")
def test_incremental_chain_cadence(self):
"""
With nas.backup.full.every=3, the sequence of backups should be
FULL, INCREMENTAL, INCREMENTAL, FULL, INCREMENTAL, ...
"""
self.backup_offering.assignOffering(self.apiclient, self.vm.id)
self._set_full_every(3)
try:
ssh_client_vm = self.vm.get_ssh_client(reconnect=True)
ssh_client_vm.execute("touch /root/incremental_marker_1.txt")
created = []
for i in range(5):
Backup.create(self.apiclient, self.vm.id, "inc_chain_%d" % i)
# write a small change so each incremental has something to capture
ssh_client_vm.execute("dd if=/dev/urandom of=/root/delta_%d bs=64k count=4 2>/dev/null" % i)
time.sleep(2)
created = Backup.list(self.apiclient, self.vm.id)
self.assertEqual(len(created), 5, "Expected 5 backups after 5 Backup.create calls")
# Sort oldest-first by date
created.sort(key=lambda b: b.created)
expected = ['FULL', 'INCREMENTAL', 'INCREMENTAL', 'FULL', 'INCREMENTAL']
actual = [self._backup_type(b).upper() for b in created]
self.assertEqual(actual, expected,
"With nas.backup.full.every=3, chain pattern should be %s but was %s" % (expected, actual))
# Cleanup all backups (newest first to satisfy chain rules without forced=true)
for b in reversed(created):
Backup.delete(self.apiclient, b.id)
finally:
self._set_full_every(10)
self.backup_offering.removeOffering(self.apiclient, self.vm.id)
@attr(tags=["advanced", "backup"], required_hardware="true")
def test_restore_from_incremental(self):
"""
Take FULL + 2 INCREMENTAL backups, each with a marker file. Restore from the
latest incremental and verify all three markers are present (chain flatten).
"""
self.backup_offering.assignOffering(self.apiclient, self.vm.id)
self._set_full_every(5)
try:
ssh_client_vm = self.vm.get_ssh_client(reconnect=True)
ssh_client_vm.execute("touch /root/marker_full.txt")
Backup.create(self.apiclient, self.vm.id, "rfi_full")
time.sleep(3)
ssh_client_vm.execute("touch /root/marker_inc1.txt")
Backup.create(self.apiclient, self.vm.id, "rfi_inc1")
time.sleep(3)
ssh_client_vm.execute("touch /root/marker_inc2.txt")
Backup.create(self.apiclient, self.vm.id, "rfi_inc2")
time.sleep(3)
backups = Backup.list(self.apiclient, self.vm.id)
backups.sort(key=lambda b: b.created)
self.assertEqual(len(backups), 3)
self.assertEqual(self._backup_type(backups[0]).upper(), 'FULL')
self.assertEqual(self._backup_type(backups[2]).upper(), 'INCREMENTAL')
new_vm_name = "vm-from-inc-" + str(int(time.time()))
new_vm = Backup.createVMFromBackup(self.apiclient, self.services["small"],
mode=self.services["mode"], backupid=backups[2].id, vmname=new_vm_name,
accountname=self.account.name, domainid=self.account.domainid,
zoneid=self.zone.id)
self.cleanup.append(new_vm)
ssh_new = new_vm.get_ssh_client(reconnect=True)
for marker in ("marker_full.txt", "marker_inc1.txt", "marker_inc2.txt"):
result = ssh_new.execute("ls /root/%s" % marker)
self.assertIn(marker, result[0],
"Restored VM should have %s (chain flattened correctly)" % marker)
for b in reversed(backups):
Backup.delete(self.apiclient, b.id)
finally:
self._set_full_every(10)
self.backup_offering.removeOffering(self.apiclient, self.vm.id)
@attr(tags=["advanced", "backup"], required_hardware="true")
def test_delete_middle_incremental_repairs_chain(self):
"""
Delete a MIDDLE incremental from a FULL -> INC1 -> INC2 chain.
The chain repair should rebase INC2 onto FULL, and the final restore
should still produce a working VM with all expected blocks.
"""
self.backup_offering.assignOffering(self.apiclient, self.vm.id)
self._set_full_every(5)
try:
ssh_client_vm = self.vm.get_ssh_client(reconnect=True)
ssh_client_vm.execute("touch /root/dmi_full.txt")
Backup.create(self.apiclient, self.vm.id, "dmi_full")
time.sleep(3)
ssh_client_vm.execute("touch /root/dmi_inc1.txt")
Backup.create(self.apiclient, self.vm.id, "dmi_inc1")
time.sleep(3)
ssh_client_vm.execute("touch /root/dmi_inc2.txt")
Backup.create(self.apiclient, self.vm.id, "dmi_inc2")
time.sleep(3)
backups = Backup.list(self.apiclient, self.vm.id)
backups.sort(key=lambda b: b.created)
full, inc1, inc2 = backups[0], backups[1], backups[2]
# Delete the middle incremental — should succeed via chain repair (no force needed)
Backup.delete(self.apiclient, inc1.id)
remaining = Backup.list(self.apiclient, self.vm.id)
self.assertEqual(len(remaining), 2, "After deleting middle inc, two backups should remain")
# Restore from the remaining tail (formerly inc2) — must still produce a usable VM
new_vm_name = "vm-after-mid-del-" + str(int(time.time()))
new_vm = Backup.createVMFromBackup(self.apiclient, self.services["small"],
mode=self.services["mode"], backupid=inc2.id, vmname=new_vm_name,
accountname=self.account.name, domainid=self.account.domainid,
zoneid=self.zone.id)
self.cleanup.append(new_vm)
ssh_new = new_vm.get_ssh_client(reconnect=True)
# Both the FULL marker and (importantly) the deleted-INC1 marker should still
# be present, because the rebase merged INC1's blocks into INC2.
for marker in ("dmi_full.txt", "dmi_inc1.txt", "dmi_inc2.txt"):
result = ssh_new.execute("ls /root/%s" % marker)
self.assertIn(marker, result[0],
"After mid-incremental delete and rebase, %s should still be restorable" % marker)
Backup.delete(self.apiclient, inc2.id)
Backup.delete(self.apiclient, full.id)
finally:
self._set_full_every(10)
self.backup_offering.removeOffering(self.apiclient, self.vm.id)
@attr(tags=["advanced", "backup"], required_hardware="true")
def test_refuse_delete_full_with_children(self):
"""
Deleting a FULL that has surviving incrementals must fail without forced=true.
With forced=true it must succeed and remove the entire chain.
"""
self.backup_offering.assignOffering(self.apiclient, self.vm.id)
self._set_full_every(5)
try:
Backup.create(self.apiclient, self.vm.id, "rdc_full")
time.sleep(3)
Backup.create(self.apiclient, self.vm.id, "rdc_inc")
time.sleep(3)
backups = Backup.list(self.apiclient, self.vm.id)
backups.sort(key=lambda b: b.created)
full = backups[0]
failed = False
try:
Backup.delete(self.apiclient, full.id)
except Exception:
failed = True
self.assertTrue(failed, "Deleting a FULL with children should be refused without forced=true")
# Forced delete should succeed and clear the whole chain
Backup.delete(self.apiclient, full.id, forced=True)
remaining = Backup.list(self.apiclient, self.vm.id)
self.assertIsNone(remaining, "Forced delete of FULL should remove the entire chain")
finally:
self._set_full_every(10)
self.backup_offering.removeOffering(self.apiclient, self.vm.id)
@attr(tags=["advanced", "backup"], required_hardware="true")
def test_stopped_vm_falls_back_to_full(self):
"""
When a backup is requested while the VM is stopped, even if the chain cadence
would call for an incremental, the agent must fall back to a full and start a
new chain. The incrementalFallback flag should be reflected in backup.type=FULL.
"""
self.backup_offering.assignOffering(self.apiclient, self.vm.id)
self._set_full_every(2) # next backup after the first should be incremental
try:
Backup.create(self.apiclient, self.vm.id, "svf_first")
time.sleep(3)
# Stop the VM and trigger another backup — should fall back to FULL
self.vm.stop(self.apiclient)
time.sleep(5)
Backup.create(self.apiclient, self.vm.id, "svf_second")
time.sleep(3)
backups = Backup.list(self.apiclient, self.vm.id)
backups.sort(key=lambda b: b.created)
self.assertEqual(len(backups), 2)
self.assertEqual(self._backup_type(backups[0]).upper(), 'FULL')
self.assertEqual(self._backup_type(backups[1]).upper(), 'FULL',
"Stopped-VM backup must be a FULL even when cadence would have asked for an INCREMENTAL")
self.vm.start(self.apiclient)
for b in reversed(backups):
Backup.delete(self.apiclient, b.id)
finally:
self._set_full_every(10)
self.backup_offering.removeOffering(self.apiclient, self.vm.id)