rbd: Enable snapshotting of RBD images

Signed-off-by: Wido den Hollander <wido@42on.com>
This commit is contained in:
Wido den Hollander 2013-07-18 18:16:39 +02:00 committed by Wido den Hollander
parent 4eefce15be
commit b611394cca
5 changed files with 250 additions and 69 deletions

View File

@ -48,6 +48,12 @@
<artifactId>rados</artifactId>
<version>${cs.rados-java.version}</version>
</dependency>
<dependency>
<groupId>net.java.dev.jna</groupId>
<artifactId>jna</artifactId>
<scope>provided</scope>
<version>${cs.jna.version}</version>
</dependency>
</dependencies>
<build>
<defaultGoal>install</defaultGoal>

View File

@ -21,6 +21,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.BufferedOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
@ -239,6 +240,13 @@ import com.cloud.vm.DiskProfile;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.State;
import com.ceph.rados.Rados;
import com.ceph.rados.RadosException;
import com.ceph.rados.IoCTX;
import com.ceph.rbd.Rbd;
import com.ceph.rbd.RbdImage;
import com.ceph.rbd.RbdException;
/**
* LibvirtComputingResource execute requests on the computing/routing host using
* the libvirt API
@ -1972,12 +1980,6 @@ ServerResource {
cmd.getPool().getType(),
cmd.getPool().getUuid());
if (primaryPool.getType() == StoragePoolType.RBD) {
s_logger.debug("Snapshots are not supported on RBD volumes");
return new ManageSnapshotAnswer(cmd, false,
"Snapshots are not supported on RBD volumes");
}
KVMPhysicalDisk disk = primaryPool.getPhysicalDisk(cmd
.getVolumePath());
if (state == DomainInfo.DomainState.VIR_DOMAIN_RUNNING
@ -2004,23 +2006,63 @@ ServerResource {
vm.resume();
}
} else {
/**
* For RBD we can't use libvirt to do our snapshotting or any Bash scripts.
* libvirt also wants to store the memory contents of the Virtual Machine,
* but that's not possible with RBD since there is no way to store the memory
* contents in RBD.
*
* So we rely on the Java bindings for RBD to create our snapshot
*
* This snapshot might not be 100% consistent due to writes still being in the
* memory of the Virtual Machine, but if the VM runs a kernel which supports
* barriers properly (>2.6.32) this won't be any different than pulling the power
* cord out of a running machine.
*/
if (primaryPool.getType() == StoragePoolType.RBD) {
try {
Rados r = new Rados(primaryPool.getAuthUserName());
r.confSet("mon_host", primaryPool.getSourceHost() + ":" + primaryPool.getSourcePort());
r.confSet("key", primaryPool.getAuthSecret());
r.connect();
s_logger.debug("Succesfully connected to Ceph cluster at " + r.confGet("mon_host"));
/* VM is not running, create a snapshot by ourself */
final Script command = new Script(_manageSnapshotPath,
_cmdsTimeout, s_logger);
if (cmd.getCommandSwitch().equalsIgnoreCase(
ManageSnapshotCommand.CREATE_SNAPSHOT)) {
command.add("-c", disk.getPath());
IoCTX io = r.ioCtxCreate(primaryPool.getSourceDir());
Rbd rbd = new Rbd(io);
RbdImage image = rbd.open(disk.getName());
if (cmd.getCommandSwitch().equalsIgnoreCase(
ManageSnapshotCommand.CREATE_SNAPSHOT)) {
s_logger.debug("Attempting to create RBD snapshot " + disk.getName() + "@" + snapshotName);
image.snapCreate(snapshotName);
} else {
s_logger.debug("Attempting to remove RBD snapshot " + disk.getName() + "@" + snapshotName);
image.snapRemove(snapshotName);
}
rbd.close(image);
r.ioCtxDestroy(io);
} catch (Exception e) {
s_logger.error("A RBD snapshot operation on " + disk.getName() + " failed. The error was: " + e.getMessage());
}
} else {
command.add("-d", snapshotPath);
}
/* VM is not running, create a snapshot by ourself */
final Script command = new Script(_manageSnapshotPath,
_cmdsTimeout, s_logger);
if (cmd.getCommandSwitch().equalsIgnoreCase(
ManageSnapshotCommand.CREATE_SNAPSHOT)) {
command.add("-c", disk.getPath());
} else {
command.add("-d", snapshotPath);
}
command.add("-n", snapshotName);
String result = command.execute();
if (result != null) {
s_logger.debug("Failed to manage snapshot: " + result);
return new ManageSnapshotAnswer(cmd, false,
"Failed to manage snapshot: " + result);
command.add("-n", snapshotName);
String result = command.execute();
if (result != null) {
s_logger.debug("Failed to manage snapshot: " + result);
return new ManageSnapshotAnswer(cmd, false,
"Failed to manage snapshot: " + result);
}
}
}
return new ManageSnapshotAnswer(cmd, cmd.getSnapshotId(),
@ -2062,16 +2104,74 @@ ServerResource {
cmd.getPrimaryStoragePoolNameLabel());
KVMPhysicalDisk snapshotDisk = primaryPool.getPhysicalDisk(cmd
.getVolumePath());
Script command = new Script(_manageSnapshotPath, _cmdsTimeout,
s_logger);
command.add("-b", snapshotDisk.getPath());
command.add("-n", snapshotName);
command.add("-p", snapshotDestPath);
command.add("-t", snapshotName);
String result = command.execute();
if (result != null) {
s_logger.debug("Failed to backup snaptshot: " + result);
return new BackupSnapshotAnswer(cmd, false, result, null, true);
/**
* RBD snapshots can't be copied using qemu-img, so we have to use
* the Java bindings for librbd here.
*
* These bindings will read the snapshot and write the contents to
* the secondary storage directly
*
* It will stop doing so if the amount of time spent is longer than
* cmds.timeout
*/
if (primaryPool.getType() == StoragePoolType.RBD) {
try {
Rados r = new Rados(primaryPool.getAuthUserName());
r.confSet("mon_host", primaryPool.getSourceHost() + ":" + primaryPool.getSourcePort());
r.confSet("key", primaryPool.getAuthSecret());
r.connect();
s_logger.debug("Succesfully connected to Ceph cluster at " + r.confGet("mon_host"));
IoCTX io = r.ioCtxCreate(primaryPool.getSourceDir());
Rbd rbd = new Rbd(io);
RbdImage image = rbd.open(snapshotDisk.getName(), snapshotName);
long startTime = System.currentTimeMillis() / 1000;
File fh = new File(snapshotDestPath);
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(fh));
int chunkSize = 4194304;
long offset = 0;
s_logger.debug("Backuping up RBD snapshot " + snapshotName + " to " + snapshotDestPath);
while(true) {
byte[] buf = new byte[chunkSize];
int bytes = image.read(offset, buf, chunkSize);
if (bytes <= 0) {
break;
}
bos.write(buf, 0, bytes);
offset += bytes;
}
s_logger.debug("Completed backing up RBD snapshot " + snapshotName + " to " + snapshotDestPath + ". Bytes written: " + offset);
bos.close();
r.ioCtxDestroy(io);
} catch (RadosException e) {
s_logger.error("A RADOS operation failed. The error was: " + e.getMessage());
return new BackupSnapshotAnswer(cmd, false, e.toString(), null, true);
} catch (RbdException e) {
s_logger.error("A RBD operation on " + snapshotDisk.getName() + " failed. The error was: " + e.getMessage());
return new BackupSnapshotAnswer(cmd, false, e.toString(), null, true);
} catch (FileNotFoundException e) {
s_logger.error("Failed to open " + snapshotDestPath + ". The error was: " + e.getMessage());
return new BackupSnapshotAnswer(cmd, false, e.toString(), null, true);
} catch (IOException e) {
s_logger.debug("An I/O error occured during a snapshot operation on " + snapshotDestPath);
return new BackupSnapshotAnswer(cmd, false, e.toString(), null, true);
}
} else {
Script command = new Script(_manageSnapshotPath, _cmdsTimeout,
s_logger);
command.add("-b", snapshotDisk.getPath());
command.add("-n", snapshotName);
command.add("-p", snapshotDestPath);
command.add("-t", snapshotName);
String result = command.execute();
if (result != null) {
s_logger.debug("Failed to backup snaptshot: " + result);
return new BackupSnapshotAnswer(cmd, false, result, null, true);
}
}
/* Delete the snapshot on primary */
@ -2108,11 +2208,11 @@ ServerResource {
vm.resume();
}
} else {
command = new Script(_manageSnapshotPath, _cmdsTimeout,
Script command = new Script(_manageSnapshotPath, _cmdsTimeout,
s_logger);
command.add("-d", snapshotDisk.getPath());
command.add("-n", snapshotName);
result = command.execute();
String result = command.execute();
if (result != null) {
s_logger.debug("Failed to backup snapshot: " + result);
return new BackupSnapshotAnswer(cmd, false,

View File

@ -46,6 +46,7 @@ import com.ceph.rados.IoCTX;
import com.ceph.rbd.Rbd;
import com.ceph.rbd.RbdImage;
import com.ceph.rbd.RbdException;
import com.ceph.rbd.jna.RbdSnapInfo;
import com.cloud.agent.api.ManageSnapshotCommand;
import com.cloud.hypervisor.kvm.resource.LibvirtConnection;
@ -73,6 +74,8 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
private String _manageSnapshotPath;
private String rbdTemplateSnapName = "cloudstack-base-snap";
private int rbdFeatures = (1<<0); /* Feature 1<<0 means layering in RBD format 2 */
private int rbdOrder = 0; /* Order 0 means 4MB blocks (the default) */
public LibvirtStorageAdaptor(StorageLayer storage) {
_storageLayer = storage;
@ -615,38 +618,116 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
StoragePool virtPool = libvirtPool.getPool();
LibvirtStorageVolumeDef.volFormat libvirtformat = null;
String volPath = null;
String volName = null;
long volAllocation = 0;
long volCapacity = 0;
/**
* To have RBD function properly we want RBD images of format 2
* libvirt currently defaults to format 1
*
* For that reason we use the native RBD bindings to create the
* RBD image until libvirt creates RBD format 2 by default
*/
if (pool.getType() == StoragePoolType.RBD) {
format = PhysicalDiskFormat.RAW;
try {
s_logger.info("Creating RBD image " + pool.getSourcePort() + "/" + name + " with size " + size);
Rados r = new Rados(pool.getAuthUserName());
r.confSet("mon_host", pool.getSourceHost() + ":" + pool.getSourcePort());
r.confSet("key", pool.getAuthSecret());
r.connect();
s_logger.debug("Succesfully connected to Ceph cluster at " + r.confGet("mon_host"));
IoCTX io = r.ioCtxCreate(pool.getSourceDir());
Rbd rbd = new Rbd(io);
rbd.create(name, size, this.rbdFeatures, this.rbdOrder);
r.ioCtxDestroy(io);
} catch (RadosException e) {
throw new CloudRuntimeException(e.toString());
} catch (RbdException e) {
throw new CloudRuntimeException(e.toString());
}
volPath = name;
volName = name;
volCapacity = size;
volAllocation = size;
} else {
if (format == PhysicalDiskFormat.QCOW2) {
libvirtformat = LibvirtStorageVolumeDef.volFormat.QCOW2;
} else if (format == PhysicalDiskFormat.RAW) {
libvirtformat = LibvirtStorageVolumeDef.volFormat.RAW;
} else if (format == PhysicalDiskFormat.DIR) {
libvirtformat = LibvirtStorageVolumeDef.volFormat.DIR;
} else if (format == PhysicalDiskFormat.TAR) {
libvirtformat = LibvirtStorageVolumeDef.volFormat.TAR;
}
LibvirtStorageVolumeDef volDef = new LibvirtStorageVolumeDef(name,
size, libvirtformat, null, null);
s_logger.debug(volDef.toString());
try {
StorageVol vol = virtPool.storageVolCreateXML(volDef.toString(), 0);
volPath = vol.getPath();
volName = vol.getName();
volAllocation = vol.getInfo().allocation;
volCapacity = vol.getInfo().capacity;
} catch (LibvirtException e) {
throw new CloudRuntimeException(e.toString());
}
}
if (format == PhysicalDiskFormat.QCOW2) {
libvirtformat = LibvirtStorageVolumeDef.volFormat.QCOW2;
} else if (format == PhysicalDiskFormat.RAW) {
libvirtformat = LibvirtStorageVolumeDef.volFormat.RAW;
} else if (format == PhysicalDiskFormat.DIR) {
libvirtformat = LibvirtStorageVolumeDef.volFormat.DIR;
} else if (format == PhysicalDiskFormat.TAR) {
libvirtformat = LibvirtStorageVolumeDef.volFormat.TAR;
}
LibvirtStorageVolumeDef volDef = new LibvirtStorageVolumeDef(name,
size, libvirtformat, null, null);
s_logger.debug(volDef.toString());
try {
StorageVol vol = virtPool.storageVolCreateXML(volDef.toString(), 0);
KVMPhysicalDisk disk = new KVMPhysicalDisk(vol.getPath(),
vol.getName(), pool);
disk.setFormat(format);
disk.setSize(vol.getInfo().allocation);
disk.setVirtualSize(vol.getInfo().capacity);
return disk;
} catch (LibvirtException e) {
throw new CloudRuntimeException(e.toString());
}
KVMPhysicalDisk disk = new KVMPhysicalDisk(volPath, volName, pool);
disk.setFormat(format);
disk.setSize(volAllocation);
disk.setVirtualSize(volCapacity);
return disk;
}
@Override
public boolean deletePhysicalDisk(String uuid, KVMStoragePool pool) {
/**
* RBD volumes can have snapshots, and while they exist libvirt
* can't remove the RBD volume
*
* We have to remove those snapshots first
*/
if (pool.getType() == StoragePoolType.RBD) {
try {
s_logger.info("Unprotecting and Removing RBD snapshots of image "
+ pool.getSourcePort() + "/" + uuid + " prior to removing the image");
Rados r = new Rados(pool.getAuthUserName());
r.confSet("mon_host", pool.getSourceHost() + ":" + pool.getSourcePort());
r.confSet("key", pool.getAuthSecret());
r.connect();
s_logger.debug("Succesfully connected to Ceph cluster at " + r.confGet("mon_host"));
IoCTX io = r.ioCtxCreate(pool.getSourceDir());
Rbd rbd = new Rbd(io);
RbdImage image = rbd.open(uuid);
List<RbdSnapInfo> snaps = image.snapList();
for (RbdSnapInfo snap : snaps) {
image.snapUnprotect(snap.name);
image.snapRemove(snap.name);
}
rbd.close(image);
r.ioCtxDestroy(io);
} catch (RadosException e) {
throw new CloudRuntimeException(e.toString());
} catch (RbdException e) {
throw new CloudRuntimeException(e.toString());
}
}
LibvirtStoragePool libvirtPool = (LibvirtStoragePool) pool;
try {
StorageVol vol = this.getVolume(libvirtPool.getPool(), uuid);
@ -730,11 +811,6 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
* we want to copy it
*/
/* Feature 1<<0 means layering in RBD format 2 */
int rbdFeatures = (1<<0);
/* Order 0 means 4MB blocks (the default) */
int rbdOrder = 0;
try {
if ((srcPool.getSourceHost().equals(destPool.getSourceHost())) && (srcPool.getSourceDir().equals(destPool.getSourceDir()))) {
/* We are on the same Ceph cluster, but we require RBD format 2 on the source image */
@ -755,7 +831,7 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
s_logger.debug("The source image " + srcPool.getSourceDir() + "/" + template.getName()
+ " is RBD format 1. We have to perform a regular copy (" + template.getVirtualSize() + " bytes)");
rbd.create(disk.getName(), template.getVirtualSize(), rbdFeatures, rbdOrder);
rbd.create(disk.getName(), template.getVirtualSize(), this.rbdFeatures, this.rbdOrder);
RbdImage destImage = rbd.open(disk.getName());
s_logger.debug("Starting to copy " + srcImage.getName() + " to " + destImage.getName() + " in Ceph pool " + srcPool.getSourceDir());
@ -768,7 +844,7 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
+ " is RBD format 2. We will perform a RBD clone using snapshot "
+ this.rbdTemplateSnapName);
/* The source image is format 2, we can do a RBD snapshot+clone (layering) */
rbd.clone(template.getName(), this.rbdTemplateSnapName, io, disk.getName(), rbdFeatures, rbdOrder);
rbd.clone(template.getName(), this.rbdTemplateSnapName, io, disk.getName(), this.rbdFeatures, this.rbdOrder);
s_logger.debug("Succesfully cloned " + template.getName() + "@" + this.rbdTemplateSnapName + " to " + disk.getName());
}
@ -798,7 +874,7 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
s_logger.debug("Creating " + disk.getName() + " on the destination cluster " + rDest.confGet("mon_host")
+ " in pool " + destPool.getSourceDir());
dRbd.create(disk.getName(), template.getVirtualSize(), rbdFeatures, rbdOrder);
dRbd.create(disk.getName(), template.getVirtualSize(), this.rbdFeatures, this.rbdOrder);
RbdImage srcImage = sRbd.open(template.getName());
RbdImage destImage = dRbd.open(disk.getName());
@ -943,8 +1019,6 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
*/
s_logger.debug("The source image is not RBD, but the destination is. We will convert into RBD format 2");
String tmpFile = "/tmp/" + name;
int rbdFeatures = (1<<0);
int rbdOrder = 0;
try {
srcFile = new QemuImgFile(sourcePath, sourceFormat);
@ -963,7 +1037,7 @@ public class LibvirtStorageAdaptor implements StorageAdaptor {
Rbd rbd = new Rbd(io);
s_logger.debug("Creating RBD image " + name + " in Ceph pool " + destPool.getSourceDir() + " with RBD format 2");
rbd.create(name, disk.getVirtualSize(), rbdFeatures, rbdOrder);
rbd.create(name, disk.getVirtualSize(), this.rbdFeatures, this.rbdOrder);
RbdImage image = rbd.open(name);

View File

@ -140,7 +140,7 @@ public class LibvirtStoragePool implements KVMStoragePool {
@Override
public boolean isExternalSnapshot() {
if (this.type == StoragePoolType.Filesystem) {
if (this.type == StoragePoolType.Filesystem || this.type == StoragePoolType.RBD) {
return false;
}

View File

@ -82,9 +82,10 @@
<cs.java-ipv6.version>0.10</cs.java-ipv6.version>
<cs.replace.properties>build/replace.properties</cs.replace.properties>
<cs.libvirt-java.version>0.4.9</cs.libvirt-java.version>
<cs.rados-java.version>0.1.1</cs.rados-java.version>
<cs.rados-java.version>0.1.2</cs.rados-java.version>
<cs.target.dir>target</cs.target.dir>
<cs.daemon.version>1.0.10</cs.daemon.version>
<cs.jna.version>3.0.9</cs.jna.version>
</properties>
<distributionManagement>