From 44491448e3811cfcebe5c3aeee72ba7a01948cda Mon Sep 17 00:00:00 2001 From: Aaron Hurt Date: Fri, 8 Jul 2016 22:57:06 -0500 Subject: [PATCH] Cleanup rbd contexts and improve exception logging We noticed that when an exception occurs within the cleanup loop inside the deletePhysicalDisk routine that the previously allocated contexts are not cleaned up. This seemed to cause an eventual crash of the host agent after multiple exceptions within the loop. In addition to ensuring the contexts are always freed we also improved the logging when exceptions do occur to include the actual return code from the underlying library in deletePhysicalDisk and deleteSnapshot. --- .../kvm/storage/KVMStorageProcessor.java | 13 ++++++- .../kvm/storage/LibvirtStorageAdaptor.java | 37 +++++++++++-------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java index ed2fa495f5c..77cbdd3a79a 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java @@ -69,7 +69,10 @@ import org.libvirt.LibvirtException; import com.ceph.rados.IoCTX; import com.ceph.rados.Rados; +import com.ceph.rados.exceptions.ErrorCode; +import com.ceph.rados.exceptions.RadosException; import com.ceph.rbd.Rbd; +import com.ceph.rbd.RbdException; import com.ceph.rbd.RbdImage; import com.cloud.agent.api.Answer; import com.cloud.agent.api.storage.PrimaryStorageDownloadAnswer; @@ -1325,6 +1328,8 @@ public class KVMStorageProcessor implements StorageProcessor { image.snapRemove(snapshotName); s_logger.info("Snapshot " + snap_full_name + " successfully removed from " + primaryPool.getType().toString() + " pool."); + } catch (RbdException e) { + s_logger.error(e.toString() + " - " + ErrorCode.getErrorMessage(e.getReturnValue())); } finally { rbd.close(image); r.ioCtxDestroy(io); @@ -1334,8 +1339,14 @@ public class KVMStorageProcessor implements StorageProcessor { throw new InternalErrorException("Operation not implemented for storage pool type of " + primaryPool.getType().toString()); } return new Answer(cmd, true, "Snapshot " + snap_full_name + " removed successfully."); + } catch (RadosException e) { + s_logger.error(e.toString() + " - " + ErrorCode.getErrorMessage(e.getReturnValue())); + return new Answer(cmd, false, "Failed to remove snapshot " + snap_full_name); + } catch (RbdException e) { + s_logger.error(e.toString() + " - " + ErrorCode.getErrorMessage(e.getReturnValue())); + return new Answer(cmd, false, "Failed to remove snapshot " + snap_full_name); } catch (Exception e) { - s_logger.error(e.getMessage()); + s_logger.error(e.toString()); return new Answer(cmd, false, "Failed to remove snapshot " + snap_full_name); } } diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java index d7bdab2532d..5a35973d618 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java @@ -35,6 +35,7 @@ import org.libvirt.StorageVol; import com.ceph.rados.IoCTX; import com.ceph.rados.Rados; +import com.ceph.rados.exceptions.ErrorCode; import com.ceph.rados.exceptions.RadosException; import com.ceph.rbd.Rbd; import com.ceph.rbd.RbdException; @@ -863,26 +864,30 @@ public class LibvirtStorageAdaptor implements StorageAdaptor { RbdImage image = rbd.open(uuid); s_logger.debug("Fetching list of snapshots of RBD image " + pool.getSourceDir() + "/" + uuid); List snaps = image.snapList(); - for (RbdSnapInfo snap : snaps) { - if (image.snapIsProtected(snap.name)) { - s_logger.debug("Unprotecting snapshot " + pool.getSourceDir() + "/" + uuid + "@" + snap.name); - image.snapUnprotect(snap.name); - } else { - s_logger.debug("Snapshot " + pool.getSourceDir() + "/" + uuid + "@" + snap.name + " is not protected."); + try { + for (RbdSnapInfo snap : snaps) { + if (image.snapIsProtected(snap.name)) { + s_logger.debug("Unprotecting snapshot " + pool.getSourceDir() + "/" + uuid + "@" + snap.name); + image.snapUnprotect(snap.name); + } else { + s_logger.debug("Snapshot " + pool.getSourceDir() + "/" + uuid + "@" + snap.name + " is not protected."); + } + s_logger.debug("Removing snapshot " + pool.getSourceDir() + "/" + uuid + "@" + snap.name); + image.snapRemove(snap.name); } - s_logger.debug("Removing snapshot " + pool.getSourceDir() + "/" + uuid + "@" + snap.name); - image.snapRemove(snap.name); + s_logger.info("Succesfully unprotected and removed any remaining snapshots (" + snaps.size() + ") of " + + pool.getSourceDir() + "/" + uuid + " Continuing to remove the RBD image"); + } catch (RbdException e) { + throw new CloudRuntimeException(e.toString() + " - " + ErrorCode.getErrorMessage(e.getReturnValue())); + } finally { + s_logger.debug("Closing image and destroying context"); + rbd.close(image); + r.ioCtxDestroy(io); } - - rbd.close(image); - r.ioCtxDestroy(io); - - s_logger.info("Succesfully unprotected and removed any remaining snapshots (" + snaps.size() + ") of " - + pool.getSourceDir() + "/" + uuid + " Continuing to remove the RBD image"); } catch (RadosException e) { - throw new CloudRuntimeException(e.toString()); + throw new CloudRuntimeException(e.toString() + " - " + ErrorCode.getErrorMessage(e.getReturnValue())); } catch (RbdException e) { - throw new CloudRuntimeException(e.toString()); + throw new CloudRuntimeException(e.toString() + " - " + ErrorCode.getErrorMessage(e.getReturnValue())); } }