From d6b41d9ac21a9740494bfcfd58ea51d712e52f4e Mon Sep 17 00:00:00 2001 From: Koushik Das Date: Fri, 9 Dec 2016 15:49:39 +0530 Subject: [PATCH] CLOUDSTACK-9660: NPE while destroying volumes during 1000 VMs deploy and destroy tests NPE is seen as VM destroy and storage cleanup threads try to remove the same root volume. Fix is to handle only non-root volumes in storage cleanup thread, root volumes will be handled as part of VM destroy. --- .../schema/src/com/cloud/storage/dao/VolumeDao.java | 2 +- .../src/com/cloud/storage/dao/VolumeDaoImpl.java | 4 +++- .../cloudstack/storage/volume/VolumeObject.java | 7 ++++--- .../cloudstack/storage/volume/VolumeServiceImpl.java | 5 +++++ server/src/com/cloud/storage/StorageManagerImpl.java | 11 ++++++++--- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/engine/schema/src/com/cloud/storage/dao/VolumeDao.java b/engine/schema/src/com/cloud/storage/dao/VolumeDao.java index a05dc1f560d..f2d5fc73520 100644 --- a/engine/schema/src/com/cloud/storage/dao/VolumeDao.java +++ b/engine/schema/src/com/cloud/storage/dao/VolumeDao.java @@ -80,7 +80,7 @@ public interface VolumeDao extends GenericDao, StateDao listVolumesToBeDestroyed(); - List listVolumesToBeDestroyed(Date date); + List listNonRootVolumesToBeDestroyed(Date date); ImageFormat getImageFormat(Long volumeId); diff --git a/engine/schema/src/com/cloud/storage/dao/VolumeDaoImpl.java b/engine/schema/src/com/cloud/storage/dao/VolumeDaoImpl.java index 6ed556eabec..4f5b613ddd1 100644 --- a/engine/schema/src/com/cloud/storage/dao/VolumeDaoImpl.java +++ b/engine/schema/src/com/cloud/storage/dao/VolumeDaoImpl.java @@ -325,6 +325,7 @@ public class VolumeDaoImpl extends GenericDaoBase implements Vol AllFieldsSearch.and("deviceId", AllFieldsSearch.entity().getDeviceId(), Op.EQ); AllFieldsSearch.and("poolId", AllFieldsSearch.entity().getPoolId(), Op.EQ); AllFieldsSearch.and("vType", AllFieldsSearch.entity().getVolumeType(), Op.EQ); + AllFieldsSearch.and("notVolumeType", AllFieldsSearch.entity().getVolumeType(), Op.NEQ); AllFieldsSearch.and("id", AllFieldsSearch.entity().getId(), Op.EQ); AllFieldsSearch.and("destroyed", AllFieldsSearch.entity().getState(), Op.EQ); AllFieldsSearch.and("notDestroyed", AllFieldsSearch.entity().getState(), Op.NEQ); @@ -481,9 +482,10 @@ public class VolumeDaoImpl extends GenericDaoBase implements Vol } @Override - public List listVolumesToBeDestroyed(Date date) { + public List listNonRootVolumesToBeDestroyed(Date date) { SearchCriteria sc = AllFieldsSearch.create(); sc.setParameters("state", Volume.State.Destroy); + sc.setParameters("notVolumeType", Volume.Type.ROOT.toString()); sc.setParameters("updateTime", date); return listBy(sc); diff --git a/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeObject.java b/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeObject.java index 5bf49a9a813..b7f459227aa 100644 --- a/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeObject.java +++ b/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeObject.java @@ -174,11 +174,11 @@ public class VolumeObject implements VolumeInfo { } @Override - public boolean stateTransit(Volume.Event event) { + public boolean stateTransit(Volume.Event event) { boolean result = false; try { volumeVO = volumeDao.findById(volumeVO.getId()); - if(volumeVO != null) { + if (volumeVO != null) { result = _volStateMachine.transitTo(volumeVO, event, null, volumeDao); volumeVO = volumeDao.findById(volumeVO.getId()); } @@ -332,8 +332,9 @@ public class VolumeObject implements VolumeInfo { throw new CloudRuntimeException("Failed to update state:" + e.toString()); } finally { // in case of OperationFailed, expunge the entry + // state transit call reloads the volume from DB and so check for null as well if (event == ObjectInDataStoreStateMachine.Event.OperationFailed && - (volumeVO.getState() != Volume.State.Copying && volumeVO.getState() != Volume.State.Uploaded && volumeVO.getState() != Volume.State.UploadError)) { + (volumeVO != null && volumeVO.getState() != Volume.State.Copying && volumeVO.getState() != Volume.State.Uploaded && volumeVO.getState() != Volume.State.UploadError)) { objectInStoreMgr.deleteIfNotReady(this); } } diff --git a/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java b/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java index 05ec7d02706..0b58bf2fecb 100644 --- a/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java +++ b/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java @@ -316,6 +316,11 @@ public class VolumeServiceImpl implements VolumeService { } VolumeVO vol = volDao.findById(volume.getId()); + if (vol == null) { + s_logger.debug("Volume " + volume.getId() + " is not found"); + future.complete(result); + return future; + } String volumePath = vol.getPath(); Long poolId = vol.getPoolId(); diff --git a/server/src/com/cloud/storage/StorageManagerImpl.java b/server/src/com/cloud/storage/StorageManagerImpl.java index 9553ff3912a..3dc62b64826 100644 --- a/server/src/com/cloud/storage/StorageManagerImpl.java +++ b/server/src/com/cloud/storage/StorageManagerImpl.java @@ -1113,8 +1113,8 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C cleanupSecondaryStorage(recurring); - List vols = _volsDao.listVolumesToBeDestroyed(new Date(System.currentTimeMillis() - ((long) StorageCleanupDelay.value() << 10))); - + // ROOT volumes will be destroyed as part of VM cleanup + List vols = _volsDao.listNonRootVolumesToBeDestroyed(new Date(System.currentTimeMillis() - ((long) StorageCleanupDelay.value() << 10))); for (VolumeVO vol : vols) { try { // If this fails, just log a warning. It's ideal if we clean up the host-side clustered file @@ -1125,7 +1125,12 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C } try { - volService.expungeVolumeAsync(volFactory.getVolume(vol.getId())); + VolumeInfo volumeInfo = volFactory.getVolume(vol.getId()); + if (volumeInfo != null) { + volService.expungeVolumeAsync(volumeInfo); + } else { + s_logger.debug("Volume " + vol.getUuid() + " is already destroyed"); + } } catch (Exception e) { s_logger.warn("Unable to destroy volume " + vol.getUuid(), e); }