From 9a35f87d370fb781763d1d243d2f225e3fb60c4a Mon Sep 17 00:00:00 2001 From: Sanjay Tripathi Date: Thu, 26 Feb 2015 15:34:29 +0530 Subject: [PATCH] CLOUDSTACK-8835: Added alerts in case of template download failure Reviewed-By: Devdeep --- .../apache/cloudstack/alert/AlertService.java | 1 + .../storage/image/TemplateServiceImpl.java | 1 + .../image/BaseImageStoreDriverImpl.java | 12 ++++ .../src/com/cloud/alert/AlertManagerImpl.java | 3 +- .../storage/ImageStoreUploadMonitorImpl.java | 55 +++++++++++++------ 5 files changed, 53 insertions(+), 19 deletions(-) diff --git a/api/src/org/apache/cloudstack/alert/AlertService.java b/api/src/org/apache/cloudstack/alert/AlertService.java index 2e98aea407a..2b827eb0033 100644 --- a/api/src/org/apache/cloudstack/alert/AlertService.java +++ b/api/src/org/apache/cloudstack/alert/AlertService.java @@ -65,6 +65,7 @@ public interface AlertService { public static final AlertType ALERT_TYPE_LOCAL_STORAGE = new AlertType((short)25, "ALERT.STORAGE.LOCAL", true); public static final AlertType ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED = new AlertType((short)26, "ALERT.RESOURCE.EXCEED", true); public static final AlertType ALERT_TYPE_SYNC = new AlertType((short)27, "ALERT.TYPE.SYNC", true); + public static final AlertType ALERT_TYPE_UPLOAD_FAILED = new AlertType((short)28, "ALERT.UPLOAD.FAILED", true); public short getType() { return type; diff --git a/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java b/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java index ccb70c4aba3..69dc7c71294 100644 --- a/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java +++ b/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java @@ -334,6 +334,7 @@ public class TemplateServiceImpl implements TemplateService { String msg = "Template " + tmplt.getName() + ":" + tmplt.getId() + " is corrupted on secondary storage " + tmpltStore.getId(); 
tmpltStore.setErrorString(msg); s_logger.info(msg); + _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_UPLOAD_FAILED, zoneId, null, msg, msg); if (tmplt.getState() == VirtualMachineTemplate.State.NotUploaded || tmplt.getState() == VirtualMachineTemplate.State.UploadInProgress) { s_logger.info("Template Sync found " + uniqueName + " on image store " + storeId + " uploaded using SSVM as corrupted, marking it as failed"); tmpltStore.setState(State.Failed); diff --git a/engine/storage/src/org/apache/cloudstack/storage/image/BaseImageStoreDriverImpl.java b/engine/storage/src/org/apache/cloudstack/storage/image/BaseImageStoreDriverImpl.java index 25aa8e80e0e..9c7dc46482a 100644 --- a/engine/storage/src/org/apache/cloudstack/storage/image/BaseImageStoreDriverImpl.java +++ b/engine/storage/src/org/apache/cloudstack/storage/image/BaseImageStoreDriverImpl.java @@ -50,10 +50,12 @@ import com.cloud.agent.api.storage.DownloadAnswer; import com.cloud.agent.api.storage.Proxy; import com.cloud.agent.api.to.DataObjectType; import com.cloud.agent.api.to.DataTO; +import com.cloud.alert.AlertManager; import com.cloud.storage.VMTemplateStorageResourceAssoc; import com.cloud.storage.VMTemplateVO; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.VMTemplateDao; +import com.cloud.storage.dao.VMTemplateZoneDao; import com.cloud.storage.dao.VolumeDao; import com.cloud.storage.download.DownloadMonitor; @@ -73,6 +75,10 @@ public abstract class BaseImageStoreDriverImpl implements ImageStoreDriver { EndPointSelector _epSelector; @Inject ConfigurationDao configDao; + @Inject + VMTemplateZoneDao _vmTemplateZoneDao; + @Inject + AlertManager _alertMgr; protected String _proxy = null; protected Proxy getHttpProxy() { @@ -177,6 +183,9 @@ public abstract class BaseImageStoreDriverImpl implements ImageStoreDriver { result.setSuccess(false); result.setResult(answer.getErrorString()); caller.complete(result); + String msg = "Failed to register template: " + obj.getUuid() + " with 
error: " + answer.getErrorString(); + _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_UPLOAD_FAILED, _vmTemplateZoneDao.listByTemplateId(obj.getId()).get(0).getZoneId(), null, msg, msg); + s_logger.error(msg); } else if (answer.getDownloadStatus() == VMTemplateStorageResourceAssoc.Status.DOWNLOADED) { if (answer.getCheckSum() != null) { VMTemplateVO templateDaoBuilder = _templateDao.createForUpdate(); @@ -229,6 +238,9 @@ public abstract class BaseImageStoreDriverImpl implements ImageStoreDriver { result.setSuccess(false); result.setResult(answer.getErrorString()); caller.complete(result); + String msg = "Failed to upload volume: " + obj.getUuid() + " with error: " + answer.getErrorString(); + _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_UPLOAD_FAILED, volStoreVO.getZoneId(), null, msg, msg); + s_logger.error(msg); } else if (answer.getDownloadStatus() == VMTemplateStorageResourceAssoc.Status.DOWNLOADED) { CreateCmdResult result = new CreateCmdResult(null, null); caller.complete(result); diff --git a/server/src/com/cloud/alert/AlertManagerImpl.java b/server/src/com/cloud/alert/AlertManagerImpl.java index 86530699675..189e473b6f6 100644 --- a/server/src/com/cloud/alert/AlertManagerImpl.java +++ b/server/src/com/cloud/alert/AlertManagerImpl.java @@ -759,7 +759,8 @@ public class AlertManagerImpl extends ManagerBase implements AlertManager, Confi (alertType != AlertManager.AlertType.ALERT_TYPE_SSVM) && (alertType != AlertManager.AlertType.ALERT_TYPE_STORAGE_MISC) && (alertType != AlertManager.AlertType.ALERT_TYPE_MANAGMENT_NODE) && - (alertType != AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED)) { + (alertType != AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED) && + (alertType != AlertManager.AlertType.ALERT_TYPE_UPLOAD_FAILED)) { alert = _alertDao.getLastAlert(alertType.getType(), dataCenterId, podId, clusterId); } diff --git a/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java 
b/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java index 167f19f7cc8..8b462d37253 100755 --- a/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java +++ b/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java @@ -54,12 +54,14 @@ import com.cloud.agent.api.AgentControlCommand; import com.cloud.agent.api.Answer; import com.cloud.agent.api.Command; import com.cloud.agent.api.StartupCommand; +import com.cloud.alert.AlertManager; import com.cloud.exception.ConnectionException; import com.cloud.host.Host; import com.cloud.host.Status; import com.cloud.host.dao.HostDao; import com.cloud.storage.Volume.Event; import com.cloud.storage.dao.VMTemplateDao; +import com.cloud.storage.dao.VMTemplateZoneDao; import com.cloud.storage.dao.VolumeDao; import com.cloud.template.VirtualMachineTemplate; import com.cloud.utils.component.ManagerBase; @@ -96,6 +98,10 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto private DataStoreManager storeMgr; @Inject ResourceLimitService _resourceLimitMgr; + @Inject + private AlertManager _alertMgr; + @Inject + private VMTemplateZoneDao _vmTemplateZoneDao; private long _nodeId; private ScheduledExecutorService _executor = null; @@ -275,6 +281,8 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto public void doInTransactionWithoutResult(TransactionStatus status) { VolumeVO tmpVolume = _volumeDao.findById(volume.getId()); VolumeDataStoreVO tmpVolumeDataStore = _volumeDataStoreDao.findById(volumeDataStore.getId()); + boolean sendAlert = false; + String msg = null; try { switch (answer.getStatus()) { case COMPLETED: @@ -305,9 +313,9 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto tmpVolumeDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.DOWNLOAD_ERROR); tmpVolumeDataStore.setState(State.Failed); stateMachine.transitTo(tmpVolume, Event.OperationFailed, null, _volumeDao); - if (s_logger.isDebugEnabled()) { - 
s_logger.debug("Volume " + tmpVolume.getUuid() + " failed to upload due to operation timed out"); - } + msg = "Volume " + tmpVolume.getUuid() + " failed to upload due to operation timed out"; + s_logger.error(msg); + sendAlert = true; } else { tmpVolumeDataStore.setDownloadPercent(answer.getDownloadPercent()); } @@ -317,9 +325,9 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto tmpVolumeDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.DOWNLOAD_ERROR); tmpVolumeDataStore.setState(State.Failed); stateMachine.transitTo(tmpVolume, Event.OperationFailed, null, _volumeDao); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Volume " + tmpVolume.getUuid() + " failed to upload. Error details: " + answer.getDetails()); - } + msg = "Volume " + tmpVolume.getUuid() + " failed to upload. Error details: " + answer.getDetails(); + s_logger.error(msg); + sendAlert = true; break; case UNKNOWN: if (tmpVolume.getState() == Volume.State.NotUploaded) { // check for timeout @@ -327,9 +335,9 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto tmpVolumeDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.ABANDONED); tmpVolumeDataStore.setState(State.Failed); stateMachine.transitTo(tmpVolume, Event.OperationTimeout, null, _volumeDao); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Volume " + tmpVolume.getUuid() + " failed to upload due to operation timed out"); - } + msg = "Volume " + tmpVolume.getUuid() + " failed to upload due to operation timed out"; + s_logger.error(msg); + sendAlert = true; } } break; @@ -337,6 +345,10 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto _volumeDataStoreDao.update(tmpVolumeDataStore.getId(), tmpVolumeDataStore); } catch (NoTransitionException e) { s_logger.error("Unexpected error " + e.getMessage()); + } finally { + if (sendAlert) { + _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_UPLOAD_FAILED, 
tmpVolume.getDataCenterId(), null, msg, msg); + } } } }); @@ -349,6 +361,8 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto public void doInTransactionWithoutResult(TransactionStatus status) { VMTemplateVO tmpTemplate = _templateDao.findById(template.getId()); TemplateDataStoreVO tmpTemplateDataStore = _templateDataStoreDao.findById(templateDataStore.getId()); + boolean sendAlert = false; + String msg = null; try { switch (answer.getStatus()) { case COMPLETED: @@ -380,9 +394,9 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto tmpTemplateDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.DOWNLOAD_ERROR); tmpTemplateDataStore.setState(State.Failed); stateMachine.transitTo(tmpTemplate, VirtualMachineTemplate.Event.OperationFailed, null, _templateDao); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Template " + tmpTemplate.getUuid() + " failed to upload due to operation timed out"); - } + msg = "Template " + tmpTemplate.getUuid() + " failed to upload due to operation timed out"; + s_logger.error(msg); + sendAlert = true; } else { tmpTemplateDataStore.setDownloadPercent(answer.getDownloadPercent()); } @@ -392,9 +406,9 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto tmpTemplateDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.DOWNLOAD_ERROR); tmpTemplateDataStore.setState(State.Failed); stateMachine.transitTo(tmpTemplate, VirtualMachineTemplate.Event.OperationFailed, null, _templateDao); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Template " + tmpTemplate.getUuid() + " failed to upload. Error details: " + answer.getDetails()); - } + msg = "Template " + tmpTemplate.getUuid() + " failed to upload. 
Error details: " + answer.getDetails(); + s_logger.error(msg); + sendAlert = true; break; case UNKNOWN: if (tmpTemplate.getState() == VirtualMachineTemplate.State.NotUploaded) { // check for timeout @@ -402,9 +416,9 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto tmpTemplateDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.ABANDONED); tmpTemplateDataStore.setState(State.Failed); stateMachine.transitTo(tmpTemplate, VirtualMachineTemplate.Event.OperationTimeout, null, _templateDao); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Template " + tmpTemplate.getUuid() + " failed to upload due to operation timed out"); - } + msg = "Template " + tmpTemplate.getUuid() + " failed to upload due to operation timed out"; + s_logger.error(msg); + sendAlert = true; } } break; @@ -412,6 +426,11 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto _templateDataStoreDao.update(tmpTemplateDataStore.getId(), tmpTemplateDataStore); } catch (NoTransitionException e) { s_logger.error("Unexpected error " + e.getMessage()); + } finally { + if (sendAlert) { + _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_UPLOAD_FAILED, + _vmTemplateZoneDao.listByTemplateId(tmpTemplate.getId()).get(0).getZoneId(), null, msg, msg); + } } } });