mirror of https://github.com/apache/cloudstack.git
volume-upload: If SSVM is destroyed and started, then partially uploaded volumes/templates remain in
inconsistent state (NotUploaded or UploadInProgress) and doesn't transition to any terminal state (Uploaded, Uploaded). As a result these volumes/templates cannot be removed. Fix is to handle such volume/template entries correctly.
This commit is contained in:
parent
f172fbef8a
commit
bc399b981f
|
|
@ -29,7 +29,6 @@ import javax.inject.Inject;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult;
|
||||
import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult;
|
||||
import org.apache.cloudstack.engine.subsystem.api.storage.DataMotionService;
|
||||
|
|
@ -87,12 +86,15 @@ import com.cloud.storage.dao.VMTemplateZoneDao;
|
|||
import com.cloud.storage.template.TemplateConstants;
|
||||
import com.cloud.storage.template.TemplateProp;
|
||||
import com.cloud.template.TemplateManager;
|
||||
import com.cloud.template.VirtualMachineTemplate;
|
||||
import com.cloud.user.Account;
|
||||
import com.cloud.user.AccountManager;
|
||||
import com.cloud.user.ResourceLimitService;
|
||||
import com.cloud.utils.UriUtils;
|
||||
import com.cloud.utils.db.GlobalLock;
|
||||
import com.cloud.utils.exception.CloudRuntimeException;
|
||||
import com.cloud.utils.fsm.NoTransitionException;
|
||||
import com.cloud.utils.fsm.StateMachine2;
|
||||
|
||||
@Component
|
||||
public class TemplateServiceImpl implements TemplateService {
|
||||
|
|
@ -315,6 +317,7 @@ public class TemplateServiceImpl implements TemplateService {
|
|||
|
||||
toBeDownloaded.addAll(allTemplates);
|
||||
|
||||
final StateMachine2<VirtualMachineTemplate.State, VirtualMachineTemplate.Event, VirtualMachineTemplate> stateMachine = VirtualMachineTemplate.State.getStateMachine();
|
||||
for (VMTemplateVO tmplt : allTemplates) {
|
||||
String uniqueName = tmplt.getUniqueName();
|
||||
TemplateDataStoreVO tmpltStore = _vmTemplateStoreDao.findByStoreTemplate(storeId, tmplt.getId());
|
||||
|
|
@ -330,18 +333,23 @@ public class TemplateServiceImpl implements TemplateService {
|
|||
tmpltStore.setDownloadState(Status.DOWNLOAD_ERROR);
|
||||
String msg = "Template " + tmplt.getName() + ":" + tmplt.getId() + " is corrupted on secondary storage " + tmpltStore.getId();
|
||||
tmpltStore.setErrorString(msg);
|
||||
s_logger.info("msg");
|
||||
if (tmplt.getUrl() == null) {
|
||||
msg =
|
||||
"Private Template (" + tmplt + ") with install path " + tmpltInfo.getInstallPath() +
|
||||
"is corrupted, please check in image store: " + tmpltStore.getDataStoreId();
|
||||
s_logger.info(msg);
|
||||
if (tmplt.getState() == VirtualMachineTemplate.State.NotUploaded || tmplt.getState() == VirtualMachineTemplate.State.UploadInProgress) {
|
||||
s_logger.info("Template Sync found " + uniqueName + " on image store " + storeId + " uploaded using SSVM as corrupted, marking it as failed");
|
||||
tmpltStore.setState(State.Failed);
|
||||
try {
|
||||
stateMachine.transitTo(tmplt, VirtualMachineTemplate.Event.OperationFailed, null, _templateDao);
|
||||
} catch (NoTransitionException e) {
|
||||
s_logger.error("Unexpected state transition exception for template " + tmplt.getName() + ". Details: " + e.getMessage());
|
||||
}
|
||||
} else if (tmplt.getUrl() == null) {
|
||||
msg = "Private template (" + tmplt + ") with install path " + tmpltInfo.getInstallPath() + " is corrupted, please check in image store: " + tmpltStore.getDataStoreId();
|
||||
s_logger.warn(msg);
|
||||
} else {
|
||||
s_logger.info("Removing template_store_ref entry for corrupted template " + tmplt.getName());
|
||||
_vmTemplateStoreDao.remove(tmpltStore.getId());
|
||||
toBeDownloaded.add(tmplt);
|
||||
}
|
||||
|
||||
} else {
|
||||
tmpltStore.setDownloadPercent(100);
|
||||
tmpltStore.setDownloadState(Status.DOWNLOADED);
|
||||
|
|
@ -355,6 +363,14 @@ public class TemplateServiceImpl implements TemplateService {
|
|||
tmlpt.setSize(tmpltInfo.getSize());
|
||||
_templateDao.update(tmplt.getId(), tmlpt);
|
||||
|
||||
if (tmplt.getState() == VirtualMachineTemplate.State.NotUploaded || tmplt.getState() == VirtualMachineTemplate.State.UploadInProgress) {
|
||||
try {
|
||||
stateMachine.transitTo(tmplt, VirtualMachineTemplate.Event.OperationSucceeded, null, _templateDao);
|
||||
} catch (NoTransitionException e) {
|
||||
s_logger.error("Unexpected state transition exception for template " + tmplt.getName() + ". Details: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// Skipping limit checks for SYSTEM Account and for the templates created from volumes or snapshots
|
||||
// which already got checked and incremented during createTemplate API call.
|
||||
if (tmpltInfo.getSize() > 0 && tmplt.getAccountId() != Account.ACCOUNT_ID_SYSTEM && tmplt.getUrl() != null) {
|
||||
|
|
@ -374,9 +390,7 @@ public class TemplateServiceImpl implements TemplateService {
|
|||
}
|
||||
_vmTemplateStoreDao.update(tmpltStore.getId(), tmpltStore);
|
||||
} else {
|
||||
tmpltStore =
|
||||
new TemplateDataStoreVO(storeId, tmplt.getId(), new Date(), 100, Status.DOWNLOADED, null, null, null, tmpltInfo.getInstallPath(),
|
||||
tmplt.getUrl());
|
||||
tmpltStore = new TemplateDataStoreVO(storeId, tmplt.getId(), new Date(), 100, Status.DOWNLOADED, null, null, null, tmpltInfo.getInstallPath(), tmplt.getUrl());
|
||||
tmpltStore.setSize(tmpltInfo.getSize());
|
||||
tmpltStore.setPhysicalSize(tmpltInfo.getPhysicalSize());
|
||||
tmpltStore.setDataStoreRole(store.getRole());
|
||||
|
|
@ -387,18 +401,28 @@ public class TemplateServiceImpl implements TemplateService {
|
|||
tmlpt.setSize(tmpltInfo.getSize());
|
||||
_templateDao.update(tmplt.getId(), tmlpt);
|
||||
associateTemplateToZone(tmplt.getId(), zoneId);
|
||||
|
||||
}
|
||||
} else if (tmplt.getState() == VirtualMachineTemplate.State.NotUploaded || tmplt.getState() == VirtualMachineTemplate.State.UploadInProgress) {
|
||||
s_logger.info("Template Sync did not find " + uniqueName + " on image store " + storeId + " uploaded using SSVM, marking it as failed");
|
||||
toBeDownloaded.remove(tmplt);
|
||||
tmpltStore.setDownloadState(Status.DOWNLOAD_ERROR);
|
||||
String msg = "Template " + tmplt.getName() + ":" + tmplt.getId() + " is corrupted on secondary storage " + tmpltStore.getId();
|
||||
tmpltStore.setErrorString(msg);
|
||||
tmpltStore.setState(State.Failed);
|
||||
_vmTemplateStoreDao.update(tmpltStore.getId(), tmpltStore);
|
||||
try {
|
||||
stateMachine.transitTo(tmplt, VirtualMachineTemplate.Event.OperationFailed, null, _templateDao);
|
||||
} catch (NoTransitionException e) {
|
||||
s_logger.error("Unexpected state transition exception for template " + tmplt.getName() + ". Details: " + e.getMessage());
|
||||
}
|
||||
} else {
|
||||
s_logger.info("Template Sync did not find " + uniqueName + " on image store " + storeId +
|
||||
", may request download based on available hypervisor types");
|
||||
s_logger.info("Template Sync did not find " + uniqueName + " on image store " + storeId + ", may request download based on available hypervisor types");
|
||||
if (tmpltStore != null) {
|
||||
if (_storeMgr.isRegionStore(store) && tmpltStore.getDownloadState() == VMTemplateStorageResourceAssoc.Status.DOWNLOADED
|
||||
&& tmpltStore.getState() == State.Ready
|
||||
&& tmpltStore.getInstallPath() == null) {
|
||||
s_logger.info("Keep fake entry in template store table for migration of previous NFS to object store");
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
s_logger.info("Removing leftover template " + uniqueName + " entry from template store table");
|
||||
// remove those leftover entries
|
||||
_vmTemplateStoreDao.remove(tmpltStore.getId());
|
||||
|
|
@ -422,7 +446,7 @@ public class TemplateServiceImpl implements TemplateService {
|
|||
availHypers.add(HypervisorType.None); // bug 9809: resume ISO
|
||||
// download.
|
||||
for (VMTemplateVO tmplt : toBeDownloaded) {
|
||||
if (tmplt.getUrl() == null) { // If url is null we can't
|
||||
if (tmplt.getUrl() == null) { // If url is null, skip downloading
|
||||
s_logger.info("Skip downloading template " + tmplt.getUniqueName() + " since no url is specified.");
|
||||
continue;
|
||||
}
|
||||
|
|
@ -458,9 +482,7 @@ public class TemplateServiceImpl implements TemplateService {
|
|||
for (String uniqueName : templateInfos.keySet()) {
|
||||
TemplateProp tInfo = templateInfos.get(uniqueName);
|
||||
if (_tmpltMgr.templateIsDeleteable(tInfo.getId())) {
|
||||
// we cannot directly call deleteTemplateSync here to
|
||||
// reuse delete logic since in this case, our db does not have
|
||||
// this template at all.
|
||||
// we cannot directly call deleteTemplateSync here to reuse delete logic since in this case db does not have this template at all.
|
||||
TemplateObjectTO tmplTO = new TemplateObjectTO();
|
||||
tmplTO.setDataStore(store.getTO());
|
||||
tmplTO.setPath(tInfo.getInstallPath());
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ public class ObjectInDataStoreManagerImpl implements ObjectInDataStoreManager {
|
|||
stateMachines = new StateMachine2<State, Event, DataObjectInStore>();
|
||||
stateMachines.addTransition(State.Allocated, Event.CreateOnlyRequested, State.Creating);
|
||||
stateMachines.addTransition(State.Allocated, Event.DestroyRequested, State.Destroying);
|
||||
stateMachines.addTransition(State.Allocated, Event.OperationFailed, State.Failed);
|
||||
stateMachines.addTransition(State.Creating, Event.OperationFailed, State.Allocated);
|
||||
stateMachines.addTransition(State.Creating, Event.OperationSuccessed, State.Ready);
|
||||
stateMachines.addTransition(State.Ready, Event.CopyingRequested, State.Copying);
|
||||
|
|
|
|||
|
|
@ -1390,7 +1390,7 @@ public class VolumeServiceImpl implements VolumeService {
|
|||
for (VolumeDataStoreVO volumeStore : dbVolumes) {
|
||||
VolumeVO volume = _volumeDao.findById(volumeStore.getVolumeId());
|
||||
if (volume == null) {
|
||||
s_logger.warn("Volume_store_ref shows that volume " + volumeStore.getVolumeId() + " is on image store " + storeId +
|
||||
s_logger.warn("Volume_store_ref table shows that volume " + volumeStore.getVolumeId() + " is on image store " + storeId +
|
||||
", but the volume is not found in volumes table, potentially some bugs in deleteVolume, so we just treat this volume to be deleted and mark it as destroyed");
|
||||
volumeStore.setDestroyed(true);
|
||||
_volumeStoreDao.update(volumeStore.getId(), volumeStore);
|
||||
|
|
@ -1406,20 +1406,23 @@ public class VolumeServiceImpl implements VolumeService {
|
|||
}
|
||||
if (volInfo.isCorrupted()) {
|
||||
volumeStore.setDownloadState(Status.DOWNLOAD_ERROR);
|
||||
String msg = "Volume " + volume.getUuid() + " is corrupted on image store ";
|
||||
String msg = "Volume " + volume.getUuid() + " is corrupted on image store";
|
||||
volumeStore.setErrorString(msg);
|
||||
s_logger.info("msg");
|
||||
if (volumeStore.getDownloadUrl() == null) {
|
||||
msg =
|
||||
"Volume (" + volume.getUuid() + ") with install path " + volInfo.getInstallPath() +
|
||||
"is corrupted, please check in image store: " + volumeStore.getDataStoreId();
|
||||
s_logger.info(msg);
|
||||
if (volume.getState() == State.NotUploaded || volume.getState() == State.UploadInProgress) {
|
||||
s_logger.info("Volume Sync found " + volume.getUuid() + " uploaded using SSVM on image store " + storeId + " as corrupted, marking it as failed");
|
||||
_volumeStoreDao.update(volumeStore.getId(), volumeStore);
|
||||
// mark volume as failed, so that storage GC will clean it up
|
||||
VolumeObject volObj = (VolumeObject)volFactory.getVolume(volume.getId());
|
||||
volObj.processEvent(Event.OperationFailed);
|
||||
} else if (volumeStore.getDownloadUrl() == null) {
|
||||
msg = "Volume (" + volume.getUuid() + ") with install path " + volInfo.getInstallPath() + " is corrupted, please check in image store: " + volumeStore.getDataStoreId();
|
||||
s_logger.warn(msg);
|
||||
} else {
|
||||
s_logger.info("Removing volume_store_ref entry for corrupted volume " + volume.getName());
|
||||
_volumeStoreDao.remove(volumeStore.getId());
|
||||
toBeDownloaded.add(volumeStore);
|
||||
}
|
||||
|
||||
} else { // Put them in right status
|
||||
volumeStore.setDownloadPercent(100);
|
||||
volumeStore.setDownloadState(Status.DOWNLOADED);
|
||||
|
|
@ -1436,15 +1439,18 @@ public class VolumeServiceImpl implements VolumeService {
|
|||
_volumeDao.update(volumeStore.getVolumeId(), volume);
|
||||
}
|
||||
|
||||
if (volume.getState() == State.NotUploaded || volume.getState() == State.UploadInProgress) {
|
||||
VolumeObject volObj = (VolumeObject)volFactory.getVolume(volume.getId());
|
||||
volObj.processEvent(Event.OperationSuccessed);
|
||||
}
|
||||
|
||||
if (volInfo.getSize() > 0) {
|
||||
try {
|
||||
_resourceLimitMgr.checkResourceLimit(_accountMgr.getAccount(volume.getAccountId()),
|
||||
com.cloud.configuration.Resource.ResourceType.secondary_storage, volInfo.getSize() - volInfo.getPhysicalSize());
|
||||
} catch (ResourceAllocationException e) {
|
||||
s_logger.warn(e.getMessage());
|
||||
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED, volume.getDataCenterId(), volume.getPodId(),
|
||||
e.getMessage(),
|
||||
e.getMessage());
|
||||
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED, volume.getDataCenterId(), volume.getPodId(), e.getMessage(), e.getMessage());
|
||||
} finally {
|
||||
_resourceLimitMgr.recalculateResourceCount(volume.getAccountId(), volume.getDomainId(),
|
||||
com.cloud.configuration.Resource.ResourceType.secondary_storage.getOrdinal());
|
||||
|
|
@ -1452,11 +1458,21 @@ public class VolumeServiceImpl implements VolumeService {
|
|||
}
|
||||
}
|
||||
continue;
|
||||
} else if (volume.getState() == State.NotUploaded || volume.getState() == State.UploadInProgress) { // failed uploads through SSVM
|
||||
s_logger.info("Volume Sync did not find " + volume.getUuid() + " uploaded using SSVM on image store " + storeId + ", marking it as failed");
|
||||
toBeDownloaded.remove(volumeStore);
|
||||
volumeStore.setDownloadState(Status.DOWNLOAD_ERROR);
|
||||
String msg = "Volume " + volume.getUuid() + " is corrupted on image store";
|
||||
volumeStore.setErrorString(msg);
|
||||
_volumeStoreDao.update(volumeStore.getId(), volumeStore);
|
||||
// mark volume as failed, so that storage GC will clean it up
|
||||
VolumeObject volObj = (VolumeObject)volFactory.getVolume(volume.getId());
|
||||
volObj.processEvent(Event.OperationFailed);
|
||||
continue;
|
||||
}
|
||||
// Volume is not on secondary but we should download.
|
||||
if (volumeStore.getDownloadState() != Status.DOWNLOADED) {
|
||||
s_logger.info("Volume Sync did not find " + volume.getName() + " ready on image store " + storeId +
|
||||
", will request download to start/resume shortly");
|
||||
s_logger.info("Volume Sync did not find " + volume.getName() + " ready on image store " + storeId + ", will request download to start/resume shortly");
|
||||
toBeDownloaded.add(volumeStore);
|
||||
}
|
||||
}
|
||||
|
|
@ -1464,7 +1480,7 @@ public class VolumeServiceImpl implements VolumeService {
|
|||
// Download volumes which haven't been downloaded yet.
|
||||
if (toBeDownloaded.size() > 0) {
|
||||
for (VolumeDataStoreVO volumeHost : toBeDownloaded) {
|
||||
if (volumeHost.getDownloadUrl() == null) { // If url is null we
|
||||
if (volumeHost.getDownloadUrl() == null) { // If url is null, skip downloading
|
||||
s_logger.info("Skip downloading volume " + volumeHost.getVolumeId() + " since no download url is specified.");
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1472,8 +1488,7 @@ public class VolumeServiceImpl implements VolumeService {
|
|||
// if this is a region store, and there is already an DOWNLOADED entry there without install_path information, which
|
||||
// means that this is a duplicate entry from migration of previous NFS to staging.
|
||||
if (store.getScope().getScopeType() == ScopeType.REGION) {
|
||||
if (volumeHost.getDownloadState() == VMTemplateStorageResourceAssoc.Status.DOWNLOADED
|
||||
&& volumeHost.getInstallPath() == null) {
|
||||
if (volumeHost.getDownloadState() == VMTemplateStorageResourceAssoc.Status.DOWNLOADED && volumeHost.getInstallPath() == null) {
|
||||
s_logger.info("Skip sync volume for migration of previous NFS to object store");
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1493,9 +1508,7 @@ public class VolumeServiceImpl implements VolumeService {
|
|||
Long uniqueName = entry.getKey();
|
||||
TemplateProp tInfo = entry.getValue();
|
||||
|
||||
//we cannot directly call expungeVolumeAsync here to
|
||||
// reuse delete logic since in this case, our db does not have
|
||||
// this template at all.
|
||||
// we cannot directly call expungeVolumeAsync here to reuse delete logic since in this case db does not have this volume at all.
|
||||
VolumeObjectTO tmplTO = new VolumeObjectTO();
|
||||
tmplTO.setDataStore(store.getTO());
|
||||
tmplTO.setPath(tInfo.getInstallPath());
|
||||
|
|
|
|||
|
|
@ -303,6 +303,7 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto
|
|||
} else if (tmpVolume.getState() == Volume.State.UploadInProgress) { // check for timeout
|
||||
if (System.currentTimeMillis() - tmpVolumeDataStore.getCreated().getTime() > _uploadOperationTimeout) {
|
||||
tmpVolumeDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.DOWNLOAD_ERROR);
|
||||
tmpVolumeDataStore.setState(State.Failed);
|
||||
stateMachine.transitTo(tmpVolume, Event.OperationFailed, null, _volumeDao);
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Volume " + tmpVolume.getUuid() + " failed to upload due to operation timed out");
|
||||
|
|
@ -377,6 +378,7 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto
|
|||
} else if (tmpTemplate.getState() == VirtualMachineTemplate.State.UploadInProgress) { // check for timeout
|
||||
if (System.currentTimeMillis() - tmpTemplateDataStore.getCreated().getTime() > _uploadOperationTimeout) {
|
||||
tmpTemplateDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.DOWNLOAD_ERROR);
|
||||
tmpTemplateDataStore.setState(State.Failed);
|
||||
stateMachine.transitTo(tmpTemplate, VirtualMachineTemplate.Event.OperationFailed, null, _templateDao);
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Template " + tmpTemplate.getUuid() + " failed to upload due to operation timed out");
|
||||
|
|
|
|||
Loading…
Reference in New Issue