Mark VMs in error state when expunge fails during destroy operation (#12749)

* Mark VMs in error state when expunge fails during destroy operation

* fetch volume by external id (used by external plugins)

* review comments

* Update reorder hosts log to DEBUG, log line is too verbose to have on as INFO
This commit is contained in:
Suresh Kumar Anaparti 2026-03-24 08:59:14 +05:30 committed by GitHub
parent e93ae1a4f4
commit bce55945ec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 106 additions and 4 deletions

View File

@ -124,6 +124,9 @@ public interface VirtualMachine extends RunningOn, ControlledEntity, Partition,
s_fsm.addTransition(new Transition<State, Event>(State.Stopping, VirtualMachine.Event.StopRequested, State.Stopping, null));
s_fsm.addTransition(new Transition<State, Event>(State.Stopping, VirtualMachine.Event.AgentReportShutdowned, State.Stopped, Arrays.asList(new Impact[]{Impact.USAGE})));
s_fsm.addTransition(new Transition<State, Event>(State.Expunging, VirtualMachine.Event.OperationFailed, State.Expunging,null));
// Note: In addition to the Stopped -> Error transition for failed VM creation,
// a VM can also transition from Expunging to Error on OperationFailedToError.
s_fsm.addTransition(new Transition<State, Event>(State.Expunging, VirtualMachine.Event.OperationFailedToError, State.Error, null));
s_fsm.addTransition(new Transition<State, Event>(State.Expunging, VirtualMachine.Event.ExpungeOperation, State.Expunging,null));
s_fsm.addTransition(new Transition<State, Event>(State.Error, VirtualMachine.Event.DestroyRequested, State.Expunging, null));
s_fsm.addTransition(new Transition<State, Event>(State.Error, VirtualMachine.Event.ExpungeOperation, State.Expunging, null));

View File

@ -166,4 +166,12 @@ public interface VolumeDao extends GenericDao<VolumeVO, Long>, StateDao<Volume.S
int getVolumeCountByOfferingId(long diskOfferingId);
VolumeVO findByLastIdAndState(long lastVolumeId, Volume.State...states);
/**
* Retrieves volume by its externalId
*
* @param externalUuid
* @return Volume Object of matching search criteria
*/
VolumeVO findByExternalUuid(String externalUuid);
}

View File

@ -74,6 +74,7 @@ public class VolumeDaoImpl extends GenericDaoBase<VolumeVO, Long> implements Vol
private final SearchBuilder<VolumeVO> storeAndInstallPathSearch;
private final SearchBuilder<VolumeVO> volumeIdSearch;
protected GenericSearchBuilder<VolumeVO, Long> CountByAccount;
protected final SearchBuilder<VolumeVO> ExternalUuidSearch;
protected GenericSearchBuilder<VolumeVO, SumCount> primaryStorageSearch;
protected GenericSearchBuilder<VolumeVO, SumCount> primaryStorageSearch2;
protected GenericSearchBuilder<VolumeVO, SumCount> secondaryStorageSearch;
@ -459,6 +460,10 @@ public class VolumeDaoImpl extends GenericDaoBase<VolumeVO, Long> implements Vol
CountByAccount.and("idNIN", CountByAccount.entity().getId(), Op.NIN);
CountByAccount.done();
ExternalUuidSearch = createSearchBuilder();
ExternalUuidSearch.and("externalUuid", ExternalUuidSearch.entity().getExternalUuid(), Op.EQ);
ExternalUuidSearch.done();
primaryStorageSearch = createSearchBuilder(SumCount.class);
primaryStorageSearch.select("sum", Func.SUM, primaryStorageSearch.entity().getSize());
primaryStorageSearch.and("accountId", primaryStorageSearch.entity().getAccountId(), Op.EQ);
@ -934,4 +939,11 @@ public class VolumeDaoImpl extends GenericDaoBase<VolumeVO, Long> implements Vol
sc.and(sc.entity().getState(), SearchCriteria.Op.IN, (Object[]) states);
return sc.find();
}
@Override
public VolumeVO findByExternalUuid(String externalUuid) {
SearchCriteria<VolumeVO> sc = ExternalUuidSearch.create();
sc.setParameters("externalUuid", externalUuid);
return findOneBy(sc);
}
}

View File

@ -1714,7 +1714,7 @@ StateListener<State, VirtualMachine.Event, VirtualMachine>, Configurable {
@Override
public void reorderHostsByPriority(Map<Long, Integer> priorities, List<Host> hosts) {
logger.info("Re-ordering hosts {} by priorities {}", hosts, priorities);
logger.debug("Re-ordering hosts {} by priorities {}", hosts, priorities);
hosts.removeIf(host -> DataCenterDeployment.PROHIBITED_HOST_PRIORITY.equals(getHostPriority(priorities, host.getId())));
@ -1727,7 +1727,7 @@ StateListener<State, VirtualMachine.Event, VirtualMachine>, Configurable {
}
);
logger.info("Hosts after re-ordering are: {}", hosts);
logger.debug("Hosts after re-ordering are: {}", hosts);
}
private Integer getHostPriority(Map<Long, Integer> priorities, Long hostId) {

View File

@ -2578,6 +2578,22 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
}
}
private void transitionExpungingToError(long vmId) {
UserVmVO vm = _vmDao.findById(vmId);
if (vm != null && vm.getState() == State.Expunging) {
try {
boolean transitioned = _itMgr.stateTransitTo(vm, VirtualMachine.Event.OperationFailedToError, null);
if (transitioned) {
logger.info("Transitioned VM [{}] from Expunging to Error after failed expunge", vm.getUuid());
} else {
logger.warn("Failed to persist transition of VM [{}] from Expunging to Error after failed expunge, possibly due to concurrent update", vm.getUuid());
}
} catch (NoTransitionException e) {
logger.warn("Failed to transition VM {} to Error state: {}", vm, e.getMessage());
}
}
}
/**
* Release network resources, it was done on vm stop previously.
* @param id vm id
@ -3561,8 +3577,19 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
detachVolumesFromVm(vm, dataVols);
UserVm destroyedVm = destroyVm(vmId, expunge);
if (expunge && !expunge(vm)) {
throw new CloudRuntimeException("Failed to expunge vm " + destroyedVm);
if (expunge) {
boolean expunged = false;
String errorMsg = "";
try {
expunged = expunge(vm);
} catch (RuntimeException e) {
logger.error("Failed to expunge VM [{}] due to: {}", vm, e.getMessage(), e);
errorMsg = e.getMessage();
}
if (!expunged) {
transitionExpungingToError(vm.getId());
throw new CloudRuntimeException("Failed to expunge VM " + vm.getUuid() + (StringUtils.isNotBlank(errorMsg) ? " due to: " + errorMsg : ""));
}
}
autoScaleManager.removeVmFromVmGroup(vmId);

View File

@ -60,6 +60,7 @@ import java.util.TimeZone;
import java.util.UUID;
import com.cloud.storage.dao.SnapshotPolicyDao;
import com.cloud.utils.fsm.NoTransitionException;
import org.apache.cloudstack.acl.ControlledEntity;
import org.apache.cloudstack.acl.SecurityChecker;
import org.apache.cloudstack.api.ApiCommandResourceType;
@ -4177,4 +4178,55 @@ public class UserVmManagerImplTest {
verify(userVmDao, times(1)).releaseFromLockTable(vmId);
}
@Test
public void testTransitionExpungingToErrorVmInExpungingState() throws Exception {
UserVmVO vm = mock(UserVmVO.class);
when(vm.getState()).thenReturn(VirtualMachine.State.Expunging);
when(vm.getUuid()).thenReturn("test-uuid");
when(userVmDao.findById(vmId)).thenReturn(vm);
when(virtualMachineManager.stateTransitTo(eq(vm), eq(VirtualMachine.Event.OperationFailedToError), eq(null))).thenReturn(true);
java.lang.reflect.Method method = UserVmManagerImpl.class.getDeclaredMethod("transitionExpungingToError", long.class);
method.setAccessible(true);
method.invoke(userVmManagerImpl, vmId);
Mockito.verify(virtualMachineManager).stateTransitTo(vm, VirtualMachine.Event.OperationFailedToError, null);
}
@Test
public void testTransitionExpungingToErrorVmNotInExpungingState() throws Exception {
UserVmVO vm = mock(UserVmVO.class);
when(vm.getState()).thenReturn(VirtualMachine.State.Stopped);
when(userVmDao.findById(vmId)).thenReturn(vm);
java.lang.reflect.Method method = UserVmManagerImpl.class.getDeclaredMethod("transitionExpungingToError", long.class);
method.setAccessible(true);
method.invoke(userVmManagerImpl, vmId);
Mockito.verify(virtualMachineManager, Mockito.never()).stateTransitTo(any(VirtualMachine.class), any(VirtualMachine.Event.class), any());
}
@Test
public void testTransitionExpungingToErrorVmNotFound() throws Exception {
when(userVmDao.findById(vmId)).thenReturn(null);
java.lang.reflect.Method method = UserVmManagerImpl.class.getDeclaredMethod("transitionExpungingToError", long.class);
method.setAccessible(true);
method.invoke(userVmManagerImpl, vmId);
Mockito.verify(virtualMachineManager, Mockito.never()).stateTransitTo(any(VirtualMachine.class), any(VirtualMachine.Event.class), any());
}
@Test
public void testTransitionExpungingToErrorHandlesNoTransitionException() throws Exception {
UserVmVO vm = mock(UserVmVO.class);
when(vm.getState()).thenReturn(VirtualMachine.State.Expunging);
when(userVmDao.findById(vmId)).thenReturn(vm);
when(virtualMachineManager.stateTransitTo(eq(vm), eq(VirtualMachine.Event.OperationFailedToError), eq(null)))
.thenThrow(new NoTransitionException("no transition"));
java.lang.reflect.Method method = UserVmManagerImpl.class.getDeclaredMethod("transitionExpungingToError", long.class);
method.setAccessible(true);
method.invoke(userVmManagerImpl, vmId);
}
}