diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index b552903dcf3..fcd6cc9247d 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -2058,6 +2058,9 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { boolean doCidrCheck = true; ClusterVO clusterVO = _clusterDao.findById(clusterId); + if(clusterVO == null){ + throw new IllegalArgumentException("Cluster with id " + clusterId + " does not exist, cannot add host with this clusterId"); + } if (clusterVO.getHypervisorType() != scc.getHypervisorType()) { throw new IllegalArgumentException("Can't add host whose hypervisor type is: " + scc.getHypervisorType() + " into cluster: " + clusterId + " whose hypervisor type is: " + clusterVO.getHypervisorType()); diff --git a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java index 67c93d9bfb9..9ed6f88ec0c 100644 --- a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -68,10 +68,8 @@ import com.cloud.utils.component.ComponentLocator; import com.cloud.utils.component.Inject; import com.cloud.utils.concurrency.NamedThreadFactory; import com.cloud.utils.exception.CloudRuntimeException; -import com.cloud.utils.fsm.NoTransitionException; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine; -import com.cloud.vm.VirtualMachine.Event; import com.cloud.vm.VirtualMachine.State; import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.VirtualMachineProfile; @@ -269,9 +267,17 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu if (hostId == null) { try { s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm); - _itMgr.stateTransitTo(vm, Event.OperationFailed, null); - } catch (NoTransitionException e) { - } + _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); + } catch (ResourceUnavailableException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } catch (OperationTimedoutException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } catch (ConcurrentOperationException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } return; } @@ -300,12 +306,15 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu try { _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (ResourceUnavailableException e) { - // FIXME + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { - // FIXME + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { - // FIXME - } + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } } List items = _haDao.findPreviousHA(vm.getId()); @@ -454,7 +463,19 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu work.setStep(Step.Scheduled); _haDao.update(work.getId(), work); } else { - assert false : "How come that HA step is Investigating and the host is removed?"; + s_logger.debug("How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways"); + try { + _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); + } catch (ResourceUnavailableException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } catch (OperationTimedoutException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } catch (ConcurrentOperationException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } } } @@ -594,12 +615,6 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu s_logger.info("Stopping " + vm); try { if (work.getWorkType() == WorkType.Stop) { - if (vm.getHostId() == null) { - if (s_logger.isDebugEnabled()) { - s_logger.debug(vm.toString() + " has already been stopped"); - } - return null; - } if (_itMgr.advanceStop(vm, false, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { s_logger.info("Successfully stopped " + vm); return null; diff --git a/server/src/com/cloud/vm/ItWorkVO.java b/server/src/com/cloud/vm/ItWorkVO.java index 14ce83fb42f..d1f9b66115d 100644 --- a/server/src/com/cloud/vm/ItWorkVO.java +++ b/server/src/com/cloud/vm/ItWorkVO.java @@ -132,6 +132,10 @@ public class ItWorkVO { return managementServerId; } + public void setManagementServerId(long managementServerId) { + this.managementServerId = managementServerId; + } + public State getType() { return type; } diff --git a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java index a3f27d4c0b9..9640758ffb0 100755 --- a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -924,14 +924,34 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } return true; } - + //grab outstanding work item if any + ItWorkVO work = _workDao.findByOutstandingWork(vm.getId(), vm.getState()); + if(work != null){ + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found an outstanding work item for this vm "+ vm + " with state:"+vm.getState()+", work id:"+work.getId()); + } + } Long hostId = vm.getHostId(); if (hostId == null) { + if(!forced){ + if (s_logger.isDebugEnabled()) { + s_logger.debug("HostId is null but this is not a forced stop, cannot stop vm "+ vm + " with state:"+vm.getState()); + } + return false; + } try { stateTransitTo(vm, Event.AgentReportStopped, null, null); } catch (NoTransitionException e) { s_logger.warn(e.getMessage()); } + //mark outstanding work item if any as done + if (work != null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Updating work item to Done, id:"+work.getId()); + } + work.setStep(Step.Done); + _workDao.update(work.getId(), work); + } return true; } @@ -946,22 +966,46 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene if (!forced) { throw new CloudRuntimeException("We cannot stop " + vm + " when it is in state " + vm.getState()); } - s_logger.debug("Unable to transition the state but we're moving on because it's forced stop"); - if (state == State.Starting || state == State.Stopping || state == State.Migrating) { - ItWorkVO work = _workDao.findByOutstandingWork(vm.getId(), vm.getState()); + boolean doCleanup = false; + if (s_logger.isDebugEnabled()) { + s_logger.debug("Unable to transition the state but we're moving on because it's forced stop"); + } + if (state == State.Starting || state == State.Migrating) { if (work != null) { - if (cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.StopRequested, forced, user, account)) { - try { - return stateTransitTo(vm, Event.AgentReportStopped, null); - } catch (NoTransitionException e) { - s_logger.warn("Unable to cleanup " + vm); - return false; - } + doCleanup = true; + }else{ + if (s_logger.isDebugEnabled()) { + s_logger.debug("Unable to cleanup VM: "+ vm+" ,since outstanding work item is not found"); } + throw new CloudRuntimeException("Work item not found, We cannot stop " + vm + " when it is in state " + vm.getState()); + } + }else if(state == State.Stopping){ + doCleanup = true; + } + + if(doCleanup){ + if (cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.StopRequested, forced, user, account)) { + try { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Updating work item to Done, id:"+work.getId()); + } + return changeState(vm, Event.AgentReportStopped, null, work, Step.Done); + } catch (NoTransitionException e) { + s_logger.warn("Unable to cleanup " + vm); + return false; + } + }else{ + if (s_logger.isDebugEnabled()) { + s_logger.debug("Failed to cleanup VM: "+ vm); + } + throw new CloudRuntimeException("Failed to cleanup " + vm + " , current state " + vm.getState()); } } } + if(vm.getState() != State.Stopping){ + throw new CloudRuntimeException("We cannot proceed with stop VM " + vm + " since it is not in 'Stopping' state, current state: " + vm.getState()); + } String routerPrivateIp = null; if (vm.getType() == VirtualMachine.Type.DomainRouter) { routerPrivateIp = vm.getPrivateIpAddress(); @@ -1017,6 +1061,14 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } try { + if (work != null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Updating the outstanding work item to Done, id:"+work.getId()); + } + work.setStep(Step.Done); + _workDao.update(work.getId(), work); + } + return stateTransitTo(vm, Event.OperationSucceeded, null, null); } catch (NoTransitionException e) { s_logger.warn(e.getMessage()); @@ -1262,14 +1314,18 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene if (vm != null) { if (work.getType() == State.Starting) { _haMgr.scheduleRestart(vm, true); + work.setManagementServerId(_nodeId); + _workDao.update(work.getId(), work); } else if (work.getType() == State.Stopping) { _haMgr.scheduleStop(vm, vm.getHostId(), WorkType.CheckStop); + work.setManagementServerId(_nodeId); + _workDao.update(work.getId(), work); } else if (work.getType() == State.Migrating) { _haMgr.scheduleMigration(vm); + work.setStep(Step.Done); + _workDao.update(work.getId(), work); } } - work.setStep(Step.Done); - _workDao.update(work.getId(), work); } catch (Exception e) { s_logger.error("Error while handling " + work, e); } @@ -1693,6 +1749,15 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene s_logger.debug("VM state is starting on full sync so updating it to running"); vm = findById(vm.getType(), vm.getId()); + + //grab outstanding work item if any + ItWorkVO work = _workDao.findByOutstandingWork(vm.getId(), vm.getState()); + if(work != null){ + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found an outstanding work item for this vm "+ vm + " in state:"+vm.getState()+", work id:"+work.getId()); + } + } + try { stateTransitTo(vm, cause, hostId); } catch (NoTransitionException e1) { @@ -1726,6 +1791,14 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } else { s_logger.error("Unable to finalize commands on start for vm: " + vm); } + + if (work != null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Updating outstanding work item to Done, id:"+work.getId()); + } + work.setStep(Step.Done); + _workDao.update(work.getId(), work); + } } public Commands fullSync(final long hostId, StartupRoutingCommand startup) {