Let VMSync be aware of HA take-over on VM state management.

This commit is contained in:
Kelven Yang 2014-03-16 12:34:17 -07:00
parent 0043a8f384
commit a5f418dd02
4 changed files with 53 additions and 68 deletions

View File

@ -4221,6 +4221,16 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
case Stopped:
case Migrating:
s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it");
if(vm.isHaEnabled() && vm.getState() == State.Running && vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != HypervisorType.Hyperv) {
s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart");
if(!_haMgr.hasPendingHaWork(vm.getId()))
_haMgr.scheduleRestart(vm, true);
else
s_logger.info("VM " + vm.getInstanceName() + " already has an pending HA task working on it");
return;
}
VirtualMachineGuru vmGuru = getVmGuru(vm);
VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
sendStop(vmGuru, profile, true);
@ -4406,10 +4416,11 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
super(VirtualMachine.class, job, VmJobCheckInterval.value(), new Predicate() {
// Condition evaluated by the Predicate that is passed to the superclass constructor.
// Per the final statements of the body, it returns true once the async job record is
// missing or its status is anything other than IN_PROGRESS, and false otherwise.
// NOTE(review): the instance/power-state lines and the jobVo lines look like the removed
// and added sides of a diff hunk rendered without +/- markers — confirm against the
// repository which of them is the live code.
@Override
public boolean checkCondition() {
VMInstanceVO instance = _vmDao.findById(vmId);
if ((instance.getPowerState() == desiredPowerState && srcHostIdForMigration == null) ||
(instance.getPowerState() == desiredPowerState && (srcHostIdForMigration != null && instance.getPowerHostId() != srcHostIdForMigration)))
AsyncJobVO jobVo = _entityMgr.findById(AsyncJobVO.class, job.getId());
// With assertions enabled (-ea) a missing job record fails on this assert; with them
// disabled, the null check on the next line makes the method return true instead.
assert (jobVo != null);
if (jobVo == null || jobVo.getStatus() != JobInfo.Status.IN_PROGRESS)
return true;
return false;
}
}, Topics.VM_POWER_STATE, AsyncJob.Topics.JOB_STATE);

View File

@ -465,7 +465,7 @@ public class VMInstanceVO implements VirtualMachine, FiniteStateObject<State, Vi
/**
 * Returns the display string for this VM, building it on first use and caching it in
 * the {@code toString} field so that later calls return the stored value.
 * The text has the form {@code VM[type|name]}.
 *
 * NOTE(review): two consecutive assignments are shown, one using {@code hostName} and one
 * using {@code getInstanceName()}; they look like the removed and added sides of a diff
 * hunk rendered without +/- markers. As written, the second assignment overwrites the first.
 */
@Override
public String toString() {
if (toString == null) {
toString = new StringBuilder("VM[").append(type.toString()).append("|").append(hostName).append("]").toString();
toString = new StringBuilder("VM[").append(type.toString()).append("|").append(getInstanceName()).append("]").toString();
}
return toString;
}

View File

@ -417,44 +417,44 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
@Override
public boolean updateState(State oldState, Event event, State newState, VirtualMachine vm, Object opaque) {
if (newState == null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("There's no way to transition from old state: " + oldState.toString() + " event: " + event.toString());
}
return false;
}
if (newState == null) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("There's no way to transition from old state: " + oldState.toString() + " event: " + event.toString());
}
return false;
}
@SuppressWarnings("unchecked")
Pair<Long, Long> hosts = (Pair<Long,Long>)opaque;
Long newHostId = hosts.second();
@SuppressWarnings("unchecked")
Pair<Long, Long> hosts = (Pair<Long, Long>)opaque;
Long newHostId = hosts.second();
VMInstanceVO vmi = (VMInstanceVO)vm;
Long oldHostId = vmi.getHostId();
Long oldUpdated = vmi.getUpdated();
Date oldUpdateDate = vmi.getUpdateTime();
if ( newState.equals(oldState) && newHostId != null && newHostId.equals(oldHostId) ) {
// state is same, don't need to update
return true;
}
VMInstanceVO vmi = (VMInstanceVO)vm;
Long oldHostId = vmi.getHostId();
Long oldUpdated = vmi.getUpdated();
Date oldUpdateDate = vmi.getUpdateTime();
if (newState.equals(oldState) && newHostId != null && newHostId.equals(oldHostId)) {
// state is same, don't need to update
return true;
}
// lock the target row at beginning to avoid lock-promotion caused deadlock
lockRow(vm.getId(), true);
SearchCriteria<VMInstanceVO> sc = StateChangeSearch.create();
sc.setParameters("id", vmi.getId());
sc.setParameters("states", oldState);
sc.setParameters("host", vmi.getHostId());
sc.setParameters("update", vmi.getUpdated());
vmi.incrUpdated();
UpdateBuilder ub = getUpdateBuilder(vmi);
SearchCriteria<VMInstanceVO> sc = StateChangeSearch.create();
sc.setParameters("id", vmi.getId());
sc.setParameters("states", oldState);
sc.setParameters("host", vmi.getHostId());
sc.setParameters("update", vmi.getUpdated());
ub.set(vmi, "state", newState);
ub.set(vmi, "hostId", newHostId);
ub.set(vmi, "podIdToDeployIn", vmi.getPodIdToDeployIn());
ub.set(vmi, _updateTimeAttr, new Date());
vmi.incrUpdated();
UpdateBuilder ub = getUpdateBuilder(vmi);
int result = update(vmi, sc);
ub.set(vmi, "state", newState);
ub.set(vmi, "hostId", newHostId);
ub.set(vmi, "podIdToDeployIn", vmi.getPodIdToDeployIn());
ub.set(vmi, _updateTimeAttr, new Date());
int result = update(vmi, sc);
if (result == 0) {
VMInstanceVO vo = findByIdIncludingRemoved(vm.getId());

View File

@ -72,7 +72,6 @@ import com.cloud.utils.NumbersUtil;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.fsm.StateListener;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.State;
@ -103,8 +102,7 @@ import com.cloud.vm.dao.VMInstanceDao;
* before retrying the stop | seconds | 120 || * }
**/
@Local(value = { HighAvailabilityManager.class })
public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvailabilityManager, ClusterManagerListener,
StateListener<State, VirtualMachine.Event, VirtualMachine> {
public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvailabilityManager, ClusterManagerListener {
protected static final Logger s_logger = Logger.getLogger(HighAvailabilityManagerImpl.class);
WorkerThread[] _workers;
@ -236,7 +234,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
return;
}
s_logger.warn("Scheduling restart for VMs on host " + host.getId());
s_logger.warn("Scheduling restart for VMs on host " + host.getId() + "-" + host.getName());
final List<VMInstanceVO> vms = _instanceDao.listByHostId(host.getId());
final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
@ -806,7 +804,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
_stopped = true;
_executor = Executors.newScheduledThreadPool(count, new NamedThreadFactory("HA"));
VirtualMachine.State.getStateMachine().registerListener(this);
return true;
}
@ -921,6 +918,12 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
work.setTimeToTry(nextTime);
work.setServerId(null);
work.setDateTaken(null);
// If the restart failed midway due to an exception, the VM state may have been changed;
// recapture it into the HA worker so that it can really continue on its next turn.
VMInstanceVO vm = _instanceDao.findById(work.getInstanceId());
work.setUpdateTime(vm.getUpdated());
work.setPreviousState(vm.getState());
}
_haDao.update(work.getId(), work);
} catch (final Throwable th) {
@ -962,35 +965,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
return _haPlanners.get(0);
}
/**
 * Pre-transition hook for VM state changes. This implementation inspects none of its
 * arguments and always returns {@code true}.
 */
@Override
public boolean preStateTransitionEvent(State oldState, VirtualMachine.Event event, State newState, VirtualMachine vo, boolean status,
        Object opaque) {
    return true;
}
/**
 * Post-transition hook for VM state changes.
 *
 * When the transition is Running -> Stopped triggered by
 * {@code FollowAgentPowerOffReport}, the VM is re-read from the instance DAO and, if it
 * is HA enabled, a restart is scheduled asynchronously on the HA executor.
 * All other transitions are ignored.
 *
 * @return always {@code true}
 */
@Override
public boolean postStateTransitionEvent(State oldState, VirtualMachine.Event event, State newState, VirtualMachine vo, boolean status, Object opaque) {
    final boolean poweredOffOutOfBand = oldState == State.Running
            && event == VirtualMachine.Event.FollowAgentPowerOffReport
            && newState == State.Stopped;
    if (!poweredOffOutOfBand) {
        return true;
    }

    // Work from a freshly loaded record rather than the object handed to the listener.
    final VMInstanceVO vm = _instanceDao.findById(vo.getId());
    if (!vm.isHaEnabled()) {
        return true;
    }

    // The persisted state is only logged when it disagrees; the restart is scheduled regardless.
    if (vm.getState() != State.Stopped) {
        s_logger.warn("Sanity check failed. postStateTransitionEvent reports transited to Stopped but VM " + vm + " is still at state " + vm.getState());
    }

    s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart");

    // Hand the restart off to the executor so the state-transition callback returns promptly.
    _executor.submit(new ManagedContextRunnable() {
        @Override
        protected void runInContext() {
            try {
                scheduleRestart(vm, false);
            } catch (Exception e) {
                s_logger.warn("Unexpected exception when scheduling a HA restart", e);
            }
        }
    });
    return true;
}
@Override
public boolean hasPendingHaWork(long vmId) {
List<HaWorkVO> haWorks = _haDao.listRunningHaWorkForVm(vmId);