mirror of https://github.com/apache/cloudstack.git
/**
 * Copyright (C) 2010 Cloud.com, Inc. All rights reserved.
 *
 * This software is licensed under the GNU General Public License v3 or later.
 *
 * It is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package com.cloud.ha;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import javax.ejb.Local;
import javax.naming.ConfigurationException;

import org.apache.log4j.Logger;

import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.MigrateCommand;
import com.cloud.alert.AlertManager;
import com.cloud.configuration.dao.ConfigurationDao;
import com.cloud.dc.DataCenterVO;
import com.cloud.dc.HostPodVO;
import com.cloud.dc.dao.DataCenterDao;
import com.cloud.dc.dao.HostPodDao;
import com.cloud.exception.AgentUnavailableException;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.exception.StorageUnavailableException;
import com.cloud.ha.HaWorkVO.WorkType;
import com.cloud.ha.dao.HighAvailabilityDao;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.maid.StackMaid;
import com.cloud.server.ManagementServer;
import com.cloud.storage.StorageManager;
import com.cloud.storage.dao.GuestOSCategoryDao;
import com.cloud.storage.dao.GuestOSDao;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.component.Adapters;
import com.cloud.utils.component.ComponentLocator;
import com.cloud.utils.component.Inject;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.GlobalLock;
import com.cloud.vm.State;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.Event;
import com.cloud.vm.VirtualMachineGuru;
import com.cloud.vm.VirtualMachineManager;
import com.cloud.vm.dao.VMInstanceDao;

/**
 * HighAvailabilityManagerImpl coordinates the HA process.  VMs are registered with
 * the HA Coordinator for HA.  Each request is stored in a database-backed
 * work queue.  The HA Coordinator also has a number of workers that pick up
 * these work items to perform HA on the VMs.
 *
 * The HA process goes as follows:
 * 1. Check with the list of Investigators to determine that the VM is
 *    no longer running.  If an Investigator finds the VM is still alive,
 *    the HA process is stopped and the state of the VM reverts back to
 *    its previous state.  If an Investigator finds the VM is dead, then
 *    the HA process is started on the VM, skipping step 2.
 * 2. If the Investigators cannot determine whether the VM is dead or
 *    alive, the list of FenceBuilders is invoked to fence off the VM
 *    so that it won't do any damage to the storage and network.
 * 3. The VM is marked as stopped.
 * 4. The VM is started again via the normal process of starting VMs.  Note
 *    that once the VM is marked as stopped, the user may have started the
 *    VM himself.
 * 5. VMs that have been restarted more than the configured number of times are
 *    marked as in Error state and the user is not allowed to restart
 *    the VM.
 *
 * @config
 * {@table
 *    || Param Name | Description | Values | Default ||
 *    || workers | number of worker threads to spin off to do the processing | int | 1 ||
 *    || time.to.sleep | time to sleep if no work items are found | seconds | 60 ||
 *    || max.retries | number of times to retry a start | int | 5 ||
 *    || time.between.failure | time elapsed between failures before we consider it as another retry | seconds | 3600 ||
 *    || time.between.cleanup | time to wait before the cleanup thread runs | seconds | 86400 ||
 *    || force.ha | force HA to happen even if the VM says no | boolean | false ||
 *    || ha.retry.wait | time to wait before retrying the work item | seconds | 120 ||
 *    || stop.retry.wait | time to wait before retrying the stop | seconds | 120 ||
 * }
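 *
 * A minimal usage sketch (illustrative only; "haMgr" is an assumed reference to this
 * manager obtained via injection or the ComponentLocator, not part of this file):
 * <pre>
 *    // when a routing host appears to be down, ask the Investigators about it and,
 *    // if the host is confirmed Down, schedule HA for the VMs that were on it
 *    Status state = haMgr.investigate(host.getId());
 *    if (state == Status.Down) {
 *        haMgr.scheduleRestartForVmsOnHost(host);
 *    }
 * </pre>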
 **/
@SuppressWarnings("unchecked")
@Local(value={HighAvailabilityManager.class})
public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
    protected static final Logger s_logger = Logger.getLogger(HighAvailabilityManagerImpl.class);
    String _name;
    WorkerThread[] _workers;
    boolean _stopped;
    long _timeToSleep;
    @Inject HighAvailabilityDao _haDao;
    @Inject VMInstanceDao _instanceDao;
    @Inject HostDao _hostDao;
    @Inject DataCenterDao _dcDao;
    @Inject HostPodDao _podDao;
    long _serverId;
    Adapters<Investigator> _investigators;
    Adapters<FenceBuilder> _fenceBuilders;
    @Inject AgentManager _agentMgr;
    @Inject AlertManager _alertMgr;
    @Inject StorageManager _storageMgr;
    @Inject GuestOSDao _guestOSDao;
    @Inject GuestOSCategoryDao _guestOSCategoryDao;
    @Inject VirtualMachineManager _itMgr;

    String _instance;
    ScheduledExecutorService _executor;
    int _operationTimeout;
    int _stopRetryInterval;
    int _investigateRetryInterval;
    int _migrateRetryInterval;
    int _restartRetryInterval;

    HashMap<VirtualMachine.Type, VirtualMachineGuru<VMInstanceVO>> _handlers;

    int _maxRetries;
    long _timeBetweenFailures;
    long _timeBetweenCleanups;
    boolean _forceHA;

    protected HighAvailabilityManagerImpl() {
        _handlers = new HashMap<VirtualMachine.Type, VirtualMachineGuru<VMInstanceVO>>(11);
    }

    @Override
    public Status investigate(final long hostId) {
        final HostVO host = _hostDao.findById(hostId);
        if (host == null) {
            return null;
        }

        final Enumeration<Investigator> en = _investigators.enumeration();
        Status hostState = null;
        Investigator investigator = null;
        while (en.hasMoreElements()) {
            investigator = en.nextElement();
            hostState = investigator.isAgentAlive(host);
            if (hostState != null) {
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug(investigator.getName() + " was able to determine host " + hostId + " is in " + hostState.toString());
                }
                return hostState;
            }
            if (s_logger.isDebugEnabled()) {
                s_logger.debug(investigator.getName() + " unable to determine the state of the host.  Moving on.");
            }
        }

        return null;
    }

    @Override
    public void scheduleRestartForVmsOnHost(final HostVO host) {
        if (host.getType() != Host.Type.Routing) {
            return;
        }
        s_logger.warn("Scheduling restart for VMs on host " + host.getId());

        final List<VMInstanceVO> vms = _instanceDao.listByHostId(host.getId());
        final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());

        // collect the names of the HA-enabled VMs for the alert email
        StringBuilder sb = null;
        if ((vms != null) && !vms.isEmpty()) {
            sb = new StringBuilder();
            sb.append(" Starting HA on the following VMs: ");
            for (final VMInstanceVO vm : vms) {
                if (vm.isHaEnabled()) {
                    sb.append(" " + vm.getName());
                }
            }
        }

        // send an email alert that the host is down, including the affected VMs
        HostPodVO podVO = _podDao.findById(host.getPodId());
        String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();

        _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host is down, " + hostDesc, "Host [" + hostDesc + "] is down." + ((sb != null) ? sb.toString() : ""));

        for (final VMInstanceVO vm : vms) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Notifying HA Mgr to investigate vm " + vm.getId() + "-" + vm.getName());
            }
            scheduleRestart(vm, true);
        }
    }

    @Override
    public void scheduleStop(final VMInstanceVO vm, long hostId, boolean verifyHost) {
        if (_haDao.hasBeenScheduled(vm.getId(), verifyHost ? WorkType.CheckStop : WorkType.Stop)) {
            s_logger.info("There's already a job scheduled to stop " + vm.toString());
            return;
        }

        final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), verifyHost ? WorkType.CheckStop : WorkType.Stop, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated());
        _haDao.persist(work);
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Scheduled " + work.toString() + " verifyHost = " + verifyHost);
        }
        wakeupWorkers();
    }

    @Override
    public synchronized void registerHandler(final VirtualMachine.Type type, final VirtualMachineGuru<? extends VMInstanceVO> handler) {
        s_logger.info("Registering " + handler.getClass().getSimpleName() + " as the handler for " + type);
        _handlers.put(type, (VirtualMachineGuru<VMInstanceVO>)handler);
    }

    @Override
    public synchronized void unregisterHandler(final VirtualMachine.Type type) {
        _handlers.remove(type);
    }

    protected void wakeupWorkers() {
        for (WorkerThread worker : _workers) {
            worker.wakup();
        }
    }

    @Override
    public boolean scheduleMigration(final VMInstanceVO vm) {
        final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Migration, Step.Scheduled, vm.getHostId(), vm.getState(), 0, vm.getUpdated());
        _haDao.persist(work);
        wakeupWorkers();
        return true;
    }

    @Override
    public void scheduleRestart(VMInstanceVO vm, final boolean investigate) {
        Long hostId = vm.getHostId();
        VirtualMachineGuru<VMInstanceVO> mgr = findManager(vm.getType());
        vm = mgr.get(vm.getId());
        if (!investigate) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("VM does not require investigation so I'm marking it as Stopped: " + vm.toString());
            }

            short alertType = AlertManager.ALERT_TYPE_USERVM;
            if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
                alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
            } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
                alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
            }

            if (!(_forceHA || vm.isHaEnabled())) {
                _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: "
                        + vm.getName() + ", id: " + vm.getId() + ") stopped unexpectedly on host "
                        + vm.getHostId(), "Virtual Machine " + vm.getName() + " (id: "
                        + vm.getId() + ") running on host [" + vm.getHostId()
                        + "] stopped unexpectedly.");

                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("VM is not HA enabled so we're done.");
                }
            }

            mgr.completeStopCommand(vm);
        }

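        // carry forward the retry count from any recent HA work items for this VM so that
        // repeated failures within _timeBetweenFailures eventually exceed _maxRetries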
        final List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId());
        int maxRetries = 0;
        for (final HaWorkVO item : items) {
            if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) {
                maxRetries = item.getTimesTried();
                break;
            }
        }

        final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, hostId, vm.getState(),
                maxRetries + 1, vm.getUpdated());
        _haDao.persist(work);

        if (s_logger.isInfoEnabled()) {
            s_logger.info("Schedule vm for HA: " + vm.toString());
        }

        wakeupWorkers();
    }

    protected VirtualMachineGuru<VMInstanceVO> findManager(final VirtualMachine.Type type) {
        return _handlers.get(type);
    }

    protected Long restart(final HaWorkVO work) {
        final long vmId = work.getInstanceId();

        final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());
        if (mgr == null) {
            s_logger.warn("Unable to find a handler for " + work.getType().toString() + ", throwing out " + vmId);
            return null;
        }

        VMInstanceVO vm = mgr.get(vmId);
        if (vm == null) {
            s_logger.info("Unable to find vm: " + vmId);
            return null;
        }

        s_logger.info("HA on " + vm.toString());
        if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) {
            s_logger.info("VM " + vm.toString() + " has been changed.  Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + " previous updated = " + work.getUpdateTime());
            return null;
        }

        final HostVO host = _hostDao.findById(work.getHostId());

        DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
        HostPodVO podVO = _podDao.findById(host.getPodId());
        String hostDesc = "name: " + host.getName() + "(id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();

        short alertType = AlertManager.ALERT_TYPE_USERVM;
        if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
            alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
        } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
            alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
        }

        Boolean alive = null;
        if (work.getStep() == Step.Investigating) {
            if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
                s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId());
                if (vm.getState() == State.Starting && vm.getUpdated() == work.getUpdateTime()) {
                    _itMgr.stateTransitTo(vm, Event.AgentReportStopped, null);
                }
                return null;
            }

            Enumeration<Investigator> en = _investigators.enumeration();
            Investigator investigator = null;
            while (en.hasMoreElements()) {
                investigator = en.nextElement();
                alive = investigator.isVmAlive(vm, host);
                if (alive != null) {
                    s_logger.debug(investigator.getName() + " found VM " + vm.getName() + " to be alive? " + alive);
                    break;
                }
            }
            if (alive != null && alive) {
                s_logger.debug("VM " + vm.getName() + " is found to be alive by " + investigator.getName());
                if (host.getStatus() == Status.Up) {
                    compareState(vm, new AgentVmInfo(vm.getInstanceName(), mgr, State.Running), false);
                    return null;
                } else {
                    s_logger.debug("Rescheduling because the host is not up but the vm is alive");
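                    // note: retry times throughout this class are stored as
                    // System.currentTimeMillis() >> 10 (roughly seconds, i.e. ms / 1024);
                    // the WorkerThread shifts the value back (<< 10) when logging the retry Date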
                    return (System.currentTimeMillis() >> 10) + _investigateRetryInterval;
                }
            }

            boolean fenced = false;
            if (alive == null || !alive) {
                fenced = true;
                s_logger.debug("Fencing off VM that we don't know the state of");
                Enumeration<FenceBuilder> enfb = _fenceBuilders.enumeration();
                while (enfb.hasMoreElements()) {
                    final FenceBuilder fb = enfb.nextElement();
                    Boolean result = fb.fenceOff(vm, host);
                    if (result != null && !result) {
                        fenced = false;
                    }
                }
            }

            if (alive == null && !fenced) {
                s_logger.debug("We were unable to fence off the VM " + vm.toString());
                _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "Unable to fence off VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc);
                return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
            }

            mgr.completeStopCommand(vm);

            work.setStep(Step.Scheduled);
            _haDao.update(work.getId(), work);
        }

        // send an alert for VMs that stop unexpectedly
        _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(),
                "VM (name: " + vm.getName() + ", id: " + vmId + ") stopped unexpectedly on host "
                        + hostDesc, "Virtual Machine " + vm.getName() + " (id: "
                        + vm.getId() + ") running on host [" + hostDesc + "] stopped unexpectedly.");

        vm = mgr.get(vm.getId());

        if (!_forceHA && !vm.isHaEnabled()) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("VM is not HA enabled so we're done.");
            }
            return null; // VM doesn't require HA
        }

        if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("VM can not restart on another server.");
            }
            return null;
        }

        if (work.getTimesTried() > _maxRetries) {
            s_logger.warn("Retried the maximum number of times, so giving up on: " + vmId);
            return null;
        }

        try {
            VMInstanceVO started = mgr.start(vm.getId());
            if (started != null) {
                s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId());
                return null;
            }

            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval);
            }
            vm = mgr.get(vm.getId());
            work.setUpdateTime(vm.getUpdated());
            work.setPreviousState(vm.getState());
            return (System.currentTimeMillis() >> 10) + _restartRetryInterval;
        } catch (final InsufficientCapacityException e) {
            s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
            _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc);
            return null;
        } catch (final ResourceUnavailableException e) {
            s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
            _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "The storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc);
            return null;
        } catch (ConcurrentOperationException e) {
            s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage());
            _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "A concurrent operation prevented restarting VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc);
            return null;
        }
    }

    /**
     * compareState does as its name suggests and compares the states between
     * the management server and the agent.  It returns a cleanup Command if
     * something on the agent should be cleaned up, or null otherwise.
     *
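     * A rough summary of the handling below (agent state vs. server state):
     * <pre>
     *    agent Stopped, server Running              -> scheduleRestart + cleanup
     *    agent Stopped, server Stopping (full sync) -> completeStopCommand + cleanup
     *    agent Running, server Starting (full sync) -> completeStartCommand
     *    agent Running, server Stopped/Destroyed    -> cleanup on the agent
     *    states already equal                       -> report the event; cleanup if Stopped
     * </pre>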
     */
    protected Command compareState(VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync) {
        State agentState = info.state;
        final String agentName = info.name;
        final State serverState = vm.getState();
        final String serverName = vm.getName();

        Command command = null;

        if (s_logger.isDebugEnabled()) {
            s_logger.debug("VM " + serverName + ": server state = " + serverState.toString() + " and agent state = " + agentState.toString());
        }

        if (agentState == State.Error) {
            agentState = State.Stopped;

            short alertType = AlertManager.ALERT_TYPE_USERVM;
            if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
                alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER;
            } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
                alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY;
            }

            HostPodVO podVO = _podDao.findById(vm.getPodId());
            DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId());
            HostVO hostVO = _hostDao.findById(vm.getHostId());

            String hostDesc = "name: " + hostVO.getName() + " (id:" + hostVO.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();
            _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: " + vm.getName() + ", id: " + vm.getId() + ") stopped on host " + hostDesc + " due to storage failure", "Virtual Machine " + vm.getName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped due to storage failure.");
        }

        if (serverState == State.Migrating) {
            s_logger.debug("Skipping vm in migrating state: " + vm.toString());
            return null;
        }

        if (agentState == serverState) {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Both states are " + agentState.toString() + " for " + serverName);
            }
            assert (agentState == State.Stopped || agentState == State.Running) : "If the states we send up is changed, this must be changed.";
            _itMgr.stateTransitTo(vm, agentState == State.Stopped ? VirtualMachine.Event.AgentReportStopped : VirtualMachine.Event.AgentReportRunning, vm.getHostId());
            if (agentState == State.Stopped) {
                s_logger.debug("State matches but the agent said stopped so let's send a cleanup anyway.");
                return info.mgr.cleanup(vm, agentName);
            }
            return null;
        }

        if (agentState == State.Stopped) {
            // This state means the VM on the agent was detected previously
            // and now is gone.  This is slightly different than if the VM
            // was never completed, but we still send down a Stop Command
            // to ensure there's cleanup.
            if (serverState == State.Running) {
                // Our records showed that it should be running so let's restart it.
                vm = info.mgr.get(vm.getId());
                scheduleRestart(vm, false);
                command = info.mgr.cleanup(vm, agentName);
            } else if (serverState == State.Stopping) {
                if (fullSync) {
                    s_logger.debug("VM is in stopping state on full sync.  Updating the status to stopped");
                    vm = info.mgr.get(vm.getId());
                    info.mgr.completeStopCommand(vm);
                    command = info.mgr.cleanup(vm, agentName);
                } else {
                    s_logger.debug("Ignoring VM in stopping mode: " + vm.getName());
                }
            } else if (serverState == State.Starting) {
                s_logger.debug("Ignoring VM in starting mode: " + vm.getName());
            } else {
                s_logger.debug("Sending cleanup to a stopped vm: " + agentName);
                _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportStopped, null);
                command = info.mgr.cleanup(vm, agentName);
            }
        } else if (agentState == State.Running) {
            if (serverState == State.Starting) {
                if (fullSync) {
                    s_logger.debug("VM state is starting on full sync so updating it to running");
                    vm = info.mgr.get(vm.getId());
                    info.mgr.completeStartCommand(vm);
                }
            } else if (serverState == State.Stopping) {
                if (fullSync) {
                    s_logger.debug("VM state is stopping on full sync so resend stop.");
                    vm = info.mgr.get(vm.getId());
                    info.mgr.completeStopCommand(vm);
                    command = info.mgr.cleanup(vm, agentName);
                } else {
                    s_logger.debug("VM is in stopping state so no action.");
                }
            } else if (serverState == State.Destroyed || serverState == State.Stopped || serverState == State.Expunging) {
                s_logger.debug("VM state is stopped on the server so stopping it on the agent");
                vm = info.mgr.get(vm.getId());
                command = info.mgr.cleanup(vm, agentName);
            } else {
                _itMgr.stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, vm.getHostId());
            }
        } /*else if (agentState == State.Unknown) {
            if (serverState == State.Running) {
                if (fullSync) {
                    vm = info.handler.get(vm.getId());
                }
                scheduleRestart(vm, false);
            } else if (serverState == State.Starting) {
                if (fullSync) {
                    vm = info.handler.get(vm.getId());
                }
                scheduleRestart(vm, false);
            } else if (serverState == State.Stopping) {
                if (fullSync) {
                    s_logger.debug("VM state is stopping in full sync.  Resending stop");
                    command = info.handler.cleanup(vm, agentName);
                }
            }
        }*/
        return command;
    }

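    // fullSync: reconcile every VM the database places on this host against the agent's
    // reported states.  VMs missing from the agent report are treated as Stopped, and VMs
    // the database has no record of are cleaned up on the agent.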
    public List<Command> fullSync(final long hostId, final Map<String, State> newStates) {
        final List<? extends VMInstanceVO> vms = _instanceDao.listByHostId(hostId);
        s_logger.debug("Found " + vms.size() + " VMs for host " + hostId);

        final Map<Long, AgentVmInfo> states = convertToIds(newStates);
        final ArrayList<Command> commands = new ArrayList<Command>();

        for (final VMInstanceVO vm : vms) {
            AgentVmInfo info = states.remove(vm.getId());

            if (info == null) {
                info = new AgentVmInfo(null, findManager(vm.getType()), State.Stopped);
            }

            assert info.mgr != null : "How can the manager be null for " + vm.getType();

            VMInstanceVO vmCasted = info.mgr.get(vm.getId());
            final Command command = compareState(vmCasted, info, true);
            if (command != null) {
                commands.add(command);
            }
        }

        for (final AgentVmInfo left : states.values()) {
            s_logger.warn("Stopping a VM that we have no record of: " + left.name);
            commands.add(left.mgr.cleanup(null, left.name));
        }

        return commands;
    }

    protected Map<Long, AgentVmInfo> convertToIds(final Map<String, State> states) {
        final HashMap<Long, AgentVmInfo> map = new HashMap<Long, AgentVmInfo>();

        if (states == null) {
            return map;
        }

        final Collection<VirtualMachineGuru<VMInstanceVO>> handlers = _handlers.values();

        for (final Map.Entry<String, State> entry : states.entrySet()) {
            for (final VirtualMachineGuru<VMInstanceVO> handler : handlers) {
                final String name = entry.getKey();

                final Long id = handler.convertToId(name);

                if (id != null) {
                    map.put(id, new AgentVmInfo(entry.getKey(), handler, entry.getValue()));
                    break;
                }
            }
        }

        return map;
    }

    public List<Command> deltaSync(final long hostId, final Map<String, State> newStates) {
        final Map<Long, AgentVmInfo> states = convertToIds(newStates);
        final ArrayList<Command> commands = new ArrayList<Command>();

        for (final Map.Entry<Long, AgentVmInfo> entry : states.entrySet()) {
            final AgentVmInfo info = entry.getValue();

            final VMInstanceVO vm = info.mgr.get(entry.getKey());

            Command command = null;
            if (vm != null && vm.getHostId() != null && vm.getHostId() == hostId) {
                command = compareState(vm, info, false);
            } else {
                s_logger.debug("VM is not found.  Stopping " + info.name);
                command = info.mgr.cleanup(null, info.name);
            }

            if (command != null) {
                commands.add(command);
            }
        }

        return commands;
    }

    public Long migrate(final HaWorkVO work) {
        final long vmId = work.getInstanceId();

        final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());

        VMInstanceVO vm = mgr.get(vmId);
        if (vm == null || vm.getRemoved() != null) {
            s_logger.debug("Unable to find the vm " + vmId);
            return null;
        }

        s_logger.info("Migrating vm: " + vm.toString());
        if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) {
            s_logger.info("VM is no longer running on the host it was scheduled to migrate from");
            return null;
        }

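        // the work item's Step drives a two-phase flow: Scheduled -> pick a target host and
        // mark the work Migrating; Migrating -> send the MigrateCommand to the source host
        // and then complete the migration on the target.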
        short alertType = AlertManager.ALERT_TYPE_USERVM_MIGRATE;
        if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) {
            alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE;
        } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) {
            alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY_MIGRATE;
        }

        HostVO fromHost = _hostDao.findById(vm.getHostId());
        String fromHostName = ((fromHost == null) ? "unknown" : fromHost.getName());
        HostVO toHost = null;
        if (work.getStep() == Step.Scheduled) {
            if (vm.getState() != State.Running) {
                s_logger.info("VM's state is not ready for migration. " + vm.toString() + " State is " + vm.getState().toString());
                return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
            }

            DataCenterVO dcVO = _dcDao.findById(fromHost.getDataCenterId());
            HostPodVO podVO = _podDao.findById(fromHost.getPodId());

            try {
                toHost = mgr.prepareForMigration(vm);
                if (toHost == null) {
                    if (s_logger.isDebugEnabled()) {
                        s_logger.debug("Unable to find a host for migrating vm " + vmId);
                    }
                    _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHostName + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Unable to find a suitable host");
                }
            } catch (final InsufficientCapacityException e) {
                s_logger.warn("Unable to migrate due to insufficient capacity " + vm.toString());
                _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHostName + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Insufficient capacity");
            } catch (final StorageUnavailableException e) {
                s_logger.warn("Storage is unavailable: " + vm.toString());
                _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHostName + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Storage is gone.");
            }

            if (toHost == null) {
                _agentMgr.maintenanceFailed(vm.getHostId());
                return null;
            }

            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Migrating from " + work.getHostId() + " to " + toHost.getId());
            }
            work.setStep(Step.Migrating);
            work.setHostId(toHost.getId());
            _haDao.update(work.getId(), work);
        }

        if (work.getStep() == Step.Migrating) {
            vm = mgr.get(vmId); // let's see if anything has changed.
            boolean migrated = false;
            if (vm == null || vm.getRemoved() != null || vm.getHostId() == null || !_itMgr.stateTransitTo(vm, Event.MigrationRequested, vm.getHostId())) {
                s_logger.info("Migration cancelled because state has changed: " + vm.toString());
            } else {
                try {
                    boolean isWindows = _guestOSCategoryDao.findById(_guestOSDao.findById(vm.getGuestOSId()).getCategoryId()).getName().equalsIgnoreCase("Windows");
                    MigrateCommand cmd = new MigrateCommand(vm.getInstanceName(), toHost.getPrivateIpAddress(), isWindows);
                    Answer answer = _agentMgr.send(fromHost.getId(), cmd);
                    if (answer != null && answer.getResult()) {
                        migrated = true;
                        _storageMgr.unshare(vm, fromHost);
                        work.setStep(Step.Investigating);
                        _haDao.update(work.getId(), work);
                    }
                } catch (final AgentUnavailableException e) {
                    s_logger.debug("host became unavailable");
                } catch (final OperationTimedoutException e) {
                    s_logger.debug("operation timed out");
                    if (e.isActive()) {
                        scheduleRestart(vm, true);
                    }
                }
            }

            if (!migrated) {
                s_logger.info("Migration was unsuccessful.  Cleaning up: " + vm.toString());

                DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId());
                HostPodVO podVO = _podDao.findById(vm.getPodId());
                _alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHost.getName() + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Migrate Command failed.  Please check logs.");

                _itMgr.stateTransitTo(vm, Event.MigrationFailedOnSource, toHost.getId());
                _agentMgr.maintenanceFailed(vm.getHostId());

                Command cleanup = mgr.cleanup(vm, null);
                _agentMgr.easySend(toHost.getId(), cleanup);
                _storageMgr.unshare(vm, toHost);

                return null;
            }
        }

        if (toHost == null) {
            toHost = _hostDao.findById(work.getHostId());
        }
        DataCenterVO dcVO = _dcDao.findById(toHost.getDataCenterId());
        HostPodVO podVO = _podDao.findById(toHost.getPodId());

        try {
            if (!mgr.completeMigration(vm, toHost)) {
                _alertMgr.sendAlert(alertType, toHost.getDataCenterId(), toHost.getPodId(), "Unable to migrate " + vmId + " to host " + toHost.getName() + " in zone " + dcVO.getName() + " and pod " + podVO.getName(), "Migration not completed");
                s_logger.warn("Unable to complete migration: " + vm.toString());
            } else {
                s_logger.info("Migration is complete: " + vm.toString());
            }
            return null;
        } catch (final AgentUnavailableException e) {
            s_logger.warn("Agent is unavailable for " + vm.toString());
        } catch (final OperationTimedoutException e) {
            s_logger.warn("Operation timed out for " + vm.toString());
        }
        _itMgr.stateTransitTo(vm, Event.MigrationFailedOnDest, toHost.getId());
        return (System.currentTimeMillis() >> 10) + _migrateRetryInterval;
    }

    @Override
    public void scheduleDestroy(VMInstanceVO vm, long hostId) {
        final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Destroy, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated());
        _haDao.persist(work);
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Scheduled " + work.toString());
        }
        wakeupWorkers();
    }

    @Override
    public void cancelDestroy(VMInstanceVO vm, Long hostId) {
        _haDao.delete(vm.getId(), WorkType.Destroy);
    }

    protected Long destroyVM(HaWorkVO work) {
        final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());
        final VMInstanceVO vm = mgr.get(work.getInstanceId());
        s_logger.info("Destroying " + vm.toString());
        try {
            if (vm.getState() != State.Destroyed) {
                s_logger.info("VM is no longer in Destroyed state " + vm.toString());
                return null;
            }

            if (vm.getHostId() != null) {
                Command cmd = mgr.cleanup(vm, null);
                Answer ans = _agentMgr.send(work.getHostId(), cmd);
                if (ans.getResult()) {
                    mgr.completeStopCommand(vm);
                    if (mgr.destroy(vm)) {
                        s_logger.info("Successfully destroyed " + vm.toString());
                        return null;
                    }
                }
                s_logger.debug("Stop for " + vm.toString() + " was unsuccessful.  Detail: " + ans.getDetails());
            } else {
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug(vm.toString() + " has already been stopped");
                }
                return null;
            }
        } catch (final AgentUnavailableException e) {
            s_logger.debug("Agent is not available: " + e.getMessage());
        } catch (OperationTimedoutException e) {
            s_logger.debug("operation timed out: " + e.getMessage());
        }

        work.setTimesTried(work.getTimesTried() + 1);
        return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
    }

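    // stopVM handles both WorkType.Stop (stop the VM through its guru) and WorkType.CheckStop
    // (verify the VM is still Stopping on the scheduled host and, if so, send a cleanup command)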
    protected Long stopVM(final HaWorkVO work) {
        final VirtualMachineGuru<VMInstanceVO> mgr = findManager(work.getType());
        final VMInstanceVO vm = mgr.get(work.getInstanceId());
        s_logger.info("Stopping " + vm.toString());
        try {
            if (work.getWorkType() == WorkType.Stop) {
                if (vm.getHostId() != null) {
                    if (mgr.stop(vm)) {
                        s_logger.info("Successfully stopped " + vm.toString());
                        return null;
                    }
                } else {
                    if (s_logger.isDebugEnabled()) {
                        s_logger.debug(vm.toString() + " has already been stopped");
                    }
                    return null;
                }
            } else if (work.getWorkType() == WorkType.CheckStop) {
                if ((vm.getState() != State.Stopping) || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) {
                    if (s_logger.isDebugEnabled()) {
                        s_logger.debug(vm.toString() + " is different now.  Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState());
                    }
                    return null;
                } else {
                    Command cmd = mgr.cleanup(vm, null);
                    Answer ans = _agentMgr.send(work.getHostId(), cmd);
                    if (ans.getResult()) {
                        mgr.completeStopCommand(vm);
                        s_logger.info("Successfully stopped " + vm.toString());
                        return null;
                    }
                    s_logger.debug("Stop for " + vm.toString() + " was unsuccessful.  Detail: " + ans.getDetails());
                }
            } else {
                assert false : "Who decided there's other steps but didn't modify the guy who does the work?";
            }
        } catch (final AgentUnavailableException e) {
            s_logger.debug("Agent is not available: " + e.getMessage());
        } catch (OperationTimedoutException e) {
            s_logger.debug("operation timed out: " + e.getMessage());
        }

        work.setTimesTried(work.getTimesTried() + 1);
        return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
    }

    @Override
    public void cancelScheduledMigrations(final HostVO host) {
        WorkType type = host.getType() == HostVO.Type.Storage ? WorkType.Stop : WorkType.Migration;

        _haDao.deleteMigrationWorkItems(host.getId(), type, _serverId);
    }

    @Override
    public List<VMInstanceVO> findTakenMigrationWork() {
        List<HaWorkVO> works = _haDao.findTakenWorkItems(WorkType.Migration);
        List<VMInstanceVO> vms = new ArrayList<VMInstanceVO>(works.size());
        for (HaWorkVO work : works) {
            vms.add(_instanceDao.findById(work.getInstanceId()));
        }
        return vms;
    }

    @Override
    public boolean configure(final String name, final Map<String, Object> xmlParams) throws ConfigurationException {
        _name = name;
        ComponentLocator locator = ComponentLocator.getLocator(ManagementServer.Name);

        _serverId = ((ManagementServer)ComponentLocator.getComponent(ManagementServer.Name)).getId();

        _investigators = locator.getAdapters(Investigator.class);
        _fenceBuilders = locator.getAdapters(FenceBuilder.class);

        Map<String, String> params = new HashMap<String, String>();
        final ConfigurationDao configDao = locator.getDao(ConfigurationDao.class);
        if (configDao != null) {
            params = configDao.getConfiguration(Long.toHexString(_serverId), xmlParams);
        }

        String value = params.get("workers");
        final int count = NumbersUtil.parseInt(value, 1);
        _workers = new WorkerThread[count];
        for (int i = 0; i < _workers.length; i++) {
            _workers[i] = new WorkerThread("HA-Worker-" + i);
        }

        value = params.get("force.ha");
        _forceHA = Boolean.parseBoolean(value);

        value = params.get("time.to.sleep");
        _timeToSleep = NumbersUtil.parseInt(value, 60) * 1000;

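        // note on units: _timeToSleep (above) and _timeBetweenFailures (below) are held in
        // milliseconds, while the remaining retry/cleanup intervals stay in seconds -- they are
        // added to currentTimeMillis() >> 10 values or handed to the scheduler with TimeUnit.SECONDS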
        value = params.get("max.retries");
        _maxRetries = NumbersUtil.parseInt(value, 5);

        value = params.get("time.between.failures");
        _timeBetweenFailures = NumbersUtil.parseLong(value, 3600) * 1000;

        value = params.get("time.between.cleanup");
        _timeBetweenCleanups = NumbersUtil.parseLong(value, 3600 * 24);

        value = params.get("wait");
        _operationTimeout = NumbersUtil.parseInt(value, 1800) * 2;

        value = params.get("stop.retry.interval");
        _stopRetryInterval = NumbersUtil.parseInt(value, 10 * 60);

        value = params.get("restart.retry.interval");
        _restartRetryInterval = NumbersUtil.parseInt(value, 10 * 60);

        value = params.get("investigate.retry.interval");
        _investigateRetryInterval = NumbersUtil.parseInt(value, 1 * 60);

        value = params.get("migrate.retry.interval");
        _migrateRetryInterval = NumbersUtil.parseInt(value, 2 * 60);

        _instance = params.get("instance");
        if (_instance == null) {
            _instance = "VMOPS";
        }

        _stopped = true;

        _executor = Executors.newScheduledThreadPool(count, new NamedThreadFactory("HA"));

        _agentMgr.registerForHostEvents(new VmSyncListener(this, _agentMgr), true, true, true);

        return true;
    }

    @Override
    public String getName() {
        return _name;
    }

    @Override
    public boolean start() {
        _stopped = false;

        for (final WorkerThread thread : _workers) {
            thread.start();
        }

        _executor.scheduleAtFixedRate(new CleanupTask(), _timeBetweenCleanups, _timeBetweenCleanups, TimeUnit.SECONDS);
        _executor.scheduleAtFixedRate(new TransitionTask(), 0, _operationTimeout, TimeUnit.SECONDS);

        return true;
    }

    @Override
    public boolean stop() {
        _stopped = true;

        wakeupWorkers();

        _executor.shutdown();

        return true;
    }

    protected class CleanupTask implements Runnable {
        @Override
        public void run() {
            s_logger.info("HA Cleanup Thread Running");

            try {
                _haDao.cleanup(System.currentTimeMillis() - _timeBetweenFailures);
            } catch (Exception e) {
                s_logger.warn("Error while cleaning up", e);
            } finally {
                StackMaid.current().exitCleanup();
            }
        }
    }

    protected class WorkerThread extends Thread {
        public WorkerThread(String name) {
            super(name);
        }

        @Override
        public void run() {
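            // main work loop: take an HA work item from the queue, dispatch it by WorkType,
            // and either mark it Done or reschedule it for the time the handler returned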
            s_logger.info("Starting work");
            while (!_stopped) {
                try {
                    s_logger.trace("Checking the database");
                    final HaWorkVO work = _haDao.take(_serverId);
                    if (work == null) {
                        try {
                            synchronized (this) {
                                wait(_timeToSleep);
                            }
                            continue;
                        } catch (final InterruptedException e) {
                            s_logger.info("Interrupted");
                            continue;
                        }
                    }

                    s_logger.info("Working on " + work.toString());

                    try {
                        final WorkType wt = work.getWorkType();
                        Long nextTime = null;
                        if (wt == WorkType.Migration) {
                            nextTime = migrate(work);
                        } else if (wt == WorkType.HA) {
                            nextTime = restart(work);
                        } else if (wt == WorkType.Stop || wt == WorkType.CheckStop) {
                            nextTime = stopVM(work);
                        } else if (wt == WorkType.Destroy) {
                            nextTime = destroyVM(work);
                        } else {
                            assert false : "How did we get here with " + wt.toString();
                            continue;
                        }

                        if (nextTime == null) {
                            if (s_logger.isDebugEnabled()) {
                                s_logger.debug(work.toString() + " is complete");
                            }
                            work.setStep(Step.Done);
                        } else {
                            if (s_logger.isDebugEnabled()) {
                                s_logger.debug("Rescheduling " + work.toString() + " for instance " + work.getInstanceId() + " to try again at " + new Date(nextTime << 10));
                            }
                            work.setTimeToTry(nextTime);
                            work.setServerId(null);
                            work.setDateTaken(null);
                        }
                    } catch (Exception e) {
                        s_logger.error("Caught this exception while processing the work queue.", e);
                        work.setStep(Step.Error);
                    }
                    _haDao.update(work.getId(), work);
                } catch (final Throwable th) {
                    s_logger.error("Caught this throwable, ", th);
                } finally {
                    StackMaid.current().exitCleanup();
                }
            }
            s_logger.info("Time to go home!");
        }

        public synchronized void wakup() {
            notifyAll();
        }
    }

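    // TransitionTask periodically looks for VMs stuck in Starting or Stopping for longer than
    // the operation timeout and schedules a CheckStop or an investigated restart for them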
    protected class TransitionTask implements Runnable {
        @Override
        public void run() {
            GlobalLock lock = GlobalLock.getInternLock("TransitionChecking");
            if (lock == null) {
                s_logger.debug("Couldn't get the global lock");
                return;
            }

            if (!lock.lock(30)) {
                s_logger.debug("Couldn't lock the db");
                return;
            }
            try {
                lock.addRef();
                List<VMInstanceVO> instances = _instanceDao.findVMInTransition(new Date(new Date().getTime() - (_operationTimeout * 1000)), State.Starting, State.Stopping);
                for (VMInstanceVO instance : instances) {
                    State state = instance.getState();
                    if (state == State.Stopping) {
                        scheduleStop(instance, instance.getHostId(), true);
                    } else if (state == State.Starting) {
                        scheduleRestart(instance, true);
                    }
                }
            } catch (Exception e) {
                s_logger.warn("Caught the following exception on transition checking", e);
            } finally {
                StackMaid.current().exitCleanup();
                lock.unlock();
            }
        }
    }

    protected class AgentVmInfo {
        public String name;
        public VirtualMachineGuru<VMInstanceVO> mgr;
        public State state;
        public State action;

        public AgentVmInfo(final String name, final VirtualMachineGuru<VMInstanceVO> handler, final State state) {
            this.name = name;
            this.mgr = handler;
            this.state = state;
        }
    }
}