diff --git a/api/src/com/cloud/agent/api/ClusterSyncAnswer.java b/api/src/com/cloud/agent/api/ClusterSyncAnswer.java index 5db7f4ea326..caf51f2d2b1 100644 --- a/api/src/com/cloud/agent/api/ClusterSyncAnswer.java +++ b/api/src/com/cloud/agent/api/ClusterSyncAnswer.java @@ -25,12 +25,7 @@ import com.cloud.vm.VirtualMachine.State; public class ClusterSyncAnswer extends Answer { private long _clusterId; private HashMap> _newStates; - private HashMap> _allStates; - private int _type = -1; // 0 for full, 1 for delta private boolean _isExecuted=false; - - public static final int FULL_SYNC=0; - public static final int DELTA_SYNC=1; // this is here because a cron command answer is being sent twice // AgentAttache.processAnswers @@ -47,19 +42,9 @@ public class ClusterSyncAnswer extends Answer { public ClusterSyncAnswer(long clusterId, HashMap> newStates){ _clusterId = clusterId; _newStates = newStates; - _allStates = null; - _type = DELTA_SYNC; result = true; } - public ClusterSyncAnswer(long clusterId, HashMap> newStates, HashMap> allStates){ - _clusterId = clusterId; - _newStates = newStates; - _allStates = allStates; - _type = FULL_SYNC; - result = true; - } - public long getClusterId() { return _clusterId; } @@ -68,15 +53,4 @@ public class ClusterSyncAnswer extends Answer { return _newStates; } - public HashMap> getAllStates() { - return _allStates; - } - - public boolean isFull(){ - return _type==0; - } - - public boolean isDelta(){ - return _type==1; - } } \ No newline at end of file diff --git a/api/src/com/cloud/agent/api/ClusterSyncCommand.java b/api/src/com/cloud/agent/api/ClusterSyncCommand.java index f70a8398a21..7411e3053c3 100644 --- a/api/src/com/cloud/agent/api/ClusterSyncCommand.java +++ b/api/src/com/cloud/agent/api/ClusterSyncCommand.java @@ -20,19 +20,15 @@ package com.cloud.agent.api; public class ClusterSyncCommand extends Command implements CronCommand { int _interval; - int _skipSteps; // skip this many steps for full sync - int _steps; long _clusterId; public ClusterSyncCommand() { } - public ClusterSyncCommand(int interval, int skipSteps, long clusterId){ + public ClusterSyncCommand(int interval, long clusterId){ _interval = interval; - _skipSteps = skipSteps; _clusterId = clusterId; - _steps=0; } @Override @@ -40,19 +36,6 @@ public class ClusterSyncCommand extends Command implements CronCommand { return _interval; } - public int getSkipSteps(){ - return _skipSteps; - } - - public void incrStep(){ - _steps++; - if (_steps>=_skipSteps)_steps=0; - } - - public boolean isRightStep(){ - return (_steps==0); - } - public long getClusterId() { return _clusterId; } diff --git a/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java b/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java index bbeb9bb5e96..ddb20c11c84 100755 --- a/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java +++ b/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java @@ -6626,16 +6626,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe return new Answer(cmd); } HashMap> newStates = deltaClusterSync(conn); - cmd.incrStep(); - if (cmd.isRightStep()){ - // do full sync - HashMap> allStates=fullClusterSync(conn); - return new ClusterSyncAnswer(cmd.getClusterId(), newStates, allStates); - } - else { - cmd.incrStep(); - return new ClusterSyncAnswer(cmd.getClusterId(), newStates); - } + return new ClusterSyncAnswer(cmd.getClusterId(), newStates); } diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java index 51046983778..bf8d784143f 100755 --- a/server/src/com/cloud/configuration/Config.java +++ b/server/src/com/cloud/configuration/Config.java @@ -160,7 +160,6 @@ public enum Config { PingInterval("Advanced", AgentManager.class, Integer.class, "ping.interval", "60", "Ping interval in seconds", null), PingTimeout("Advanced", AgentManager.class, Float.class, "ping.timeout", "2.5", "Multiplier to ping.interval before announcing an agent has timed out", null), ClusterDeltaSyncInterval("Advanced", AgentManager.class, Integer.class, "sync.interval", "60", "Cluster Delta sync interval in seconds", null), - ClusterFullSyncSkipSteps("Advanced", AgentManager.class, Integer.class, "skip.steps", "60", "Cluster full sync skip steps count", null), Port("Advanced", AgentManager.class, Integer.class, "port", "8250", "Port to listen on for agent connection.", null), RouterCpuMHz("Advanced", NetworkManager.class, Integer.class, "router.cpu.mhz", String.valueOf(VirtualNetworkApplianceManager.DEFAULT_ROUTER_CPU_MHZ), "Default CPU speed (MHz) for router VM.", null), RestartRetryInterval("Advanced", HighAvailabilityManager.class, Integer.class, "restart.retry.interval", "600", "Time (in seconds) between retries to restart a vm", null), diff --git a/server/src/com/cloud/host/dao/HostDao.java b/server/src/com/cloud/host/dao/HostDao.java index 4e2bded2e6e..eb5565c7971 100755 --- a/server/src/com/cloud/host/dao/HostDao.java +++ b/server/src/com/cloud/host/dao/HostDao.java @@ -66,5 +66,7 @@ public interface HostDao extends GenericDao, StateDao findAndUpdateApplianceToLoad(long lastPingSecondsAfter, long managementServerId); - boolean updateResourceState(ResourceState oldState, ResourceState.Event event, ResourceState newState, Host vo); + boolean updateResourceState(ResourceState oldState, ResourceState.Event event, ResourceState newState, Host vo); + + HostVO findByGuid(String guid); } diff --git a/server/src/com/cloud/host/dao/HostDaoImpl.java b/server/src/com/cloud/host/dao/HostDaoImpl.java index b9777dca70f..471f4e40969 100755 --- a/server/src/com/cloud/host/dao/HostDaoImpl.java +++ b/server/src/com/cloud/host/dao/HostDaoImpl.java @@ -319,6 +319,14 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao List hosts = listBy(sc); return hosts.size(); } + + + @Override + public HostVO findByGuid(String guid) { + SearchCriteria sc = GuidSearch.create("guid", guid); + return findOneBy(sc); + } + @Override @DB public List findAndUpdateDirectAgentToLoad(long lastPingSecondsAfter, Long limit, long managementServerId) { Transaction txn = Transaction.currentTxn(); diff --git a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java index 8d1b0c84e9b..f9c07a7e28a 100755 --- a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -1706,43 +1706,62 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } - public void fullSync(final long clusterId, Map> newStates, boolean init) { + public void fullSync(final long clusterId, Map> newStates) { + if (newStates==null)return; Map infos = convertToInfos(newStates); Set set_vms = Collections.synchronizedSet(new HashSet()); set_vms.addAll(_vmDao.listByClusterId(clusterId)); - set_vms.addAll(_vmDao.listStartingByClusterId(clusterId)); - + set_vms.addAll(_vmDao.listLHByClusterId(clusterId)); + for (VMInstanceVO vm : set_vms) { if (vm.isRemoved() || vm.getState() == State.Destroyed || vm.getState() == State.Expunging) continue; AgentVmInfo info = infos.remove(vm.getId()); - if (init){ // mark the VMs real state on initial sync - VMInstanceVO castedVm = null; - if (info == null && vm.getState() == State.Running) { // only work on VMs which were supposed to be running earlier - info = new AgentVmInfo(vm.getInstanceName(), getVmGuru(vm), vm, State.Stopped); - castedVm = info.guru.findById(vm.getId()); - try { - Host host = _resourceMgr.findHostByGuid(info.getHostUuid()); - long hostId = host == null ? (vm.getHostId() == null ? vm.getLastHostId() : vm.getHostId()) : host.getId(); - HypervisorGuru hvGuru = _hvGuruMgr.getGuru(castedVm.getHypervisorType()); - Command command = compareState(hostId, castedVm, info, true, hvGuru.trackVmHostChange()); - if (command != null){ - Answer answer = _agentMgr.send(hostId, command); - if (!answer.getResult()) { - s_logger.warn("Failed to update state of the VM due to " + answer.getDetails()); - } - } - } catch (Exception e) { - s_logger.warn("Unable to update state of the VM due to exception " + e.getMessage()); - e.printStackTrace(); - } + VMInstanceVO castedVm = null; + if ((info == null && (vm.getState() == State.Running || vm.getState() == State.Starting)) + || (info != null && (info.state == State.Running && vm.getState() == State.Starting))) + { + s_logger.info("Found vm " + vm.getInstanceName() + " in inconsistent state. " + vm.getState() + " on CS while " + (info == null ? "Stopped" : "Running") + " on agent"); + info = new AgentVmInfo(vm.getInstanceName(), getVmGuru(vm), vm, State.Stopped); + vm.setState(State.Running); // set it as running and let HA take care of it + _vmDao.persist(vm); + castedVm = info.guru.findById(vm.getId()); + try { + Host host = _hostDao.findByGuid(info.getHostUuid()); + long hostId = host == null ? (vm.getHostId() == null ? vm.getLastHostId() : vm.getHostId()) : host.getId(); + HypervisorGuru hvGuru = _hvGuruMgr.getGuru(castedVm.getHypervisorType()); + Command command = compareState(hostId, castedVm, info, true, hvGuru.trackVmHostChange()); + if (command != null){ + Answer answer = _agentMgr.send(hostId, command); + if (!answer.getResult()) { + s_logger.warn("Failed to update state of the VM due to " + answer.getDetails()); + } + } + } catch (Exception e) { + s_logger.warn("Unable to update state of the VM due to exception " + e.getMessage()); + e.printStackTrace(); } - } - - } + } + else + // host id can change + if (info != null && vm.getState() == State.Running){ + // check for host id changes + Host host = _hostDao.findByGuid(info.getHostUuid()); + if (host != null && (vm.getHostId() == null || host.getId() != vm.getHostId())){ + s_logger.info("Found vm " + vm.getInstanceName() + " with inconsistent host in db, new host is " + host.getId()); + try { + stateTransitTo(vm, VirtualMachine.Event.AgentReportMigrated, host.getId()); + } catch (NoTransitionException e) { + s_logger.warn(e.getMessage()); + } + } + } + } + for (final AgentVmInfo left : infos.values()) { + if (VirtualMachineName.isValidVmName(left.name)) continue; // if the vm follows cloudstack naming ignore it for stopping try { - Host host = _resourceMgr.findHostByGuid(left.getHostUuid()); + Host host = _hostDao.findByGuid(left.getHostUuid()); if (host != null){ s_logger.warn("Stopping a VM which we do not have any record of " + left.name); Answer answer = _agentMgr.send(host.getId(), cleanup(left.name)); @@ -1754,10 +1773,11 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene s_logger.warn("Unable to stop a VM due to " + e.getMessage()); } } - + } + protected Map convertToInfos(final Map> newStates) { final HashMap map = new HashMap(); if (newStates == null) { @@ -2088,12 +2108,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene if (answer instanceof ClusterSyncAnswer) { ClusterSyncAnswer hs = (ClusterSyncAnswer) answer; if (!hs.isExceuted()){ - if (hs.isFull()) { - deltaSync(hs.getNewStates()); - fullSync(hs.getClusterId(), hs.getAllStates(), false); - } else if (hs.isDelta()){ - deltaSync(hs.getNewStates()); - } + deltaSync(hs.getNewStates()); hs.setExecuted(); } } else if (!answer.getResult()) { @@ -2171,12 +2186,11 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene StartupRoutingCommand startup = (StartupRoutingCommand) cmd; HashMap> allStates = startup.getClusterVMStateChanges(); if (allStates != null){ - this.fullSync(clusterId, allStates, true); + this.fullSync(clusterId, allStates); } // initiate the cron job - ClusterSyncCommand syncCmd = new ClusterSyncCommand(Integer.parseInt(Config.ClusterDeltaSyncInterval.getDefaultValue()), - Integer.parseInt(Config.ClusterFullSyncSkipSteps.getDefaultValue()), clusterId); + ClusterSyncCommand syncCmd = new ClusterSyncCommand(Integer.parseInt(Config.ClusterDeltaSyncInterval.getDefaultValue()), clusterId); try { long seq_no = _agentMgr.send(agentId, new Commands(syncCmd), this); s_logger.debug("Cluster VM sync started with jobid " + seq_no); diff --git a/server/src/com/cloud/vm/dao/VMInstanceDao.java b/server/src/com/cloud/vm/dao/VMInstanceDao.java index 1a1fb03c708..b69bce23349 100644 --- a/server/src/com/cloud/vm/dao/VMInstanceDao.java +++ b/server/src/com/cloud/vm/dao/VMInstanceDao.java @@ -84,7 +84,7 @@ public interface VMInstanceDao extends GenericDao, StateDao< public Long countAllocatedVirtualRoutersForAccount(long accountId); List listByClusterId(long clusterId); // this does not pull up VMs which are starting - List listStartingByClusterId(long clusterId); // get all the VMs even starting one on this cluster + List listLHByClusterId(long clusterId); // get all the VMs even starting one on this cluster List listVmsMigratingFromHost(Long hostId); diff --git a/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java b/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java index d0bf40f24d4..82a9f261b0b 100644 --- a/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java +++ b/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java @@ -59,7 +59,7 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem public static final Logger s_logger = Logger.getLogger(VMInstanceDaoImpl.class); protected final SearchBuilder VMClusterSearch; - protected final SearchBuilder StartingVMClusterSearch; + protected final SearchBuilder LHVMClusterSearch; protected final SearchBuilder IdStatesSearch; protected final SearchBuilder AllFieldsSearch; protected final SearchBuilder ZoneTemplateNonExpungedSearch; @@ -102,11 +102,11 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem VMClusterSearch.done(); - StartingVMClusterSearch = createSearchBuilder(); + LHVMClusterSearch = createSearchBuilder(); SearchBuilder hostSearch1 = _hostDao.createSearchBuilder(); - StartingVMClusterSearch.join("hostSearch1", hostSearch1, hostSearch1.entity().getId(), StartingVMClusterSearch.entity().getHostId(), JoinType.INNER); + LHVMClusterSearch.join("hostSearch1", hostSearch1, hostSearch1.entity().getId(), LHVMClusterSearch.entity().getLastHostId(), JoinType.INNER); hostSearch1.and("clusterId", hostSearch1.entity().getClusterId(), SearchCriteria.Op.EQ); - StartingVMClusterSearch.done(); + LHVMClusterSearch.done(); AllFieldsSearch = createSearchBuilder(); @@ -227,8 +227,8 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem @Override - public List listStartingByClusterId(long clusterId) { - SearchCriteria sc = StartingVMClusterSearch.create(); + public List listLHByClusterId(long clusterId) { + SearchCriteria sc = LHVMClusterSearch.create(); sc.setJoinParameters("hostSearch1", "clusterId", clusterId); return listBy(sc); }