From ca6fd0b92c2bb4b9650b7f008e8bf38b21b90096 Mon Sep 17 00:00:00 2001 From: abhi Date: Mon, 6 Feb 2012 13:57:36 +0530 Subject: [PATCH] bug 13439: full sync at management server restart, disabling hourly sync --- .../cloud/agent/api/ClusterSyncAnswer.java | 27 ------- .../cloud/agent/api/ClusterSyncCommand.java | 19 +---- .../xen/resource/CitrixResourceBase.java | 10 +-- .../src/com/cloud/configuration/Config.java | 1 - .../cloud/vm/VirtualMachineManagerImpl.java | 79 +++++++++++-------- .../src/com/cloud/vm/dao/VMInstanceDao.java | 2 +- .../com/cloud/vm/dao/VMInstanceDaoImpl.java | 12 +-- 7 files changed, 55 insertions(+), 95 deletions(-) diff --git a/api/src/com/cloud/agent/api/ClusterSyncAnswer.java b/api/src/com/cloud/agent/api/ClusterSyncAnswer.java index e0689b906f6..432fc316685 100644 --- a/api/src/com/cloud/agent/api/ClusterSyncAnswer.java +++ b/api/src/com/cloud/agent/api/ClusterSyncAnswer.java @@ -25,12 +25,7 @@ import com.cloud.vm.VirtualMachine.State; public class ClusterSyncAnswer extends Answer { private long _clusterId; private HashMap> _newStates; - private HashMap> _allStates; - private int _type = -1; // 0 for full, 1 for delta private boolean _isExecuted=false; - - public static final int FULL_SYNC=0; - public static final int DELTA_SYNC=1; // this is here because a cron command answer is being sent twice // AgentAttache.processAnswers @@ -47,19 +42,9 @@ public class ClusterSyncAnswer extends Answer { public ClusterSyncAnswer(long clusterId, HashMap> newStates){ _clusterId = clusterId; _newStates = newStates; - _allStates = null; - _type = DELTA_SYNC; result = true; } - public ClusterSyncAnswer(long clusterId, HashMap> newStates, HashMap> allStates){ - _clusterId = clusterId; - _newStates = newStates; - _allStates = allStates; - _type = FULL_SYNC; - result = true; - } - public long getClusterId() { return _clusterId; } @@ -67,16 +52,4 @@ public class ClusterSyncAnswer extends Answer { public HashMap> getNewStates() { return _newStates; } - - public HashMap> getAllStates() { - return _allStates; - } - - public boolean isFull(){ - return _type==0; - } - - public boolean isDelta(){ - return _type==1; - } } \ No newline at end of file diff --git a/api/src/com/cloud/agent/api/ClusterSyncCommand.java b/api/src/com/cloud/agent/api/ClusterSyncCommand.java index f70a8398a21..7411e3053c3 100644 --- a/api/src/com/cloud/agent/api/ClusterSyncCommand.java +++ b/api/src/com/cloud/agent/api/ClusterSyncCommand.java @@ -20,19 +20,15 @@ package com.cloud.agent.api; public class ClusterSyncCommand extends Command implements CronCommand { int _interval; - int _skipSteps; // skip this many steps for full sync - int _steps; long _clusterId; public ClusterSyncCommand() { } - public ClusterSyncCommand(int interval, int skipSteps, long clusterId){ + public ClusterSyncCommand(int interval, long clusterId){ _interval = interval; - _skipSteps = skipSteps; _clusterId = clusterId; - _steps=0; } @Override @@ -40,19 +36,6 @@ public class ClusterSyncCommand extends Command implements CronCommand { return _interval; } - public int getSkipSteps(){ - return _skipSteps; - } - - public void incrStep(){ - _steps++; - if (_steps>=_skipSteps)_steps=0; - } - - public boolean isRightStep(){ - return (_steps==0); - } - public long getClusterId() { return _clusterId; } diff --git a/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java b/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java index 82af103b498..3221228a418 100755 --- a/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java +++ b/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java @@ -6631,15 +6631,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe return new Answer(cmd); } HashMap> newStates = deltaClusterSync(conn); - cmd.incrStep(); - if (cmd.isRightStep()){ - // do full sync - HashMap> allStates=fullClusterSync(conn); - return new ClusterSyncAnswer(cmd.getClusterId(), newStates, allStates); - } - else { - return new ClusterSyncAnswer(cmd.getClusterId(), newStates); - } + return new ClusterSyncAnswer(cmd.getClusterId(), newStates); } protected HashMap> fullClusterSync(Connection conn) { diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java index f60ffe96316..7f95179014a 100755 --- a/server/src/com/cloud/configuration/Config.java +++ b/server/src/com/cloud/configuration/Config.java @@ -148,7 +148,6 @@ public enum Config { MigrateRetryInterval("Advanced", HighAvailabilityManager.class, Integer.class, "migrate.retry.interval", "120", "Time (in seconds) between migration retries", null), PingInterval("Advanced", AgentManager.class, Integer.class, "ping.interval", "60", "Ping interval in seconds", null), ClusterDeltaSyncInterval("Advanced", AgentManager.class, Integer.class, "sync.interval", "60", "Cluster Delta sync interval in seconds", null), - ClusterFullSyncSkipSteps("Advanced", AgentManager.class, Integer.class, "skip.steps", "60", "Cluster full sync skip steps count", null), PingTimeout("Advanced", AgentManager.class, Float.class, "ping.timeout", "2.5", "Multiplier to ping.interval before announcing an agent has timed out", null), Port("Advanced", AgentManager.class, Integer.class, "port", "8250", "Port to listen on for agent connection.", null), RouterCpuMHz("Advanced", NetworkManager.class, Integer.class, "router.cpu.mhz", String.valueOf(VirtualNetworkApplianceManager.DEFAULT_ROUTER_CPU_MHZ), "Default CPU speed (MHz) for router VM.", null), diff --git a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java index ffa77b19504..fee510d1b1f 100755 --- a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -1647,43 +1647,61 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } - public void fullSync(final long clusterId, Map> newStates, boolean init) { + + public void fullSync(final long clusterId, Map> newStates) { + if (newStates==null)return; Map infos = convertToInfos(newStates); Set set_vms = Collections.synchronizedSet(new HashSet()); set_vms.addAll(_vmDao.listByClusterId(clusterId)); - set_vms.addAll(_vmDao.listStartingByClusterId(clusterId)); + set_vms.addAll(_vmDao.listLHByClusterId(clusterId)); for (VMInstanceVO vm : set_vms) { if (vm.isRemoved() || vm.getState() == State.Destroyed || vm.getState() == State.Expunging) continue; AgentVmInfo info = infos.remove(vm.getId()); - if (init){ // mark the VMs real state on initial sync VMInstanceVO castedVm = null; - if (info == null){ - if (vm.getState() == State.Running || vm.getState() == State.Starting) { // only work on VMs which were supposed to be starting/running earlier - info = new AgentVmInfo(vm.getInstanceName(), getVmGuru(vm), vm, State.Stopped); - castedVm = info.guru.findById(vm.getId()); - try { - Host host = _hostDao.findByGuid(info.getHostUuid()); - long hostId = host == null ? (vm.getHostId() == null ? vm.getLastHostId() : vm.getHostId()) : host.getId(); - HypervisorGuru hvGuru = _hvGuruMgr.getGuru(castedVm.getHypervisorType()); - Command command = compareState(hostId, castedVm, info, true, hvGuru.trackVmHostChange()); - if (command != null){ - Answer answer = _agentMgr.send(hostId, command); - if (!answer.getResult()) { - s_logger.warn("Failed to update state of the VM due to " + answer.getDetails()); - } - } - } catch (Exception e) { - s_logger.warn("Unable to update state of the VM due to exception " + e.getMessage()); - e.printStackTrace(); + if ((info == null && (vm.getState() == State.Running || vm.getState() == State.Starting)) + || (info != null && (info.state == State.Running && vm.getState() == State.Starting))) + { + s_logger.info("Found vm " + vm.getInstanceName() + " in inconsistent state. " + vm.getState() + " on CS while " + (info == null ? "Stopped" : "Running") + " on agent"); + info = new AgentVmInfo(vm.getInstanceName(), getVmGuru(vm), vm, State.Stopped); + vm.setState(State.Running); // set it as running and let HA take care of it + _vmDao.persist(vm); + castedVm = info.guru.findById(vm.getId()); + try { + Host host = _hostDao.findByGuid(info.getHostUuid()); + long hostId = host == null ? (vm.getHostId() == null ? vm.getLastHostId() : vm.getHostId()) : host.getId(); + HypervisorGuru hvGuru = _hvGuruMgr.getGuru(castedVm.getHypervisorType()); + Command command = compareState(hostId, castedVm, info, true, hvGuru.trackVmHostChange()); + if (command != null){ + Answer answer = _agentMgr.send(hostId, command); + if (!answer.getResult()) { + s_logger.warn("Failed to update state of the VM due to " + answer.getDetails()); } - } + } + } catch (Exception e) { + s_logger.warn("Unable to update state of the VM due to exception " + e.getMessage()); + e.printStackTrace(); } - } - + } + else { + // host id can change + if (info != null && vm.getState() == State.Running){ + // check for host id changes + Host host = _hostDao.findByGuid(info.getHostUuid()); + if (host != null && (vm.getHostId() == null || host.getId() != vm.getHostId())){ + s_logger.info("Found vm " + vm.getInstanceName() + " with inconsistent host in db, new host is " + host.getId()); + try { + stateTransitTo(vm, VirtualMachine.Event.AgentReportMigrated, host.getId()); + } catch (NoTransitionException e) { + s_logger.warn(e.getMessage()); + } + } + } + } } for (final AgentVmInfo left : infos.values()) { + if (VirtualMachineName.isValidVmName(left.name)) continue; // if the vm follows cloudstack naming ignore it for stopping try { Host host = _hostDao.findByGuid(left.getHostUuid()); if (host != null){ @@ -1701,6 +1719,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } + protected Map convertToInfos(final Map> newStates) { final HashMap map = new HashMap(); if (newStates == null) { @@ -2007,12 +2026,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene for (final Answer answer : answers) { if (answer instanceof ClusterSyncAnswer) { ClusterSyncAnswer hs = (ClusterSyncAnswer) answer; - if (hs.isFull()) { - deltaSync(hs.getNewStates()); - fullSync(hs.getClusterId(), hs.getAllStates(), false); - } else if (hs.isDelta()) { - deltaSync(hs.getNewStates()); - } + deltaSync(hs.getNewStates()); } else if (!answer.getResult()) { s_logger.warn("Cleanup failed due to " + answer.getDetails() + " for " + answer.getClass().getName()); } else { @@ -2083,11 +2097,10 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene StartupRoutingCommand startup = (StartupRoutingCommand) cmd; HashMap> allStates = startup.getClusterVMStateChanges(); if (allStates != null){ - this.fullSync(clusterId, allStates, true); + this.fullSync(clusterId, allStates); } - ClusterSyncCommand syncCmd = new ClusterSyncCommand(Integer.parseInt(Config.ClusterDeltaSyncInterval.getDefaultValue()), - Integer.parseInt(Config.ClusterFullSyncSkipSteps.getDefaultValue()), clusterId); + ClusterSyncCommand syncCmd = new ClusterSyncCommand(Integer.parseInt(Config.ClusterDeltaSyncInterval.getDefaultValue()), clusterId); try { long seq_no = _agentMgr.send(agentId, new Commands(syncCmd), this); s_logger.debug("Cluster VM sync started with jobid " + seq_no); diff --git a/server/src/com/cloud/vm/dao/VMInstanceDao.java b/server/src/com/cloud/vm/dao/VMInstanceDao.java index 68bd7eed8d4..6040f79aa2d 100644 --- a/server/src/com/cloud/vm/dao/VMInstanceDao.java +++ b/server/src/com/cloud/vm/dao/VMInstanceDao.java @@ -82,6 +82,6 @@ public interface VMInstanceDao extends GenericDao, StateDao< public Long countAllocatedVirtualRoutersForAccount(long accountId); List listByClusterId(long clusterId); - List listStartingByClusterId(long clusterId); // get all the VMs even starting one on this cluster + List listLHByClusterId(long clusterId); List listVmsMigratingFromHost(Long hostId); } diff --git a/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java b/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java index 8d41e8bda1b..2e67c05bdbd 100644 --- a/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java +++ b/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java @@ -50,7 +50,7 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem public static final Logger s_logger = Logger.getLogger(VMInstanceDaoImpl.class); protected final SearchBuilder VMClusterSearch; - protected final SearchBuilder StartingVMClusterSearch; + protected final SearchBuilder listLHByClusterId; protected final SearchBuilder IdStatesSearch; protected final SearchBuilder AllFieldsSearch; protected final SearchBuilder ZoneTemplateNonExpungedSearch; @@ -80,11 +80,11 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem VMClusterSearch.done(); - StartingVMClusterSearch = createSearchBuilder(); + listLHByClusterId = createSearchBuilder(); SearchBuilder hostSearch1 = _hostDao.createSearchBuilder(); - StartingVMClusterSearch.join("hostSearch1", hostSearch1, hostSearch1.entity().getId(), StartingVMClusterSearch.entity().getHostId(), JoinType.INNER); + listLHByClusterId.join("hostSearch1", hostSearch1, hostSearch1.entity().getId(), listLHByClusterId.entity().getLastHostId(), JoinType.INNER); hostSearch1.and("clusterId", hostSearch1.entity().getClusterId(), SearchCriteria.Op.EQ); - StartingVMClusterSearch.done(); + listLHByClusterId.done(); AllFieldsSearch = createSearchBuilder(); AllFieldsSearch.and("host", AllFieldsSearch.entity().getHostId(), Op.EQ); @@ -193,8 +193,8 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem @Override - public List listStartingByClusterId(long clusterId) { - SearchCriteria sc = StartingVMClusterSearch.create(); + public List listLHByClusterId(long clusterId) { + SearchCriteria sc = listLHByClusterId.create(); sc.setJoinParameters("hostSearch1", "clusterId", clusterId); return listBy(sc); }