diff --git a/api/src/com/cloud/agent/api/ClusterSyncAnswer.java b/api/src/com/cloud/agent/api/ClusterSyncAnswer.java index 9c355da1f38..6dfda2f438e 100644 --- a/api/src/com/cloud/agent/api/ClusterSyncAnswer.java +++ b/api/src/com/cloud/agent/api/ClusterSyncAnswer.java @@ -27,6 +27,7 @@ public class ClusterSyncAnswer extends Answer { private HashMap> _newStates; private HashMap> _allStates; private int _type = -1; // 0 for full, 1 for delta + private boolean _isExecuted=false; public static final int FULL_SYNC=0; public static final int DELTA_SYNC=1; @@ -38,6 +39,17 @@ public class ClusterSyncAnswer extends Answer { _type = -1; } + // this is here because a cron command answer is being sent twice + // AgentAttache.processAnswers + // AgentManagerImpl.notifyAnswersToMonitors + public boolean isExceuted(){ + return _isExecuted; + } + + public void setExecuted(){ + _isExecuted = true; + } + public ClusterSyncAnswer(long clusterId, HashMap> newStates){ _clusterId = clusterId; diff --git a/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java b/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java index 630ca5636f9..96fb72c43a3 100755 --- a/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java +++ b/core/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java @@ -6573,7 +6573,6 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe protected HashMap> fullClusterSync(Connection conn) { XenServerPoolVms vms = new XenServerPoolVms(); try { - Host lhost = Host.getByUuid(conn, _host.uuid); Map vm_map = VM.getAllRecords(conn); //USE THIS TO GET ALL VMS FROM A CLUSTER for (VM.Record record: vm_map.values()) { if (record.isControlDomain || record.isASnapshot || record.isATemplate) { @@ -6627,6 +6626,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe //check if host is changed if (host_uuid != null && oldState != null){ if (!host_uuid.equals(oldState.first()) && newState != State.Stopped && newState != State.Stopping){ + s_logger.warn("Detecting a change in host for " + vm); changes.put(vm, new Pair(host_uuid, newState)); s_vms.put(_cluster, host_uuid, vm, newState); continue; @@ -6636,7 +6636,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe if (newState == State.Stopped && oldState != null && oldState.second() != State.Stopping && oldState.second() != State.Stopped) { newState = getRealPowerState(conn, vm); } - + if (s_logger.isTraceEnabled()) { s_logger.trace("VM " + vm + ": xen has state " + newState + " and we have state " + (oldState != null ? oldState.toString() : "null")); } @@ -6682,7 +6682,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe final String vm = entry.getKey(); final State oldState = entry.getValue().second(); String host_uuid = entry.getValue().first(); - + if (s_logger.isTraceEnabled()) { s_logger.trace("VM " + vm + " is now missing from xen so reporting stopped"); } @@ -6697,12 +6697,11 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe } else if (oldState == State.Migrating) { s_logger.warn("Ignoring VM " + vm + " in migrating state."); } else { - //State newState = State.Stopped; - //changes.put(vm, new Pair(host_uuid, newState)); + State newState = State.Stopped; + changes.put(vm, new Pair(host_uuid, newState)); } } } - return changes; } diff --git a/core/src/com/cloud/hypervisor/xen/resource/XenServerPoolVms.java b/core/src/com/cloud/hypervisor/xen/resource/XenServerPoolVms.java index 59c4e45a867..6d1e5850cdb 100644 --- a/core/src/com/cloud/hypervisor/xen/resource/XenServerPoolVms.java +++ b/core/src/com/cloud/hypervisor/xen/resource/XenServerPoolVms.java @@ -13,8 +13,9 @@ import com.cloud.vm.VirtualMachine.State; public class XenServerPoolVms { private static final Logger s_logger = Logger.getLogger(XenServerPoolVms.class); - HashMap>> _cluster_vms = + private HashMap>> _cluster_vms = new HashMap>>(); + private long _last_sync_time=0; public HashMap> getClusterVmState(String clusterId){ HashMap> _vms= _cluster_vms.get(clusterId); @@ -64,10 +65,9 @@ public class XenServerPoolVms { HashMap> vms= getClusterVmState(clusterId); return vms.size(); } - - public static void main(String args[]){ - XenServerPoolVms vms = new XenServerPoolVms(); + public void initSyncTime(){ + _last_sync_time = System.currentTimeMillis(); } @Override diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java index 26588a27547..771c3aa71c1 100755 --- a/server/src/com/cloud/configuration/Config.java +++ b/server/src/com/cloud/configuration/Config.java @@ -159,8 +159,7 @@ public enum Config { PingInterval("Advanced", AgentManager.class, Integer.class, "ping.interval", "60", "Ping interval in seconds", null), PingTimeout("Advanced", AgentManager.class, Float.class, "ping.timeout", "2.5", "Multiplier to ping.interval before announcing an agent has timed out", null), ClusterDeltaSyncInterval("Advanced", AgentManager.class, Integer.class, "sync.interval", "60", "Cluster Delta sync interval in seconds", null), - ClusterFullSyncSkipSteps("Advanced", AgentManager.class, Integer.class, - "skip.steps", "525600", "Cluster full sync skip steps count", null), + ClusterFullSyncSkipSteps("Advanced", AgentManager.class, Integer.class, "skip.steps", "60", "Cluster full sync skip steps count", null), Port("Advanced", AgentManager.class, Integer.class, "port", "8250", "Port to listen on for agent connection.", null), RouterCpuMHz("Advanced", NetworkManager.class, Integer.class, "router.cpu.mhz", String.valueOf(VirtualNetworkApplianceManager.DEFAULT_ROUTER_CPU_MHZ), "Default CPU speed (MHz) for router VM.", null), RestartRetryInterval("Advanced", HighAvailabilityManager.class, Integer.class, "restart.retry.interval", "600", "Time (in seconds) between retries to restart a vm", null), diff --git a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java index 882a7d31990..3b3ec39b5c7 100755 --- a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -139,6 +139,7 @@ import com.cloud.utils.component.Inject; import com.cloud.utils.concurrency.NamedThreadFactory; import com.cloud.utils.db.DB; import com.cloud.utils.db.GlobalLock; +import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.Transaction; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.utils.exception.ExecutionException; @@ -1608,6 +1609,27 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene commands.addCommand(command); } } + + + final List vmsz = _vmDao.listByHostId(hostId); + s_logger.debug("Found " + vmsz.size() + " VMs for host " + hostId); + for (VMInstanceVO vm : vmsz) { + AgentVmInfo info = infos.remove(vm.getId()); + VMInstanceVO castedVm = null; + if (info == null) { + info = new AgentVmInfo(vm.getInstanceName(), getVmGuru(vm), vm, State.Stopped); + castedVm = info.guru.findById(vm.getId()); + } else { + castedVm = info.vm; + } + + HypervisorGuru hvGuru = _hvGuruMgr.getGuru(castedVm.getHypervisorType()); + + Command command = compareState(hostId, castedVm, info, true, hvGuru.trackVmHostChange()); + if (command != null) { + commands.addCommand(command); + } + } for (final AgentVmInfo left : infos.values()) { boolean found = false; @@ -1629,7 +1651,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } } if ( ! found ) { - s_logger.warn("Stopping a VM that we have no record of: " + left.name); + s_logger.warn("Stopping a VM that we have no record of : " + left.name); commands.addCommand(cleanup(left.name)); } } @@ -1667,50 +1689,70 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene - public Commands deltaSync(Map> newStates) { + public void deltaSync(Map> newStates) { Map states = convertToInfos(newStates); - Commands commands = new Commands(OnError.Continue); for (Map.Entry entry : states.entrySet()) { AgentVmInfo info = entry.getValue(); VMInstanceVO vm = info.vm; Command command = null; if (vm != null) { - String hostGuid = info.getHostUuid(); - Host host = _resourceMgr.findHostByGuid(hostGuid); + Host host = _resourceMgr.findHostByGuid(info.getHostUuid()); long hId = host.getId(); HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType()); command = compareState(hId, vm, info, false, hvGuru.trackVmHostChange()); } else { if (s_logger.isDebugEnabled()) { - s_logger.debug("Cleaning up a VM that is no longer found: " + info.name); + s_logger.debug("Cleaning up a VM that is no longer found : " + info.name); } command = cleanup(info.name); } - - if (command != null) { - commands.addCommand(command); + if (command != null){ + try { + Host host = _resourceMgr.findHostByGuid(info.getHostUuid()); + if (host != null){ + Answer answer = _agentMgr.send(host.getId(), cleanup(info.name)); + if (!answer.getResult()) { + s_logger.warn("Unable to stop a VM due to " + answer.getDetails()); + } + } + } catch (Exception e) { + s_logger.warn("Unable to stop a VM due to " + e.getMessage()); + } } } - - return commands; } - public Commands fullSync(final long clusterId, Map> newStates) { - Commands commands = new Commands(OnError.Continue); + public void fullSync(final long clusterId, Map> newStates) { Map infos = convertToInfos(newStates); - long hId = 0; - final List vms = _vmDao.listByClusterId(clusterId); + List vms = _vmDao.listByClusterId(clusterId); for (VMInstanceVO vm : vms) { - AgentVmInfo info = infos.remove(vm.getId()); + if (vm.isRemoved() || vm.getState() == State.Destroyed || vm.getState() == State.Expunging) continue; + infos.remove(vm.getId()); + } + // some VMs may be starting and will have last host id null + vms = _vmDao.listStartingByClusterId(clusterId); + for (VMInstanceVO vm : vms) { + if (vm.isRemoved() || vm.getState() == State.Destroyed || vm.getState() == State.Expunging) continue; + infos.remove(vm.getId()); } for (final AgentVmInfo left : infos.values()) { - s_logger.warn("Stopping a VM that we have no record of: " + left.name); - commands.addCommand(cleanup(left.name)); + try { + Host host = _resourceMgr.findHostByGuid(left.getHostUuid()); + if (host != null){ + s_logger.warn("Stopping a VM which we do not have any record of " + left.name); + Answer answer = _agentMgr.send(host.getId(), cleanup(left.name)); + if (!answer.getResult()) { + s_logger.warn("Unable to stop a VM due to " + answer.getDetails()); + } + } + } catch (Exception e) { + s_logger.warn("Unable to stop a VM due to " + e.getMessage()); + } } - return commands; + } @@ -2040,11 +2082,14 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene for (final Answer answer : answers) { if (answer instanceof ClusterSyncAnswer) { ClusterSyncAnswer hs = (ClusterSyncAnswer) answer; - if (hs.isFull()) { - deltaSync(hs.getNewStates()); - fullSync(hs.getClusterId(), hs.getAllStates()); - } else if (hs.isDelta()){ - deltaSync(hs.getNewStates()); + if (!hs.isExceuted()){ + if (hs.isFull()) { + deltaSync(hs.getNewStates()); + fullSync(hs.getClusterId(), hs.getAllStates()); + } else if (hs.isDelta()){ + deltaSync(hs.getNewStates()); + } + hs.setExecuted(); } } else if (!answer.getResult()) { s_logger.warn("Cleanup failed due to " + answer.getDetails()); diff --git a/server/src/com/cloud/vm/dao/VMInstanceDao.java b/server/src/com/cloud/vm/dao/VMInstanceDao.java index 17796d89dd6..1a1fb03c708 100644 --- a/server/src/com/cloud/vm/dao/VMInstanceDao.java +++ b/server/src/com/cloud/vm/dao/VMInstanceDao.java @@ -83,7 +83,9 @@ public interface VMInstanceDao extends GenericDao, StateDao< List listByAccountId(long accountId); public Long countAllocatedVirtualRoutersForAccount(long accountId); - List listByClusterId(long clusterId); + List listByClusterId(long clusterId); // this does not pull up VMs which are starting + List listStartingByClusterId(long clusterId); // get all the VMs even starting one on this cluster + List listVmsMigratingFromHost(Long hostId); public Long countRunningByHostId(long hostId); diff --git a/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java b/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java index d0edac30511..d0bf40f24d4 100644 --- a/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java +++ b/server/src/com/cloud/vm/dao/VMInstanceDaoImpl.java @@ -59,6 +59,7 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem public static final Logger s_logger = Logger.getLogger(VMInstanceDaoImpl.class); protected final SearchBuilder VMClusterSearch; + protected final SearchBuilder StartingVMClusterSearch; protected final SearchBuilder IdStatesSearch; protected final SearchBuilder AllFieldsSearch; protected final SearchBuilder ZoneTemplateNonExpungedSearch; @@ -87,6 +88,7 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem " GROUP BY host.id ORDER BY 2 ASC "; protected final HostDaoImpl _hostDao = ComponentLocator.inject(HostDaoImpl.class); + protected VMInstanceDaoImpl() { IdStatesSearch = createSearchBuilder(); IdStatesSearch.and("id", IdStatesSearch.entity().getId(), Op.EQ); @@ -99,6 +101,14 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem hostSearch.and("clusterId", hostSearch.entity().getClusterId(), SearchCriteria.Op.EQ); VMClusterSearch.done(); + + StartingVMClusterSearch = createSearchBuilder(); + SearchBuilder hostSearch1 = _hostDao.createSearchBuilder(); + StartingVMClusterSearch.join("hostSearch1", hostSearch1, hostSearch1.entity().getId(), StartingVMClusterSearch.entity().getHostId(), JoinType.INNER); + hostSearch1.and("clusterId", hostSearch1.entity().getClusterId(), SearchCriteria.Op.EQ); + StartingVMClusterSearch.done(); + + AllFieldsSearch = createSearchBuilder(); AllFieldsSearch.and("host", AllFieldsSearch.entity().getHostId(), Op.EQ); AllFieldsSearch.and("lastHost", AllFieldsSearch.entity().getLastHostId(), Op.EQ); @@ -212,10 +222,17 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem public List listByClusterId(long clusterId) { SearchCriteria sc = VMClusterSearch.create(); sc.setJoinParameters("hostSearch", "clusterId", clusterId); - return listBy(sc); } + + @Override + public List listStartingByClusterId(long clusterId) { + SearchCriteria sc = StartingVMClusterSearch.create(); + sc.setJoinParameters("hostSearch1", "clusterId", clusterId); + return listBy(sc); + } + @Override public List listByZoneIdAndType(long zoneId, VirtualMachine.Type type) { SearchCriteria sc = AllFieldsSearch.create();