diff --git a/api/src/com/cloud/host/Status.java b/api/src/com/cloud/host/Status.java index 94f8bcb4bf8..7b932e4762d 100644 --- a/api/src/com/cloud/host/Status.java +++ b/api/src/com/cloud/host/Status.java @@ -78,7 +78,8 @@ public enum Status { StartAgentRebalance(false, "Start rebalance for the certain host"), RebalanceCompleted(false, "Host is rebalanced successfully"), RebalanceFailed(false, "Failed to rebalance the host"), - PrepareUnmanaged(true, "prepare for cluster entering unmanaged status"); + PrepareUnmanaged(true, "prepare for cluster entering unmanaged status"), + HypervisorVersionChanged(false, " hypervisor version changed when host is reconnected"); private final boolean isUserRequest; private final String comment; @@ -130,6 +131,7 @@ public enum Status { s_fsm.addTransition(Status.Connecting, Event.Ping, Status.Connecting); s_fsm.addTransition(Status.Connecting, Event.ManagementServerDown, Status.Disconnected); s_fsm.addTransition(Status.Connecting, Event.AgentDisconnected, Status.Alert); + s_fsm.addTransition(Status.Connecting, Event.HypervisorVersionChanged, Status.Disconnected); s_fsm.addTransition(Status.Up, Event.PingTimeout, Status.Alert); s_fsm.addTransition(Status.Up, Event.MaintenanceRequested, Status.PrepareForMaintenance); s_fsm.addTransition(Status.Up, Event.AgentDisconnected, Status.Alert); @@ -140,6 +142,7 @@ public enum Status { s_fsm.addTransition(Status.Up, Event.ManagementServerDown, Status.Disconnected); s_fsm.addTransition(Status.Up, Event.StartAgentRebalance, Status.Rebalancing); s_fsm.addTransition(Status.Up, Event.PrepareUnmanaged, Status.Disconnected); + s_fsm.addTransition(Status.Up, Event.HypervisorVersionChanged, Status.Disconnected); s_fsm.addTransition(Status.Updating, Event.PingTimeout, Status.Alert); s_fsm.addTransition(Status.Updating, Event.Ping, Status.Updating); s_fsm.addTransition(Status.Updating, Event.AgentConnected, Status.Connecting); @@ -176,6 +179,7 @@ public enum Status { s_fsm.addTransition(Status.Disconnected, Event.ManagementServerDown, Status.Disconnected); s_fsm.addTransition(Status.Disconnected, Event.WaitedTooLong, Status.Alert); s_fsm.addTransition(Status.Disconnected, Event.Remove, Status.Removed); + s_fsm.addTransition(Status.Disconnected, Event.HypervisorVersionChanged, Status.Disconnected); s_fsm.addTransition(Status.Down, Event.MaintenanceRequested, Status.PrepareForMaintenance); s_fsm.addTransition(Status.Down, Event.AgentConnected, Status.Connecting); s_fsm.addTransition(Status.Down, Event.Remove, Status.Removed); diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index e8a79399466..754ea856e12 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -146,6 +146,7 @@ import com.cloud.utils.db.DB; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.Transaction; import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.utils.exception.HypervisorVersionChangedException; import com.cloud.utils.net.Ip; import com.cloud.utils.net.NetUtils; import com.cloud.utils.nio.HandlerFactory; @@ -1097,7 +1098,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { _hostDao.disconnect(host, event, _nodeId); host = _hostDao.findById(host.getId()); - if (!event.equals(Event.PrepareUnmanaged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) { + if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) { _haMgr.scheduleRestartForVmsOnHost(host, investigate); } @@ -1133,6 +1134,9 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { handleDisconnect(attache, Event.ShutdownRequested, false); return attache; } + } else if (e instanceof HypervisorVersionChangedException) { + handleDisconnect(attache, Event.HypervisorVersionChanged, false); + throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); } else { s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e); handleDisconnect(attache, Event.AgentDisconnected, false); diff --git a/server/src/com/cloud/host/dao/HostDaoImpl.java b/server/src/com/cloud/host/dao/HostDaoImpl.java index cc9fd0c34b7..8a6f9240738 100755 --- a/server/src/com/cloud/host/dao/HostDaoImpl.java +++ b/server/src/com/cloud/host/dao/HostDaoImpl.java @@ -678,7 +678,7 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao List result = new ArrayList(); ResultSet rs = null; try { - String sql = "select h.id from host h left join cluster c on h.cluster_id=c.id where h.last_ping < ? and h.status in ('Up', 'Updating', 'Disconnected', 'Connecting') and h.type not in ('ExternalFirewall', 'ExternalLoadBalancer', 'TrafficMonitor', 'SecondaryStorage', 'LocalSecondaryStorage') and (h.cluster_id is null or c.managed_state = 'Managed') ;" ; + String sql = "select h.id from host h left join cluster c on h.cluster_id=c.id where h.mgmt_server_id is not null and h.last_ping < ? and h.status in ('Up', 'Updating', 'Disconnected', 'Connecting') and h.type not in ('ExternalFirewall', 'ExternalLoadBalancer', 'TrafficMonitor', 'SecondaryStorage', 'LocalSecondaryStorage') and (h.cluster_id is null or c.managed_state = 'Managed') ;" ; pstmt = txn.prepareStatement(sql); pstmt.setLong(1, timeout); rs = pstmt.executeQuery(); diff --git a/server/src/com/cloud/hypervisor/xen/discoverer/XcpServerDiscoverer.java b/server/src/com/cloud/hypervisor/xen/discoverer/XcpServerDiscoverer.java index c708c8ba5ef..16ed0287e00 100755 --- a/server/src/com/cloud/hypervisor/xen/discoverer/XcpServerDiscoverer.java +++ b/server/src/com/cloud/hypervisor/xen/discoverer/XcpServerDiscoverer.java @@ -75,6 +75,7 @@ import com.cloud.user.Account; import com.cloud.utils.NumbersUtil; import com.cloud.utils.component.Inject; import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.utils.exception.HypervisorVersionChangedException; import com.xensource.xenapi.Connection; import com.xensource.xenapi.Host; import com.xensource.xenapi.Pool; @@ -543,7 +544,7 @@ public class XcpServerDiscoverer extends DiscovererBase implements Discoverer, L _hostDao.update(agentId, host); String msg = "host " + host.getPrivateIpAddress() + " changed from " + host.getResource() + " to " + resource; s_logger.debug(msg); - throw new RuntimeException(msg); + throw new HypervisorVersionChangedException(msg); } diff --git a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java index 2770dbfad0c..d901ed7a2df 100755 --- a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -1694,12 +1694,11 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } for (final AgentVmInfo left : infos.values()) { + boolean found = false; for (VirtualMachineGuru vmGuru : _vmGurus.values()) { VMInstanceVO vm = vmGuru.findByName(left.name); - if (vm == null) { - s_logger.warn("Stopping a VM that we have no record of: " + left.name); - commands.addCommand(cleanup(left.name)); - } else { + if (vm != null) { + found = true; HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType()); if(hvGuru.trackVmHostChange()) { Command command = compareState(hostId, vm, left, true, true); @@ -1707,11 +1706,16 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene commands.addCommand(command); } } else { - s_logger.warn("Stopping a VM that we have no record of: " + left.name); + s_logger.warn("Stopping a VM, VM " + left.name + " migrate from Host " + vm.getHostId() + " to Host " + hostId ); commands.addCommand(cleanup(left.name)); } + break; } } + if ( ! found ) { + s_logger.warn("Stopping a VM that we have no record of: " + left.name); + commands.addCommand(cleanup(left.name)); + } } return commands;