Bug 10197:

1. Don't attempt HA for VMs when the host's hypervisor version changes.
2. Fixed a bug in VM full sync when VM host-change tracking (hosttrack) is enabled.
This commit is contained in:
anthony 2011-08-02 09:55:29 -07:00
parent 21afd1785a
commit 5f9884d97a
5 changed files with 22 additions and 9 deletions

View File

@ -78,7 +78,8 @@ public enum Status {
StartAgentRebalance(false, "Start rebalance for the certain host"),
RebalanceCompleted(false, "Host is rebalanced successfully"),
RebalanceFailed(false, "Failed to rebalance the host"),
PrepareUnmanaged(true, "prepare for cluster entering unmanaged status");
PrepareUnmanaged(true, "prepare for cluster entering unmanaged status"),
HypervisorVersionChanged(false, " hypervisor version changed when host is reconnected");
private final boolean isUserRequest;
private final String comment;
@ -130,6 +131,7 @@ public enum Status {
s_fsm.addTransition(Status.Connecting, Event.Ping, Status.Connecting);
s_fsm.addTransition(Status.Connecting, Event.ManagementServerDown, Status.Disconnected);
s_fsm.addTransition(Status.Connecting, Event.AgentDisconnected, Status.Alert);
s_fsm.addTransition(Status.Connecting, Event.HypervisorVersionChanged, Status.Disconnected);
s_fsm.addTransition(Status.Up, Event.PingTimeout, Status.Alert);
s_fsm.addTransition(Status.Up, Event.MaintenanceRequested, Status.PrepareForMaintenance);
s_fsm.addTransition(Status.Up, Event.AgentDisconnected, Status.Alert);
@ -140,6 +142,7 @@ public enum Status {
s_fsm.addTransition(Status.Up, Event.ManagementServerDown, Status.Disconnected);
s_fsm.addTransition(Status.Up, Event.StartAgentRebalance, Status.Rebalancing);
s_fsm.addTransition(Status.Up, Event.PrepareUnmanaged, Status.Disconnected);
s_fsm.addTransition(Status.Up, Event.HypervisorVersionChanged, Status.Disconnected);
s_fsm.addTransition(Status.Updating, Event.PingTimeout, Status.Alert);
s_fsm.addTransition(Status.Updating, Event.Ping, Status.Updating);
s_fsm.addTransition(Status.Updating, Event.AgentConnected, Status.Connecting);
@ -176,6 +179,7 @@ public enum Status {
s_fsm.addTransition(Status.Disconnected, Event.ManagementServerDown, Status.Disconnected);
s_fsm.addTransition(Status.Disconnected, Event.WaitedTooLong, Status.Alert);
s_fsm.addTransition(Status.Disconnected, Event.Remove, Status.Removed);
s_fsm.addTransition(Status.Disconnected, Event.HypervisorVersionChanged, Status.Disconnected);
s_fsm.addTransition(Status.Down, Event.MaintenanceRequested, Status.PrepareForMaintenance);
s_fsm.addTransition(Status.Down, Event.AgentConnected, Status.Connecting);
s_fsm.addTransition(Status.Down, Event.Remove, Status.Removed);

View File

@ -146,6 +146,7 @@ import com.cloud.utils.db.DB;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.exception.HypervisorVersionChangedException;
import com.cloud.utils.net.Ip;
import com.cloud.utils.net.NetUtils;
import com.cloud.utils.nio.HandlerFactory;
@ -1097,7 +1098,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
_hostDao.disconnect(host, event, _nodeId);
host = _hostDao.findById(host.getId());
if (!event.equals(Event.PrepareUnmanaged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
}
@ -1133,6 +1134,9 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
handleDisconnect(attache, Event.ShutdownRequested, false);
return attache;
}
} else if (e instanceof HypervisorVersionChangedException) {
handleDisconnect(attache, Event.HypervisorVersionChanged, false);
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
} else {
s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e);
handleDisconnect(attache, Event.AgentDisconnected, false);

View File

@ -678,7 +678,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
List<HostVO> result = new ArrayList<HostVO>();
ResultSet rs = null;
try {
String sql = "select h.id from host h left join cluster c on h.cluster_id=c.id where h.last_ping < ? and h.status in ('Up', 'Updating', 'Disconnected', 'Connecting') and h.type not in ('ExternalFirewall', 'ExternalLoadBalancer', 'TrafficMonitor', 'SecondaryStorage', 'LocalSecondaryStorage') and (h.cluster_id is null or c.managed_state = 'Managed') ;" ;
String sql = "select h.id from host h left join cluster c on h.cluster_id=c.id where h.mgmt_server_id is not null and h.last_ping < ? and h.status in ('Up', 'Updating', 'Disconnected', 'Connecting') and h.type not in ('ExternalFirewall', 'ExternalLoadBalancer', 'TrafficMonitor', 'SecondaryStorage', 'LocalSecondaryStorage') and (h.cluster_id is null or c.managed_state = 'Managed') ;" ;
pstmt = txn.prepareStatement(sql);
pstmt.setLong(1, timeout);
rs = pstmt.executeQuery();

View File

@ -75,6 +75,7 @@ import com.cloud.user.Account;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.component.Inject;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.exception.HypervisorVersionChangedException;
import com.xensource.xenapi.Connection;
import com.xensource.xenapi.Host;
import com.xensource.xenapi.Pool;
@ -543,7 +544,7 @@ public class XcpServerDiscoverer extends DiscovererBase implements Discoverer, L
_hostDao.update(agentId, host);
String msg = "host " + host.getPrivateIpAddress() + " changed from " + host.getResource() + " to " + resource;
s_logger.debug(msg);
throw new RuntimeException(msg);
throw new HypervisorVersionChangedException(msg);
}

View File

@ -1694,12 +1694,11 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
}
for (final AgentVmInfo left : infos.values()) {
boolean found = false;
for (VirtualMachineGuru<? extends VMInstanceVO> vmGuru : _vmGurus.values()) {
VMInstanceVO vm = vmGuru.findByName(left.name);
if (vm == null) {
s_logger.warn("Stopping a VM that we have no record of: " + left.name);
commands.addCommand(cleanup(left.name));
} else {
if (vm != null) {
found = true;
HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
if(hvGuru.trackVmHostChange()) {
Command command = compareState(hostId, vm, left, true, true);
@ -1707,11 +1706,16 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
commands.addCommand(command);
}
} else {
s_logger.warn("Stopping a VM that we have no record of: " + left.name);
s_logger.warn("Stopping a VM, VM " + left.name + " migrate from Host " + vm.getHostId() + " to Host " + hostId );
commands.addCommand(cleanup(left.name));
}
break;
}
}
if ( ! found ) {
s_logger.warn("Stopping a VM that we have no record of: " + left.name);
commands.addCommand(cleanup(left.name));
}
}
return commands;