bug 12844, 13394: 1. if connecting to a host fails, there is no need to investigate

2. add an ha parameter to the host disconnect handling to indicate whether HA should be scheduled for the VMs on this host

status 12844, 13394: resolved fixed

reviewed-by : edison
This commit is contained in:
anthony 2012-01-31 14:59:45 -08:00
parent 97953c7e11
commit c530cbad2a
3 changed files with 34 additions and 31 deletions

View File

@ -4098,9 +4098,9 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
}
protected SetupAnswer execute(SetupCommand cmd) {
Connection conn = getConnection();
setupServer(conn);
try {
Connection conn = getConnection();
setupServer(conn);
if (!setIptables(conn)) {
s_logger.warn("set xenserver Iptable failed");
return null;

View File

@ -721,7 +721,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
_dcDao.releasePrivateIpAddress(host.getPrivateIpAddress(), host.getDataCenterId(), null);
if (attache != null) {
handleDisconnect(attache, Status.Event.Remove, false);
handleDisconnect(attache, Status.Event.Remove, false, true);
}
// delete host details
_hostDetailsDao.deleteDetails(hostId);
@ -921,7 +921,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
}
}
public void removeAgent(AgentAttache attache, Status nextState) {
public void removeAgent(AgentAttache attache, Status nextState, Event event, Boolean investigate, boolean ha) {
if (attache == null) {
return;
}
@ -946,6 +946,13 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
removed.disconnect(nextState);
}
HostVO host = _hostDao.findById(hostId);
if (ha && event != null && investigate != null) {
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
}
}
for (Pair<Integer, Listener> monitor : _hostMonitors) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
@ -993,7 +1000,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
_executor.submit(new DisconnectTask(attache, event, investigate));
}
protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate) {
protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate, boolean ha) {
if (attache == null) {
return true;
}
@ -1005,7 +1012,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
HostVO host = _hostDao.findById(hostId);
if (host == null) {
s_logger.warn("Can't find host with " + hostId);
removeAgent(attache, Status.Removed);
removeAgent(attache, Status.Removed, event, investigate, ha);
return true;
}
@ -1015,7 +1022,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
s_logger.debug("Host " + hostId + " is already " + currentState);
}
if (currentState != Status.PrepareForMaintenance) {
removeAgent(attache, currentState);
removeAgent(attache, currentState, event, investigate, ha);
}
return true;
}
@ -1103,13 +1110,8 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Deregistering link for " + hostId + " with state " + nextState);
}
removeAgent(attache, nextState);
removeAgent(attache, nextState, event, investigate, ha);
_hostDao.disconnect(host, event, _nodeId);
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
}
return true;
}
@ -1129,19 +1131,19 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
ConnectionException ce = (ConnectionException)e;
if (ce.isSetupError()) {
s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage());
handleDisconnect(attache, Event.AgentDisconnected, true);
handleDisconnect(attache, Event.AgentDisconnected, false, false);
throw ce;
} else {
s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + e.getMessage());
handleDisconnect(attache, Event.ShutdownRequested, true);
handleDisconnect(attache, Event.ShutdownRequested, false, false);
return attache;
}
} else if (e instanceof HypervisorVersionChangedException) {
handleDisconnect(attache, Event.HypervisorVersionChanged, true);
handleDisconnect(attache, Event.HypervisorVersionChanged, false, false);
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
} else {
s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e);
handleDisconnect(attache, Event.AgentDisconnected, true);
handleDisconnect(attache, Event.AgentDisconnected, false, false);
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
}
}
@ -1155,7 +1157,8 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
// this is tricky part for secondary storage
// make it as disconnected, wait for secondary storage VM to be up
// return the attache instead of null, even it is disconnectede
handleDisconnect(attache, Event.AgentDisconnected, true);
handleDisconnect(attache, Event.AgentDisconnected, false, false);
throw new CloudRuntimeException("ReadyCommand failed " + attache.getId());
}
_hostDao.updateStatus(host, Event.Ready, _nodeId);
@ -1464,7 +1467,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
@Override
public void run() {
try {
handleDisconnect(_attache, _event, _investigate);
handleDisconnect(_attache, _event, _investigate, true);
} catch (final Exception e) {
s_logger.error("Exception caught while handling disconnect: ", e);
} finally {
@ -1596,7 +1599,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
AgentAttache attache = null;
attache = findAttache(hostId);
if (attache != null) {
handleDisconnect(attache, Event.AgentDisconnected, true);
handleDisconnect(attache, Event.AgentDisconnected, true, false);
}
return true;
} else if (event == Event.ShutdownRequested) {

View File

@ -169,7 +169,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
if (s_logger.isInfoEnabled()) {
s_logger.info(host + " is detected down, but we have a forward attache running, disconnect this one before launching the host");
}
removeAgent(agentattache, Status.Disconnected);
removeAgent(agentattache, Status.Disconnected, null, null, false);
} else {
continue;
}
@ -286,16 +286,16 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
@Override
protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate) {
return handleDisconnect(attache, event, investigate, true);
protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate, boolean ha) {
return handleDisconnect(attache, event, investigate, true, ha);
}
protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast) {
protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast, boolean ha) {
if (agent == null) {
return true;
}
if (super.handleDisconnect(agent, event, investigate)) {
if (super.handleDisconnect(agent, event, investigate, ha)) {
if (broadcast) {
notifyNodesInCluster(agent);
}
@ -709,22 +709,22 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
@Override
public void removeAgent(AgentAttache attache, Status nextState) {
public void removeAgent(AgentAttache attache, Status nextState, Event event, Boolean investigate, boolean ha) {
if (attache == null) {
return;
}
super.removeAgent(attache, nextState);
super.removeAgent(attache, nextState, event, investigate, ha);
}
@Override
public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException {
boolean result = false;
boolean result = false;
if (event == Event.RequestAgentRebalance) {
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
} else if (event == Event.StartAgentRebalance) {
try {
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
} catch (Exception e) {
s_logger.warn("Unable to rebalance host id=" + agentId, e);
}
@ -1042,7 +1042,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
try {
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
_hostTransferDao.completeAgentTransfer(hostId);
handleDisconnect(findAttache(hostId), Event.RebalanceFailed, false);
handleDisconnect(findAttache(hostId), Event.RebalanceFailed, false, false);
} catch (Exception ex) {
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
}
@ -1059,7 +1059,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
synchronized (_agents) {
ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId);
if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
handleDisconnect(attache, Event.StartAgentRebalance, false);
handleDisconnect(attache, Event.StartAgentRebalance, false, false);
ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId);
if (forwardAttache == null) {
s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");