mirror of https://github.com/apache/cloudstack.git
bug 12844, 13394: 1. if connecting to a host fails, there is no need to investigate
2. add an ha parameter to disconnect host to indicate whether HA should be scheduled for the VMs on this host. status 12844, 13394: resolved fixed reviewed-by: edison
This commit is contained in:
parent
97953c7e11
commit
c530cbad2a
|
|
@ -4098,9 +4098,9 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
|||
}
|
||||
|
||||
protected SetupAnswer execute(SetupCommand cmd) {
|
||||
Connection conn = getConnection();
|
||||
setupServer(conn);
|
||||
try {
|
||||
Connection conn = getConnection();
|
||||
setupServer(conn);
|
||||
if (!setIptables(conn)) {
|
||||
s_logger.warn("set xenserver Iptable failed");
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -721,7 +721,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
|
||||
_dcDao.releasePrivateIpAddress(host.getPrivateIpAddress(), host.getDataCenterId(), null);
|
||||
if (attache != null) {
|
||||
handleDisconnect(attache, Status.Event.Remove, false);
|
||||
handleDisconnect(attache, Status.Event.Remove, false, true);
|
||||
}
|
||||
// delete host details
|
||||
_hostDetailsDao.deleteDetails(hostId);
|
||||
|
|
@ -921,7 +921,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
}
|
||||
}
|
||||
|
||||
public void removeAgent(AgentAttache attache, Status nextState) {
|
||||
public void removeAgent(AgentAttache attache, Status nextState, Event event, Boolean investigate, boolean ha) {
|
||||
if (attache == null) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -946,6 +946,13 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
removed.disconnect(nextState);
|
||||
}
|
||||
|
||||
HostVO host = _hostDao.findById(hostId);
|
||||
if (ha && event != null && investigate != null) {
|
||||
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
|
||||
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
|
||||
}
|
||||
}
|
||||
|
||||
for (Pair<Integer, Listener> monitor : _hostMonitors) {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
|
||||
|
|
@ -993,7 +1000,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
_executor.submit(new DisconnectTask(attache, event, investigate));
|
||||
}
|
||||
|
||||
protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate) {
|
||||
protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate, boolean ha) {
|
||||
if (attache == null) {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1005,7 +1012,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
HostVO host = _hostDao.findById(hostId);
|
||||
if (host == null) {
|
||||
s_logger.warn("Can't find host with " + hostId);
|
||||
removeAgent(attache, Status.Removed);
|
||||
removeAgent(attache, Status.Removed, event, investigate, ha);
|
||||
return true;
|
||||
|
||||
}
|
||||
|
|
@ -1015,7 +1022,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
s_logger.debug("Host " + hostId + " is already " + currentState);
|
||||
}
|
||||
if (currentState != Status.PrepareForMaintenance) {
|
||||
removeAgent(attache, currentState);
|
||||
removeAgent(attache, currentState, event, investigate, ha);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1103,13 +1110,8 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Deregistering link for " + hostId + " with state " + nextState);
|
||||
}
|
||||
|
||||
removeAgent(attache, nextState);
|
||||
removeAgent(attache, nextState, event, investigate, ha);
|
||||
_hostDao.disconnect(host, event, _nodeId);
|
||||
|
||||
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
|
||||
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1129,19 +1131,19 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
ConnectionException ce = (ConnectionException)e;
|
||||
if (ce.isSetupError()) {
|
||||
s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage());
|
||||
handleDisconnect(attache, Event.AgentDisconnected, true);
|
||||
handleDisconnect(attache, Event.AgentDisconnected, false, false);
|
||||
throw ce;
|
||||
} else {
|
||||
s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + e.getMessage());
|
||||
handleDisconnect(attache, Event.ShutdownRequested, true);
|
||||
handleDisconnect(attache, Event.ShutdownRequested, false, false);
|
||||
return attache;
|
||||
}
|
||||
} else if (e instanceof HypervisorVersionChangedException) {
|
||||
handleDisconnect(attache, Event.HypervisorVersionChanged, true);
|
||||
handleDisconnect(attache, Event.HypervisorVersionChanged, false, false);
|
||||
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
|
||||
} else {
|
||||
s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e);
|
||||
handleDisconnect(attache, Event.AgentDisconnected, true);
|
||||
handleDisconnect(attache, Event.AgentDisconnected, false, false);
|
||||
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
|
||||
}
|
||||
}
|
||||
|
|
@ -1155,7 +1157,8 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
// this is tricky part for secondary storage
|
||||
// make it as disconnected, wait for secondary storage VM to be up
|
||||
// return the attache instead of null, even if it is disconnected
|
||||
handleDisconnect(attache, Event.AgentDisconnected, true);
|
||||
handleDisconnect(attache, Event.AgentDisconnected, false, false);
|
||||
throw new CloudRuntimeException("ReadyCommand failed " + attache.getId());
|
||||
}
|
||||
|
||||
_hostDao.updateStatus(host, Event.Ready, _nodeId);
|
||||
|
|
@ -1464,7 +1467,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
handleDisconnect(_attache, _event, _investigate);
|
||||
handleDisconnect(_attache, _event, _investigate, true);
|
||||
} catch (final Exception e) {
|
||||
s_logger.error("Exception caught while handling disconnect: ", e);
|
||||
} finally {
|
||||
|
|
@ -1596,7 +1599,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
AgentAttache attache = null;
|
||||
attache = findAttache(hostId);
|
||||
if (attache != null) {
|
||||
handleDisconnect(attache, Event.AgentDisconnected, true);
|
||||
handleDisconnect(attache, Event.AgentDisconnected, true, false);
|
||||
}
|
||||
return true;
|
||||
} else if (event == Event.ShutdownRequested) {
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info(host + " is detected down, but we have a forward attache running, disconnect this one before launching the host");
|
||||
}
|
||||
removeAgent(agentattache, Status.Disconnected);
|
||||
removeAgent(agentattache, Status.Disconnected, null, null, false);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -286,16 +286,16 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
}
|
||||
|
||||
@Override
|
||||
protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate) {
|
||||
return handleDisconnect(attache, event, investigate, true);
|
||||
protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate, boolean ha) {
|
||||
return handleDisconnect(attache, event, investigate, true, ha);
|
||||
}
|
||||
|
||||
protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast) {
|
||||
protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast, boolean ha) {
|
||||
if (agent == null) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (super.handleDisconnect(agent, event, investigate)) {
|
||||
if (super.handleDisconnect(agent, event, investigate, ha)) {
|
||||
if (broadcast) {
|
||||
notifyNodesInCluster(agent);
|
||||
}
|
||||
|
|
@ -709,22 +709,22 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
}
|
||||
|
||||
@Override
|
||||
public void removeAgent(AgentAttache attache, Status nextState) {
|
||||
public void removeAgent(AgentAttache attache, Status nextState, Event event, Boolean investigate, boolean ha) {
|
||||
if (attache == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
super.removeAgent(attache, nextState);
|
||||
super.removeAgent(attache, nextState, event, investigate, ha);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException {
|
||||
boolean result = false;
|
||||
boolean result = false;
|
||||
if (event == Event.RequestAgentRebalance) {
|
||||
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
|
||||
} else if (event == Event.StartAgentRebalance) {
|
||||
try {
|
||||
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
|
||||
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Unable to rebalance host id=" + agentId, e);
|
||||
}
|
||||
|
|
@ -1042,7 +1042,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
try {
|
||||
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
handleDisconnect(findAttache(hostId), Event.RebalanceFailed, false);
|
||||
handleDisconnect(findAttache(hostId), Event.RebalanceFailed, false, false);
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
|
||||
}
|
||||
|
|
@ -1059,7 +1059,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
synchronized (_agents) {
|
||||
ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId);
|
||||
if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
|
||||
handleDisconnect(attache, Event.StartAgentRebalance, false);
|
||||
handleDisconnect(attache, Event.StartAgentRebalance, false, false);
|
||||
ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId);
|
||||
if (forwardAttache == null) {
|
||||
s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");
|
||||
|
|
|
|||
Loading…
Reference in New Issue