mirror of https://github.com/apache/cloudstack.git
bug 12790: use processDisconnect() when disconnecting the agent during the agent LB process
status 12790: resolved fixed
This commit is contained in:
parent
f6c163e46e
commit
4439fd8a51
|
|
@ -33,7 +33,7 @@ public enum Status {
|
|||
Maintenance(false, false, false),
|
||||
Alert(true, true, true),
|
||||
Removed(true, false, true),
|
||||
Rebalancing(false, false, false);
|
||||
Rebalancing(true, false, true);
|
||||
|
||||
private final boolean updateManagementServer;
|
||||
private final boolean checkManagementServer;
|
||||
|
|
@ -194,6 +194,7 @@ public enum Status {
|
|||
s_fsm.addTransition(Status.Alert, Event.AgentDisconnected, Status.Alert);
|
||||
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected);
|
||||
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting);
|
||||
s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
|
|
|||
|
|
@ -113,8 +113,8 @@ import com.cloud.host.Status.Event;
|
|||
import com.cloud.host.dao.HostDao;
|
||||
import com.cloud.host.dao.HostDetailsDao;
|
||||
import com.cloud.host.dao.HostTagsDao;
|
||||
import com.cloud.hypervisor.HypervisorGuruManager;
|
||||
import com.cloud.hypervisor.Hypervisor.HypervisorType;
|
||||
import com.cloud.hypervisor.HypervisorGuruManager;
|
||||
import com.cloud.hypervisor.kvm.resource.KvmDummyResourceBase;
|
||||
import com.cloud.network.IPAddressVO;
|
||||
import com.cloud.network.dao.IPAddressDao;
|
||||
|
|
@ -921,7 +921,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
}
|
||||
}
|
||||
|
||||
public void removeAgent(AgentAttache attache, Status nextState) {
|
||||
public void removeAgent(AgentAttache attache, Status nextState, Event event, Boolean investigate) {
|
||||
if (attache == null) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -945,6 +945,20 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
if (removed != null) {
|
||||
removed.disconnect(nextState);
|
||||
}
|
||||
|
||||
HostVO host = _hostDao.findById(hostId);
|
||||
if (event != null && investigate != null) {
|
||||
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
|
||||
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
|
||||
}
|
||||
}
|
||||
|
||||
for (Pair<Integer, Listener> monitor : _hostMonitors) {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
|
||||
}
|
||||
monitor.second().processDisconnect(hostId, nextState);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -998,7 +1012,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
HostVO host = _hostDao.findById(hostId);
|
||||
if (host == null) {
|
||||
s_logger.warn("Can't find host with " + hostId);
|
||||
removeAgent(attache, Status.Removed);
|
||||
removeAgent(attache, Status.Removed, event, investigate);
|
||||
return true;
|
||||
|
||||
}
|
||||
|
|
@ -1008,7 +1022,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
s_logger.debug("Host " + hostId + " is already " + currentState);
|
||||
}
|
||||
if (currentState != Status.PrepareForMaintenance) {
|
||||
removeAgent(attache, currentState);
|
||||
removeAgent(attache, currentState, event, investigate);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1096,21 +1110,9 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Deregistering link for " + hostId + " with state " + nextState);
|
||||
}
|
||||
removeAgent(attache, nextState);
|
||||
removeAgent(attache, nextState, event, investigate);
|
||||
_hostDao.disconnect(host, event, _nodeId);
|
||||
|
||||
host = _hostDao.findById(host.getId());
|
||||
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
|
||||
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
|
||||
}
|
||||
|
||||
for (Pair<Integer, Listener> monitor : _hostMonitors) {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
|
||||
}
|
||||
monitor.second().processDisconnect(hostId, nextState);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1531,7 +1533,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) {
|
||||
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert && host.getStatus() != Status.Rebalancing) {
|
||||
s_logger.info("Unable to disconnect host because it is not in the correct state: host=" + hostId + "; Status=" + host.getStatus());
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info(host + " is detected down, but we have a forward attache running, disconnect this one before launching the host");
|
||||
}
|
||||
removeAgent(agentattache, Status.Disconnected);
|
||||
removeAgent(agentattache, Status.Disconnected, null, null);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -709,32 +709,27 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
}
|
||||
|
||||
@Override
|
||||
public void removeAgent(AgentAttache attache, Status nextState) {
|
||||
public void removeAgent(AgentAttache attache, Status nextState, Event event, Boolean investigate) {
|
||||
if (attache == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
super.removeAgent(attache, nextState);
|
||||
super.removeAgent(attache, nextState, event, investigate);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException {
|
||||
boolean result = false;
|
||||
if (event == Event.RequestAgentRebalance) {
|
||||
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
|
||||
} else if (event == Event.StartAgentRebalance) {
|
||||
boolean result = false;
|
||||
try {
|
||||
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
|
||||
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Unable to rebalance host id=" + agentId, e);
|
||||
} finally {
|
||||
if (!result) {
|
||||
failRebalance(agentId);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -958,18 +953,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
if (currentOwnerId == _nodeId) {
|
||||
if (!startRebalance(hostId)) {
|
||||
s_logger.debug("Failed to start agent rebalancing");
|
||||
failRebalance(hostId);
|
||||
finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed);
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance);
|
||||
if (answer == null || !answer[0].getResult()) {
|
||||
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process");
|
||||
result = false;
|
||||
}
|
||||
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex);
|
||||
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex);
|
||||
result = false;
|
||||
}
|
||||
|
||||
|
|
@ -977,7 +971,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
s_logger.debug("Successfully transfered host id=" + hostId + " to management server " + futureOwnerId);
|
||||
finishRebalance(hostId, futureOwnerId, Event.RebalanceCompleted);
|
||||
} else {
|
||||
s_logger.debug("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId);
|
||||
s_logger.warn("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId);
|
||||
finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed);
|
||||
}
|
||||
|
||||
|
|
@ -985,13 +979,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
HostVO host = _hostDao.findById(hostId);
|
||||
try {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") as a part of rebalance process");
|
||||
s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
|
||||
}
|
||||
result = loadDirectlyConnectedHost(host, true);
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Unable to load directly connected host " + host.getId() + " as a part of rebalance due to exception: ", ex);
|
||||
s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process due to:", ex);
|
||||
result = false;
|
||||
}
|
||||
|
||||
if (result) {
|
||||
s_logger.debug("Successfully loaded directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
|
||||
} else {
|
||||
s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
@ -1002,7 +1002,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
|
||||
boolean success = (event == Event.RebalanceCompleted) ? true : false;
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Finishing rebalancing for the agent " + hostId + " with result " + success);
|
||||
s_logger.debug("Finishing rebalancing for the agent " + hostId + " with event " + event);
|
||||
}
|
||||
|
||||
AgentAttache attache = findAttache(hostId);
|
||||
|
|
@ -1042,13 +1042,12 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
try {
|
||||
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
reconnect(hostId);
|
||||
handleDisconnect(findAttache(hostId), Event.RebalanceFailed, false);
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
|
||||
}
|
||||
}
|
||||
|
||||
@DB
|
||||
protected boolean startRebalance(final long hostId) {
|
||||
HostVO host = _hostDao.findById(hostId);
|
||||
|
||||
|
|
@ -1060,7 +1059,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
synchronized (_agents) {
|
||||
ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId);
|
||||
if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
|
||||
removeAgent(attache, Status.Rebalancing);
|
||||
handleDisconnect(attache, Event.StartAgentRebalance, false);
|
||||
ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId);
|
||||
if (forwardAttache == null) {
|
||||
s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");
|
||||
|
|
@ -1079,15 +1078,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
}
|
||||
}
|
||||
|
||||
Transaction txn = Transaction.currentTxn();
|
||||
txn.start();
|
||||
|
||||
s_logger.debug("Updating host id=" + hostId + " with the status " + Status.Rebalancing);
|
||||
host.setManagementServerId(null);
|
||||
_hostDao.updateStatus(host, Event.StartAgentRebalance, _nodeId);
|
||||
_hostTransferDao.startAgentTransfer(hostId);
|
||||
txn.commit();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1119,19 +1110,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
|
||||
@Override
|
||||
public void run() {
|
||||
boolean result = false;
|
||||
try {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Rebalancing host id=" + hostId);
|
||||
}
|
||||
result = rebalanceHost(hostId, currentOwnerId, futureOwnerId);
|
||||
rebalanceHost(hostId, currentOwnerId, futureOwnerId);
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Unable to rebalance host id=" + hostId, e);
|
||||
|
||||
} finally {
|
||||
if (!result) {
|
||||
failRebalance(hostId);
|
||||
}
|
||||
StackMaid.current().exitCleanup();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue