bug 12790: use processDisconnect() when disconnect the agent during agent LB process

status 12790: resolved fixed
This commit is contained in:
Alena Prokharchyk 2011-12-29 16:56:04 -08:00
parent f6c163e46e
commit 4439fd8a51
3 changed files with 43 additions and 54 deletions

View File

@ -33,7 +33,7 @@ public enum Status {
Maintenance(false, false, false),
Alert(true, true, true),
Removed(true, false, true),
Rebalancing(false, false, false);
Rebalancing(true, false, true);
private final boolean updateManagementServer;
private final boolean checkManagementServer;
@ -194,6 +194,7 @@ public enum Status {
s_fsm.addTransition(Status.Alert, Event.AgentDisconnected, Status.Alert);
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected);
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting);
s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected);
}
public static void main(String[] args) {

View File

@ -113,8 +113,8 @@ import com.cloud.host.Status.Event;
import com.cloud.host.dao.HostDao;
import com.cloud.host.dao.HostDetailsDao;
import com.cloud.host.dao.HostTagsDao;
import com.cloud.hypervisor.HypervisorGuruManager;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.hypervisor.HypervisorGuruManager;
import com.cloud.hypervisor.kvm.resource.KvmDummyResourceBase;
import com.cloud.network.IPAddressVO;
import com.cloud.network.dao.IPAddressDao;
@ -921,7 +921,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
}
}
public void removeAgent(AgentAttache attache, Status nextState) {
public void removeAgent(AgentAttache attache, Status nextState, Event event, Boolean investigate) {
if (attache == null) {
return;
}
@ -945,6 +945,20 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
if (removed != null) {
removed.disconnect(nextState);
}
HostVO host = _hostDao.findById(hostId);
if (event != null && investigate != null) {
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
}
}
for (Pair<Integer, Listener> monitor : _hostMonitors) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
}
monitor.second().processDisconnect(hostId, nextState);
}
}
@Override
@ -998,7 +1012,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
HostVO host = _hostDao.findById(hostId);
if (host == null) {
s_logger.warn("Can't find host with " + hostId);
removeAgent(attache, Status.Removed);
removeAgent(attache, Status.Removed, event, investigate);
return true;
}
@ -1008,7 +1022,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
s_logger.debug("Host " + hostId + " is already " + currentState);
}
if (currentState != Status.PrepareForMaintenance) {
removeAgent(attache, currentState);
removeAgent(attache, currentState, event, investigate);
}
return true;
}
@ -1096,21 +1110,9 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Deregistering link for " + hostId + " with state " + nextState);
}
removeAgent(attache, nextState);
removeAgent(attache, nextState, event, investigate);
_hostDao.disconnect(host, event, _nodeId);
host = _hostDao.findById(host.getId());
if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
}
for (Pair<Integer, Listener> monitor : _hostMonitors) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
}
monitor.second().processDisconnect(hostId, nextState);
}
return true;
}
@ -1531,7 +1533,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
return false;
}
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) {
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert && host.getStatus() != Status.Rebalancing) {
s_logger.info("Unable to disconnect host because it is not in the correct state: host=" + hostId + "; Status=" + host.getStatus());
return false;
}

View File

@ -169,7 +169,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
if (s_logger.isInfoEnabled()) {
s_logger.info(host + " is detected down, but we have a forward attache running, disconnect this one before launching the host");
}
removeAgent(agentattache, Status.Disconnected);
removeAgent(agentattache, Status.Disconnected, null, null);
} else {
continue;
}
@ -709,32 +709,27 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
@Override
public void removeAgent(AgentAttache attache, Status nextState) {
public void removeAgent(AgentAttache attache, Status nextState, Event event, Boolean investigate) {
if (attache == null) {
return;
}
super.removeAgent(attache, nextState);
super.removeAgent(attache, nextState, event, investigate);
}
@Override
public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException {
boolean result = false;
if (event == Event.RequestAgentRebalance) {
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
} else if (event == Event.StartAgentRebalance) {
boolean result = false;
try {
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
} catch (Exception e) {
s_logger.warn("Unable to rebalance host id=" + agentId, e);
} finally {
if (!result) {
failRebalance(agentId);
return false;
}
}
}
return true;
return result;
}
@Override
@ -958,18 +953,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
if (currentOwnerId == _nodeId) {
if (!startRebalance(hostId)) {
s_logger.debug("Failed to start agent rebalancing");
failRebalance(hostId);
finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed);
return false;
}
try {
Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance);
if (answer == null || !answer[0].getResult()) {
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process");
result = false;
}
} catch (Exception ex) {
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex);
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex);
result = false;
}
@ -977,7 +971,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
s_logger.debug("Successfully transfered host id=" + hostId + " to management server " + futureOwnerId);
finishRebalance(hostId, futureOwnerId, Event.RebalanceCompleted);
} else {
s_logger.debug("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId);
s_logger.warn("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId);
finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed);
}
@ -985,13 +979,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
HostVO host = _hostDao.findById(hostId);
try {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") as a part of rebalance process");
s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
}
result = loadDirectlyConnectedHost(host, true);
} catch (Exception ex) {
s_logger.warn("Unable to load directly connected host " + host.getId() + " as a part of rebalance due to exception: ", ex);
s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process due to:", ex);
result = false;
}
if (result) {
s_logger.debug("Successfully loaded directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
} else {
s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
}
}
return result;
@ -1002,7 +1002,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
boolean success = (event == Event.RebalanceCompleted) ? true : false;
if (s_logger.isDebugEnabled()) {
s_logger.debug("Finishing rebalancing for the agent " + hostId + " with result " + success);
s_logger.debug("Finishing rebalancing for the agent " + hostId + " with event " + event);
}
AgentAttache attache = findAttache(hostId);
@ -1042,13 +1042,12 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
try {
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
_hostTransferDao.completeAgentTransfer(hostId);
reconnect(hostId);
handleDisconnect(findAttache(hostId), Event.RebalanceFailed, false);
} catch (Exception ex) {
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
}
}
@DB
protected boolean startRebalance(final long hostId) {
HostVO host = _hostDao.findById(hostId);
@ -1060,7 +1059,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
synchronized (_agents) {
ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId);
if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
removeAgent(attache, Status.Rebalancing);
handleDisconnect(attache, Event.StartAgentRebalance, false);
ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId);
if (forwardAttache == null) {
s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");
@ -1079,15 +1078,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
}
Transaction txn = Transaction.currentTxn();
txn.start();
s_logger.debug("Updating host id=" + hostId + " with the status " + Status.Rebalancing);
host.setManagementServerId(null);
_hostDao.updateStatus(host, Event.StartAgentRebalance, _nodeId);
_hostTransferDao.startAgentTransfer(hostId);
txn.commit();
return true;
}
@ -1119,19 +1110,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
@Override
public void run() {
boolean result = false;
try {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Rebalancing host id=" + hostId);
}
result = rebalanceHost(hostId, currentOwnerId, futureOwnerId);
rebalanceHost(hostId, currentOwnerId, futureOwnerId);
} catch (Exception e) {
s_logger.warn("Unable to rebalance host id=" + hostId, e);
} finally {
if (!result) {
failRebalance(hostId);
}
StackMaid.current().exitCleanup();
}
}