diff --git a/.vmops.log.swp b/.vmops.log.swp deleted file mode 100644 index f2d18773fda..00000000000 Binary files a/.vmops.log.swp and /dev/null differ diff --git a/api/src/com/cloud/host/Status.java b/api/src/com/cloud/host/Status.java index e34cc030954..bbbf68f9fcf 100755 --- a/api/src/com/cloud/host/Status.java +++ b/api/src/com/cloud/host/Status.java @@ -21,7 +21,6 @@ import java.util.List; import java.util.Set; import com.cloud.utils.fsm.NoTransitionException; -import com.cloud.utils.fsm.StateMachine; import com.cloud.utils.fsm.StateMachine2; public enum Status { @@ -33,7 +32,7 @@ public enum Status { Alert(true, true, true), Removed(true, false, true), Error(true, false, true), - Rebalancing(false, false, false); + Rebalancing(true, false, true); private final boolean updateManagementServer; private final boolean checkManagementServer; @@ -157,7 +156,9 @@ public enum Status { s_fsm.addTransition(Status.Alert, Event.ShutdownRequested, Status.Disconnected); s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected); s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting); + s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected); s_fsm.addTransition(Status.Rebalancing, Event.AgentConnected, Status.Connecting); + s_fsm.addTransition(Status.Rebalancing, Event.AgentDisconnected, Status.Rebalancing); s_fsm.addTransition(Status.Error, Event.AgentConnected, Status.Connecting); } diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index 25e8703d11e..2395924b372 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -557,6 +557,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { return req.getSequence(); } + public void removeAgent(AgentAttache attache, Status nextState) { if (attache == null) { return; @@ -581,6 +582,13 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { if (removed != null) { removed.disconnect(nextState); } + + for (Pair monitor : _hostMonitors) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName()); + } + monitor.second().processDisconnect(hostId, nextState); + } } protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, final StartupCommand[] cmd, boolean forRebalance) throws ConnectionException { @@ -848,12 +856,6 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { removeAgent(attache, nextStatus); disconnectAgent(host, event, _nodeId); - for (Pair monitor : _hostMonitors) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName()); - } - monitor.second().processDisconnect(hostId, nextStatus); - } return true; } @@ -1014,7 +1016,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { return false; } - if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) { + if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert && host.getStatus() != Status.Rebalancing) { s_logger.info("Unable to disconnect host because it is not in the correct state: host=" + hostId + "; Status=" + host.getStatus()); return false; } diff --git a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index cc464b07c2d..546a1e1d424 100755 --- a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -42,7 +42,6 @@ import com.cloud.agent.api.TransferAgentCommand; import com.cloud.agent.transport.Request; import com.cloud.agent.transport.Request.Version; import com.cloud.agent.transport.Response; -import com.cloud.api.commands.UpdateHostPasswordCmd; import com.cloud.cluster.ClusterManager; import com.cloud.cluster.ClusterManagerListener; import com.cloud.cluster.ClusteredAgentRebalanceService; @@ -64,18 +63,16 @@ import com.cloud.host.Status; import com.cloud.host.Status.Event; import com.cloud.resource.ServerResource; import com.cloud.storage.resource.DummySecondaryStorageResource; -import com.cloud.user.User; import com.cloud.utils.DateUtil; import com.cloud.utils.NumbersUtil; import com.cloud.utils.component.Adapters; import com.cloud.utils.component.ComponentLocator; import com.cloud.utils.component.Inject; import com.cloud.utils.concurrency.NamedThreadFactory; -import com.cloud.utils.db.DB; +import com.cloud.utils.db.SearchCriteria.Op; import com.cloud.utils.db.SearchCriteria2; import com.cloud.utils.db.SearchCriteriaService; import com.cloud.utils.db.Transaction; -import com.cloud.utils.db.SearchCriteria.Op; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.utils.nio.Link; import com.cloud.utils.nio.Task; @@ -657,22 +654,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust @Override public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException { + boolean result = false; if (event == Event.RequestAgentRebalance) { return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId); } else if (event == Event.StartAgentRebalance) { - boolean result = false; try { - result = rebalanceHost(agentId, currentOwnerId, futureOwnerId); + result = rebalanceHost(agentId, currentOwnerId, futureOwnerId); } catch (Exception e) { s_logger.warn("Unable to rebalance host id=" + agentId, e); - } finally { - if (!result) { - failRebalance(agentId); - return false; - } } } - return true; + return result; } @Override @@ -899,18 +891,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust if (currentOwnerId == _nodeId) { if (!startRebalance(hostId)) { s_logger.debug("Failed to start agent rebalancing"); - failRebalance(hostId); + finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed); return false; } try { Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance); if (answer == null || !answer[0].getResult()) { - s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process"); result = false; } } catch (Exception ex) { - s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex); + s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex); result = false; } @@ -918,7 +909,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust s_logger.debug("Successfully transfered host id=" + hostId + " to management server " + futureOwnerId); finishRebalance(hostId, futureOwnerId, Event.RebalanceCompleted); } else { - s_logger.debug("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId); + s_logger.warn("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId); finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed); } @@ -926,13 +917,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust HostVO host = _hostDao.findById(hostId); try { if (s_logger.isDebugEnabled()) { - s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") as a part of rebalance process"); + s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process"); } result = loadDirectlyConnectedHost(host, true); } catch (Exception ex) { - s_logger.warn("Unable to load directly connected host " + host.getId() + " as a part of rebalance due to exception: ", ex); + s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process due to:", ex); result = false; } + + if (result) { + s_logger.debug("Successfully loaded directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process"); + } else { + s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process"); + } } return result; @@ -943,7 +940,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust boolean success = (event == Event.RebalanceCompleted) ? true : false; if (s_logger.isDebugEnabled()) { - s_logger.debug("Finishing rebalancing for the agent " + hostId + " with result " + success); + s_logger.debug("Finishing rebalancing for the agent " + hostId + " with event " + event); } AgentAttache attache = findAttache(hostId); @@ -986,13 +983,12 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust try { s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId); _hostTransferDao.completeAgentTransfer(hostId); - reconnect(hostId); + handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed); } catch (Exception ex) { s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup"); } } - @DB protected boolean startRebalance(final long hostId) { HostVO host = _hostDao.findById(hostId); @@ -1004,7 +1000,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust synchronized (_agents) { ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId); if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) { - removeAgent(attache, Status.Rebalancing); + handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance); ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId); if (forwardAttache == null) { s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process"); @@ -1021,17 +1017,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } return false; } - } - - Transaction txn = Transaction.currentTxn(); - txn.start(); - - s_logger.debug("Updating host id=" + hostId + " with the status " + Status.Rebalancing); - host.setManagementServerId(null); - _agentMgr.agentStatusTransitTo(host, Event.StartAgentRebalance, _nodeId); + } _hostTransferDao.startAgentTransfer(hostId); - txn.commit(); - return true; } @@ -1063,19 +1050,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust @Override public void run() { - boolean result = false; try { if (s_logger.isDebugEnabled()) { s_logger.debug("Rebalancing host id=" + hostId); } - result = rebalanceHost(hostId, currentOwnerId, futureOwnerId); + rebalanceHost(hostId, currentOwnerId, futureOwnerId); } catch (Exception e) { s_logger.warn("Unable to rebalance host id=" + hostId, e); - } finally { - if (!result) { - failRebalance(hostId); - } StackMaid.current().exitCleanup(); } }