From bb4237336fdd722a06dcd0fbb031c816456c3303 Mon Sep 17 00:00:00 2001 From: frank Date: Fri, 23 Sep 2011 16:22:14 -0700 Subject: [PATCH] Bug 11522 - New agent manager refine reconnectHost logic --- .../cloud/api/commands/ReconnectHostCmd.java | 2 +- .../com/cloud/resource/ResourceService.java | 2 +- server/src/com/cloud/agent/AgentManager.java | 4 +- .../cloud/agent/manager/AgentManagerImpl.java | 54 ++++++------------- .../manager/ClusteredAgentManagerImpl.java | 30 ++++++----- .../agent/manager/DirectAgentAttache.java | 2 +- .../cloud/resource/ResourceManagerImpl.java | 13 ++--- 7 files changed, 40 insertions(+), 67 deletions(-) mode change 100644 => 100755 api/src/com/cloud/api/commands/ReconnectHostCmd.java mode change 100644 => 100755 api/src/com/cloud/resource/ResourceService.java mode change 100644 => 100755 server/src/com/cloud/agent/manager/DirectAgentAttache.java diff --git a/api/src/com/cloud/api/commands/ReconnectHostCmd.java b/api/src/com/cloud/api/commands/ReconnectHostCmd.java old mode 100644 new mode 100755 index 332d422c240..0c7d3b13b15 --- a/api/src/com/cloud/api/commands/ReconnectHostCmd.java +++ b/api/src/com/cloud/api/commands/ReconnectHostCmd.java @@ -107,7 +107,7 @@ public class ReconnectHostCmd extends BaseAsyncCmd { } else { throw new ServerApiException(BaseCmd.INTERNAL_ERROR, "Failed to reconnect host"); } - } catch (AgentUnavailableException ex) { + } catch (Exception ex) { s_logger.warn("Exception: ", ex); throw new ServerApiException(BaseCmd.RESOURCE_UNAVAILABLE_ERROR, ex.getMessage()); } diff --git a/api/src/com/cloud/resource/ResourceService.java b/api/src/com/cloud/resource/ResourceService.java old mode 100644 new mode 100755 index d6e5ca9aa8e..dcc56ce5643 --- a/api/src/com/cloud/resource/ResourceService.java +++ b/api/src/com/cloud/resource/ResourceService.java @@ -46,7 +46,7 @@ public interface ResourceService { Host cancelMaintenance(CancelMaintenanceCmd cmd); - Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException; + Host reconnectHost(ReconnectHostCmd cmd); /** * We will automatically create a cloud.com cluster to attach to the external cluster and return a hyper host to perform diff --git a/server/src/com/cloud/agent/AgentManager.java b/server/src/com/cloud/agent/AgentManager.java index b48382e352e..0f563792a27 100755 --- a/server/src/com/cloud/agent/AgentManager.java +++ b/server/src/com/cloud/agent/AgentManager.java @@ -189,8 +189,6 @@ public interface AgentManager extends Manager { public boolean executeUserRequest(long hostId, Event event) throws AgentUnavailableException; - public boolean reconnect(final long hostId) throws AgentUnavailableException; - boolean isHostNativeHAEnabled(long hostId); Answer sendTo(Long dcId, HypervisorType type, Command cmd); @@ -219,4 +217,6 @@ public interface AgentManager extends Manager { public boolean disconnectAgent(HostVO host, Status.Event e, long msId); public void pullAgentToMaintenance(long hostId); + + boolean reconnect(long hostId); } diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index c5e098ec70f..4829a60f71a 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -718,9 +718,6 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { public void updateStatus(HostVO host, Status.Event event) { _hostDao.updateStatus(host, event, _nodeId); } - public void disconnect(AgentAttache attache, final Status.Event event, final boolean investigate) { - _executor.submit(new DisconnectTask(attache, event, investigate)); - } protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, final StartupCommand[] cmd, boolean forRebalance) throws ConnectionException { long hostId = attache.getId(); @@ -737,19 +734,19 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { ConnectionException ce = (ConnectionException)e; if (ce.isSetupError()) { s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage()); - handleDisconnect(attache, Event.AgentDisconnected); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected); throw ce; } else { s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + e.getMessage()); - handleDisconnect(attache, Event.ShutdownRequested); + handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested); return attache; } } else if (e instanceof HypervisorVersionChangedException) { - handleDisconnect(attache, Event.HypervisorVersionChanged); + handleDisconnectWithoutInvestigation(attache, Event.HypervisorVersionChanged); throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); } else { s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e); - handleDisconnect(attache, Event.AgentDisconnected); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected); throw new CloudRuntimeException("Unable to connect " + attache.getId(), e); } } @@ -763,7 +760,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { // this is tricky part for secondary storage // make it as disconnected, wait for secondary storage VM to be up // return the attache instead of null, even it is disconnectede - handleDisconnect(attache, Event.AgentDisconnected); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected); } _hostDao.updateStatus(host, Event.Ready, _nodeId); @@ -1010,7 +1007,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { return _name; } - protected boolean handleDisconnect(AgentAttache attache, Status.Event event) { + protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event) { long hostId = attache.getId(); s_logger.info("Host " + hostId + " is disconnecting with event " + event); @@ -1128,7 +1125,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { } } - handleDisconnect(attache, event); + handleDisconnectWithoutInvestigation(attache, event); /*TODO: call HA manager in monitors * host = _hostDao.findById(host.getId()); if (!event.equals(Event.PrepareUnmanaged) && !event.equals(Event.HypervisorVersionChanged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) { @@ -1155,7 +1152,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { if (_investigate == true) { handleDisconnectWithInvestigation(_attache, _event); } else { - handleDisconnect(_attache, _event); + handleDisconnectWithoutInvestigation(_attache, _event); } } catch (final Exception e) { s_logger.error("Exception caught while handling disconnect: ", e); @@ -1216,7 +1213,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { } @Override - public boolean reconnect(final long hostId) throws AgentUnavailableException { + public boolean reconnect(final long hostId) { HostVO host; host = _hostDao.findById(hostId); @@ -1236,46 +1233,25 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { return false; } - disconnect(attache, Event.ShutdownRequested, false); + disconnectWithoutInvestigation(attache, Event.ShutdownRequested); return true; } @Override public boolean executeUserRequest(long hostId, Event event) throws AgentUnavailableException { - if (event == Event.MaintenanceRequested) { - return maintain(hostId); - } else if (event == Event.ResetRequested) { - return cancelMaintenance(hostId); - } else if (event == Event.PrepareUnmanaged) { - return disconnect(hostId); - } else if (event == Event.Remove) { - User caller = _accountMgr.getActiveUser(User.UID_SYSTEM); - return deleteHost(hostId, false, false, caller); - } else if (event == Event.AgentDisconnected) { + if (event == Event.AgentDisconnected) { if (s_logger.isDebugEnabled()) { s_logger.debug("Received agent disconnect event for host " + hostId); } AgentAttache attache = null; attache = findAttache(hostId); if (attache != null) { - handleDisconnect(attache, Event.AgentDisconnected, false); + handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected); } return true; } else if (event == Event.ShutdownRequested) { return reconnect(hostId); } - else if (event == Event.UpdatePassword) { - AgentAttache attache = findAttache(hostId); - if (attache != null) { - DetailVO nv = _detailsDao.findDetail(hostId, ApiConstants.USERNAME); - String username = nv.getValue(); - nv = _detailsDao.findDetail(hostId, ApiConstants.PASSWORD); - String password = nv.getValue(); - UpdateHostPasswordCommand cmd = new UpdateHostPasswordCommand(username, password); - attache.updatePassword(cmd); - return true; - } - } return false; } @@ -1900,9 +1876,9 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { final String reason = shutdown.getReason(); s_logger.info("Host " + attache.getId() + " has informed us that it is shutting down with reason " + reason + " and detail " + shutdown.getDetail()); if (reason.equals(ShutdownCommand.Update)) { - disconnect(attache, Event.UpdateNeeded, false); + disconnectWithoutInvestigation(attache, Event.UpdateNeeded); } else if (reason.equals(ShutdownCommand.Requested)) { - disconnect(attache, Event.ShutdownRequested, false); + disconnectWithoutInvestigation(attache, Event.ShutdownRequested); } return; } else if (cmd instanceof AgentControlCommand) { @@ -1999,7 +1975,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { final Link link = task.getLink(); final AgentAttache attache = (AgentAttache) link.attachment(); if (attache != null) { - disconnect(attache, Event.AgentDisconnected, true); + disconnectWithInvestigation(attache, Event.AgentDisconnected); } else { s_logger.info("Connection from " + link.getIpAddress() + " closed but no cleanup was done."); link.close(); diff --git a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index 59b45f95c1e..6d065ed238f 100755 --- a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -255,7 +255,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } @Override - protected boolean handleDisconnect(AgentAttache attache, Status.Event event) { + protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event) { return handleDisconnect(attache, event, false, true); } @@ -265,19 +265,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast) { - if (agent == null) { - return true; - } - boolean res; if (!investigate) { - res = super.handleDisconnect(agent, event); + res = super.handleDisconnectWithoutInvestigation(agent, event); } else { res = super.handleDisconnectWithInvestigation(agent, event); } - if (res && broadcast) { - notifyNodesInCluster(agent); + if (res) { + if (broadcast) { + notifyNodesInCluster(agent); + } return true; } else { return false; @@ -302,12 +300,18 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } @Override - public boolean reconnect(final long hostId) throws AgentUnavailableException { - Boolean result = _clusterMgr.propagateAgentEvent(hostId, Event.ShutdownRequested); - if (result != null) { - return result; + public boolean reconnect(final long hostId) { + Boolean result; + try { + result = _clusterMgr.propagateAgentEvent(hostId, Event.ShutdownRequested); + if (result != null) { + return result; + } + } catch (AgentUnavailableException e) { + s_logger.debug("cannot propagate agent reconnect because agent is not available", e); + return false; } - + return super.reconnect(hostId); } diff --git a/server/src/com/cloud/agent/manager/DirectAgentAttache.java b/server/src/com/cloud/agent/manager/DirectAgentAttache.java old mode 100644 new mode 100755 index bfc02f9bbf6..916603374cb --- a/server/src/com/cloud/agent/manager/DirectAgentAttache.java +++ b/server/src/com/cloud/agent/manager/DirectAgentAttache.java @@ -143,7 +143,7 @@ public class DirectAgentAttache extends AgentAttache { PingCommand cmd = resource.getCurrentStatus(_id); if (cmd == null) { s_logger.warn("Unable to get current status on " + _id); - _mgr.disconnect(DirectAgentAttache.this, Event.AgentDisconnected, true); + _mgr.disconnectWithInvestigation(DirectAgentAttache.this, Event.AgentDisconnected); return; } if (s_logger.isDebugEnabled()) { diff --git a/server/src/com/cloud/resource/ResourceManagerImpl.java b/server/src/com/cloud/resource/ResourceManagerImpl.java index 696a1f8d235..c13c49a6cf3 100755 --- a/server/src/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/com/cloud/resource/ResourceManagerImpl.java @@ -955,7 +955,7 @@ public class ResourceManagerImpl implements ResourceManager, ResourceService, Ma } @Override - public Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException { + public Host reconnectHost(ReconnectHostCmd cmd) { Long hostId = cmd.getId(); HostVO host = _hostDao.findById(hostId); @@ -963,11 +963,7 @@ public class ResourceManagerImpl implements ResourceManager, ResourceService, Ma throw new InvalidParameterValueException("Host with id " + hostId.toString() + " doesn't exist"); } - boolean result = _agentMgr.reconnect(hostId); - if (result) { - return host; - } - throw new CloudRuntimeException("Failed to reconnect host with id " + hostId.toString() + ", internal error."); + return (_agentMgr.reconnect(hostId) ? host : null); } @Override @@ -1657,8 +1653,6 @@ public class ResourceManagerImpl implements ResourceManager, ResourceService, Ma public boolean executeUserRequest(long hostId, ResourceState.Event event) throws AgentUnavailableException { if (event == ResourceState.Event.AdminAskMaintenace) { return doMaintain(hostId); - } else if (event == ResourceState.Event.AdminAskReconnect) { - return doReconncetHost(hostId); } else if (event == ResourceState.Event.AdminCancelMaintenance) { return doCancelMaintenance(hostId); } else if (event == ResourceState.Event.DeleteHost) { @@ -1740,6 +1734,5 @@ public class ResourceManagerImpl implements ResourceManager, ResourceService, Ma return true; } - } - + } }