diff --git a/api/src/com/cloud/api/commands/DeleteHostCmd.java b/api/src/com/cloud/api/commands/DeleteHostCmd.java index 851897666ea..7ef817e5982 100644 --- a/api/src/com/cloud/api/commands/DeleteHostCmd.java +++ b/api/src/com/cloud/api/commands/DeleteHostCmd.java @@ -16,8 +16,8 @@ * */ -package com.cloud.api.commands; - +package com.cloud.api.commands; + import org.apache.log4j.Logger; import com.cloud.api.ApiConstants; @@ -28,52 +28,56 @@ import com.cloud.api.ServerApiException; import com.cloud.api.response.SuccessResponse; import com.cloud.user.Account; +@Implementation(description = "Deletes a host.", responseObject = SuccessResponse.class) +public class DeleteHostCmd extends BaseCmd { + public static final Logger s_logger = Logger.getLogger(DeleteHostCmd.class.getName()); -@Implementation(description="Deletes a host.", responseObject=SuccessResponse.class) -public class DeleteHostCmd extends BaseCmd { - public static final Logger s_logger = Logger.getLogger(DeleteHostCmd.class.getName()); - - private static final String s_name = "deletehostresponse"; - - ///////////////////////////////////////////////////// - //////////////// API parameters ///////////////////// - ///////////////////////////////////////////////////// + private static final String s_name = "deletehostresponse"; - @Parameter(name=ApiConstants.ID, type=CommandType.LONG, required=true, description="the host ID") + // /////////////////////////////////////////////////// + // ////////////// API parameters ///////////////////// + // /////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ID, type = CommandType.LONG, required = true, description = "the host ID") private Long id; + @Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN, description = "Force delete the host. 
All HA enabled vms running on the host will be put to HA; HA disabled ones will be stopped") + private Boolean forced; - ///////////////////////////////////////////////////// - /////////////////// Accessors /////////////////////// - ///////////////////////////////////////////////////// + // /////////////////////////////////////////////////// + // ///////////////// Accessors /////////////////////// + // /////////////////////////////////////////////////// public Long getId() { return id; } + public boolean isForced() { + return (forced != null) ? forced : false; + } - ///////////////////////////////////////////////////// - /////////////// API Implementation/////////////////// - ///////////////////////////////////////////////////// + // /////////////////////////////////////////////////// + // ///////////// API Implementation/////////////////// + // /////////////////////////////////////////////////// @Override - public String getCommandName() { - return s_name; + public String getCommandName() { + return s_name; } - + @Override public long getEntityOwnerId() { return Account.ACCOUNT_ID_SYSTEM; } - + @Override - public void execute(){ - boolean result = _resourceService.deleteHost(this); + public void execute() { + boolean result = _resourceService.deleteHost(getId(), isForced()); if (result) { SuccessResponse response = new SuccessResponse(getCommandName()); this.setResponseObject(response); } else { throw new ServerApiException(BaseCmd.INTERNAL_ERROR, "Failed to delete host"); } - } + } } \ No newline at end of file diff --git a/api/src/com/cloud/api/commands/StopRouterCmd.java b/api/src/com/cloud/api/commands/StopRouterCmd.java index d60ff779eea..4a82d7177f4 100644 --- a/api/src/com/cloud/api/commands/StopRouterCmd.java +++ b/api/src/com/cloud/api/commands/StopRouterCmd.java @@ -16,8 +16,8 @@ * */ -package com.cloud.api.commands; - +package com.cloud.api.commands; + import org.apache.log4j.Logger; import com.cloud.api.ApiConstants; @@ -34,37 +34,36 @@ import 
com.cloud.exception.ResourceUnavailableException; import com.cloud.network.router.VirtualRouter; import com.cloud.user.Account; +@Implementation(description = "Stops a router.", responseObject = DomainRouterResponse.class) +public class StopRouterCmd extends BaseAsyncCmd { + public static final Logger s_logger = Logger.getLogger(StopRouterCmd.class.getName()); + private static final String s_name = "stoprouterresponse"; -@Implementation(description="Stops a router.", responseObject=DomainRouterResponse.class) -public class StopRouterCmd extends BaseAsyncCmd { - public static final Logger s_logger = Logger.getLogger(StopRouterCmd.class.getName()); - private static final String s_name = "stoprouterresponse"; - - ///////////////////////////////////////////////////// - //////////////// API parameters ///////////////////// - ///////////////////////////////////////////////////// + // /////////////////////////////////////////////////// + // ////////////// API parameters ///////////////////// + // /////////////////////////////////////////////////// - @Parameter(name=ApiConstants.ID, type=CommandType.LONG, required=true, description="the ID of the router") + @Parameter(name = ApiConstants.ID, type = CommandType.LONG, required = true, description = "the ID of the router") private Long id; - - @Parameter(name=ApiConstants.FORCED, type=CommandType.BOOLEAN, required=false, description="Force stop the VM. The caller knows the VM is stopped.") + + @Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN, required = false, description = "Force stop the VM. 
The caller knows the VM is stopped.") private Boolean forced; - ///////////////////////////////////////////////////// - /////////////////// Accessors /////////////////////// - ///////////////////////////////////////////////////// + // /////////////////////////////////////////////////// + // ///////////////// Accessors /////////////////////// + // /////////////////////////////////////////////////// public Long getId() { return id; } - ///////////////////////////////////////////////////// - /////////////// API Implementation/////////////////// - ///////////////////////////////////////////////////// + // /////////////////////////////////////////////////// + // ///////////// API Implementation/////////////////// + // /////////////////////////////////////////////////// @Override - public String getCommandName() { - return s_name; + public String getCommandName() { + return s_name; } @Override @@ -81,35 +80,35 @@ public class StopRouterCmd extends BaseAsyncCmd { public String getEventType() { return EventTypes.EVENT_ROUTER_STOP; } - + @Override public String getEventDescription() { - return "stopping router: " + getId(); + return "stopping router: " + getId(); } - + @Override public AsyncJob.Type getInstanceType() { - return AsyncJob.Type.DomainRouter; + return AsyncJob.Type.DomainRouter; } - + @Override public Long getInstanceId() { - return getId(); + return getId(); } - + public boolean isForced() { return (forced != null) ? 
forced : false; } @Override - public void execute() throws ConcurrentOperationException, ResourceUnavailableException{ + public void execute() throws ConcurrentOperationException, ResourceUnavailableException { VirtualRouter result = _routerService.stopRouter(getId(), isForced()); - if (result != null){ - DomainRouterResponse response =_responseGenerator.createDomainRouterResponse(result); + if (result != null) { + DomainRouterResponse response = _responseGenerator.createDomainRouterResponse(result); response.setResponseName(getCommandName()); this.setResponseObject(response); } else { throw new ServerApiException(BaseCmd.INTERNAL_ERROR, "Failed to stop router"); } - } -} + } +} diff --git a/api/src/com/cloud/resource/ResourceService.java b/api/src/com/cloud/resource/ResourceService.java index 9f32ad3ea96..a3289654807 100644 --- a/api/src/com/cloud/resource/ResourceService.java +++ b/api/src/com/cloud/resource/ResourceService.java @@ -24,7 +24,6 @@ import com.cloud.api.commands.AddHostCmd; import com.cloud.api.commands.AddSecondaryStorageCmd; import com.cloud.api.commands.CancelMaintenanceCmd; import com.cloud.api.commands.DeleteClusterCmd; -import com.cloud.api.commands.DeleteHostCmd; import com.cloud.api.commands.PrepareForMaintenanceCmd; import com.cloud.api.commands.ReconnectHostCmd; import com.cloud.api.commands.UpdateHostCmd; @@ -66,12 +65,12 @@ public interface ResourceService { Host maintain(PrepareForMaintenanceCmd cmd) throws InvalidParameterValueException; /** * Deletes a host + * @param hostId TODO + * @param isForced TODO * - * @param cmd - the command specifying hostId * @param true if deleted, false otherwise - * @throws InvalidParameterValueException */ - boolean deleteHost(DeleteHostCmd cmd) throws InvalidParameterValueException; + boolean deleteHost(long hostId, boolean isForced); Host getHost(long hostId); diff --git a/server/src/com/cloud/agent/AgentManager.java b/server/src/com/cloud/agent/AgentManager.java index a25b1d2bdae..0f8aa39b75b 100755 
--- a/server/src/com/cloud/agent/AgentManager.java +++ b/server/src/com/cloud/agent/AgentManager.java @@ -44,192 +44,224 @@ import com.cloud.service.ServiceOfferingVO; import com.cloud.storage.StoragePoolVO; import com.cloud.storage.VMTemplateVO; import com.cloud.template.VirtualMachineTemplate; +import com.cloud.user.User; import com.cloud.uservm.UserVm; import com.cloud.utils.Pair; import com.cloud.utils.component.Manager; import com.cloud.vm.VMInstanceVO; /** - * AgentManager manages hosts. It directly coordinates between the - * DAOs and the connections it manages. + * AgentManager manages hosts. It directly coordinates between the DAOs and the connections it manages. */ public interface AgentManager extends Manager { public enum OnError { - Revert, - Continue, - Stop + Revert, Continue, Stop } - - /** - * easy send method that returns null if there's any errors. It handles all exceptions. - * - * @param hostId host id - * @param cmd command to send. - * @return Answer if successful; null if not. - */ + + /** + * easy send method that returns null if there's any errors. It handles all exceptions. + * + * @param hostId + * host id + * @param cmd + * command to send. + * @return Answer if successful; null if not. + */ Answer easySend(Long hostId, Command cmd); - + /** * Synchronous sending a command to the agent. * - * @param hostId id of the agent on host - * @param cmd command + * @param hostId + * id of the agent on host + * @param cmd + * command * @return an Answer */ Answer send(Long hostId, Command cmd, int timeout) throws AgentUnavailableException, OperationTimedoutException; - + Answer send(Long hostId, Command cmd) throws AgentUnavailableException, OperationTimedoutException; - + /** * Synchronous sending a list of commands to the agent. * - * @param hostId id of the agent on host - * @param cmds array of commands - * @param isControl Commands sent contains control commands - * @param stopOnError should the agent stop execution on the first error. 
+ * @param hostId + * id of the agent on host + * @param cmds + * array of commands + * @param isControl + * Commands sent contains control commands + * @param stopOnError + * should the agent stop execution on the first error. * @return an array of Answer */ Answer[] send(Long hostId, Commands cmds) throws AgentUnavailableException, OperationTimedoutException; - + Answer[] send(Long hostId, Commands cmds, int timeout) throws AgentUnavailableException, OperationTimedoutException; - + /** * Asynchronous sending of a command to the agent. - * @param hostId id of the agent on the host. - * @param cmd Command to send. - * @param listener the listener to process the answer. + * + * @param hostId + * id of the agent on the host. + * @param cmd + * Command to send. + * @param listener + * the listener to process the answer. * @return sequence number. */ long gatherStats(Long hostId, Command cmd, Listener listener); - + /** * Asynchronous sending of a command to the agent. - * @param hostId id of the agent on the host. - * @param cmds Commands to send. - * @param stopOnError should the agent stop execution on the first error. - * @param listener the listener to process the answer. + * + * @param hostId + * id of the agent on the host. + * @param cmds + * Commands to send. + * @param stopOnError + * should the agent stop execution on the first error. + * @param listener + * the listener to process the answer. * @return sequence number. */ long send(Long hostId, Commands cmds, Listener listener) throws AgentUnavailableException; - + /** - * Register to listen for host events. These are mostly connection and - * disconnection events. + * Register to listen for host events. These are mostly connection and disconnection events. * * @param listener - * @param connections listen for connections - * @param commands listen for connections - * @param priority in listening for events. 
+ * @param connections + * listen for connections + * @param commands + * listen for connections + * @param priority + * in listening for events. * @return id to unregister if needed. */ int registerForHostEvents(Listener listener, boolean connections, boolean commands, boolean priority); - + /** * Unregister for listening to host events. - * @param id returned from registerForHostEvents + * + * @param id + * returned from registerForHostEvents */ void unregisterForHostEvents(int id); - + /** * @return hosts currently connected. */ Set getConnectedHosts(); - + /** * Disconnect the agent. * - * @param hostId host to disconnect. - * @param reason the reason why we're disconnecting. + * @param hostId + * host to disconnect. + * @param reason + * the reason why we're disconnecting. * */ void disconnect(long hostId, Status.Event event, boolean investigate); - + /** * Obtains statistics for a host; vCPU utilisation, memory utilisation, and network utilisation + * * @param hostId * @return HostStats */ - HostStats getHostStatistics(long hostId); - - Long getGuestOSCategoryId(long hostId); - - String getHostTags(long hostId); - - /** - * Find a host based on the type needed, data center to deploy in, pod - * to deploy in, service offering, template, and list of host to avoid. 
- */ + HostStats getHostStatistics(long hostId); - Host findHost(Host.Type type, DataCenterVO dc, HostPodVO pod, StoragePoolVO sp, ServiceOfferingVO offering, VMTemplateVO template, VMInstanceVO vm, Host currentHost, Set avoid); - List listByDataCenter(long dcId); - List listByPod(long podId); + Long getGuestOSCategoryId(long hostId); - /** - * Adds a new host - * @param zoneId - * @param resource - * @param hostType - * @param hostDetails - * @return new Host - */ - public Host addHost(long zoneId, ServerResource resource, Type hostType, Map hostDetails); - - /** + String getHostTags(long hostId); + + /** + * Find a host based on the type needed, data center to deploy in, pod to deploy in, service offering, template, and list of + * host to avoid. + */ + + Host findHost(Host.Type type, DataCenterVO dc, HostPodVO pod, StoragePoolVO sp, ServiceOfferingVO offering, VMTemplateVO template, VMInstanceVO vm, Host currentHost, Set avoid); + + List listByDataCenter(long dcId); + + List listByPod(long podId); + + /** + * Adds a new host + * + * @param zoneId + * @param resource + * @param hostType + * @param hostDetails + * @return new Host + */ + public Host addHost(long zoneId, ServerResource resource, Type hostType, Map hostDetails); + + /** * Deletes a host * * @param hostId + * @param isForced + * TODO + * @param caller + * TODO * @param true if deleted, false otherwise */ - boolean deleteHost(long hostId); + boolean deleteHost(long hostId, boolean isForced, User caller); - /** - * Find a pod based on the user id, template, and data center. - * - * @param template - * @param dc - * @param userId - * @return - */ + /** + * Find a pod based on the user id, template, and data center. + * + * @param template + * @param dc + * @param userId + * @return + */ Pair findPod(VirtualMachineTemplate template, ServiceOfferingVO offering, DataCenterVO dc, long userId, Set avoids); - + /** * Put the agent in maintenance mode. 
* - * @param hostId id of the host to put in maintenance mode. - * @return true if it was able to put the agent into maintenance mode. false if not. + * @param hostId + * id of the host to put in maintenance mode. + * @return true if it was able to put the agent into maintenance mode. false if not. */ boolean maintain(long hostId) throws AgentUnavailableException; boolean maintenanceFailed(long hostId); - + /** * Cancel the maintenance mode. * - * @param hostId host id - * @return true if it's done. false if not. + * @param hostId + * host id + * @return true if it's done. false if not. */ boolean cancelMaintenance(long hostId); /** * Check to see if a virtual machine can be upgraded to the given service offering - * + * * @param vm * @param offering * @return true if the host can handle the upgrade, false otherwise */ boolean isVirtualMachineUpgradable(final UserVm vm, final ServiceOffering offering); - + public boolean executeUserRequest(long hostId, Event event) throws AgentUnavailableException; + public boolean reconnect(final long hostId) throws AgentUnavailableException; - public List discoverHosts(Long dcId, Long podId, Long clusterId, String clusterName, String url, String username, String password, String hypervisor, List hostTags) - throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException; + public List discoverHosts(Long dcId, Long podId, Long clusterId, String clusterName, String url, String username, String password, String hypervisor, List hostTags) + throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException; - Answer easySend(Long hostId, Command cmd, int timeout); + Answer easySend(Long hostId, Command cmd, int timeout); boolean isHostNativeHAEnabled(long hostId); Answer sendTo(Long dcId, HypervisorType type, Command cmd); - + void notifyAnswersToMonitors(long agentId, long seq, Answer[] answers); } diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java 
b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index ebf434b9288..e9b3654c86a 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -78,7 +78,6 @@ import com.cloud.api.commands.AddHostCmd; import com.cloud.api.commands.AddSecondaryStorageCmd; import com.cloud.api.commands.CancelMaintenanceCmd; import com.cloud.api.commands.DeleteClusterCmd; -import com.cloud.api.commands.DeleteHostCmd; import com.cloud.api.commands.PrepareForMaintenanceCmd; import com.cloud.api.commands.ReconnectHostCmd; import com.cloud.api.commands.UpdateHostCmd; @@ -139,7 +138,6 @@ import com.cloud.org.Grouping; import com.cloud.resource.Discoverer; import com.cloud.resource.ResourceService; import com.cloud.resource.ServerResource; -import com.cloud.server.Criteria; import com.cloud.server.ManagementServer; import com.cloud.service.ServiceOfferingVO; import com.cloud.storage.GuestOSCategoryVO; @@ -158,6 +156,7 @@ import com.cloud.storage.resource.DummySecondaryStorageResource; import com.cloud.template.VirtualMachineTemplate; import com.cloud.user.Account; import com.cloud.user.AccountManager; +import com.cloud.user.User; import com.cloud.user.UserContext; import com.cloud.user.dao.UserStatisticsDao; import com.cloud.uservm.UserVm; @@ -183,224 +182,208 @@ import com.cloud.utils.nio.Link; import com.cloud.utils.nio.NioServer; import com.cloud.utils.nio.Task; import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine.State; +import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.VirtualMachineProfileImpl; import com.cloud.vm.dao.VMInstanceDao; /** - * Implementation of the Agent Manager. This class controls the connection to - * the agents. + * Implementation of the Agent Manager. This class controls the connection to the agents. * - * @config {@table || Param Name | Description | Values | Default || || port | - * port to listen on for agent connection. 
| Integer | 8250 || || - * workers | # of worker threads | Integer | 5 || || router.template.id - * | default id for template | Integer | 1 || || router.ram.size | - * default ram for router vm in mb | Integer | 128 || || - * router.ip.address | ip address for the router | ip | 10.1.1.1 || || - * wait | Time to wait for control commands to return | seconds | 1800 - * || || domain | domain for domain routers| String | foo.com || || - * alert.wait | time to wait before alerting on a disconnected agent | - * seconds | 1800 || || update.wait | time to wait before alerting on a - * updating agent | seconds | 600 || || ping.interval | ping interval in - * seconds | seconds | 60 || || instance.name | Name of the deployment - * String | required || || start.retry | Number of times to retry start - * | Number | 2 || || ping.timeout | multiplier to ping.interval before - * announcing an agent has timed out | float | 2.0x || || - * router.stats.interval | interval to report router statistics | - * seconds | 300s || * } + * @config {@table || Param Name | Description | Values | Default || || port | port to listen on for agent connection. 
| Integer + * | 8250 || || workers | # of worker threads | Integer | 5 || || router.template.id | default id for template | Integer + * | 1 || || router.ram.size | default ram for router vm in mb | Integer | 128 || || router.ip.address | ip address for + * the router | ip | 10.1.1.1 || || wait | Time to wait for control commands to return | seconds | 1800 || || domain | + * domain for domain routers| String | foo.com || || alert.wait | time to wait before alerting on a disconnected agent | + * seconds | 1800 || || update.wait | time to wait before alerting on a updating agent | seconds | 600 || || + * ping.interval | ping interval in seconds | seconds | 60 || || instance.name | Name of the deployment String | + * required || || start.retry | Number of times to retry start | Number | 2 || || ping.timeout | multiplier to + * ping.interval before announcing an agent has timed out | float | 2.0x || || router.stats.interval | interval to + * report router statistics | seconds | 300s || * } **/ @Local(value = { AgentManager.class, ResourceService.class }) public class AgentManagerImpl implements AgentManager, HandlerFactory, ResourceService, Manager { - private static final Logger s_logger = Logger - .getLogger(AgentManagerImpl.class); + private static final Logger s_logger = Logger.getLogger(AgentManagerImpl.class); - protected ConcurrentHashMap _agents = new ConcurrentHashMap(10007); - protected List> _hostMonitors = new ArrayList>(17); - protected List> _cmdMonitors = new ArrayList>(17); - protected int _monitorId = 0; + protected ConcurrentHashMap _agents = new ConcurrentHashMap(10007); + protected List> _hostMonitors = new ArrayList>(17); + protected List> _cmdMonitors = new ArrayList>(17); + protected int _monitorId = 0; - protected NioServer _connection; - @Inject - protected HostDao _hostDao = null; - @Inject - protected UserStatisticsDao _userStatsDao = null; - @Inject - protected DataCenterDao _dcDao = null; - @Inject - protected VlanDao _vlanDao = null; - 
@Inject - protected DataCenterIpAddressDao _privateIPAddressDao = null; - @Inject - protected IPAddressDao _publicIPAddressDao = null; - @Inject - protected HostPodDao _podDao = null; - protected Adapters _hostAllocators = null; - protected Adapters _podAllocators = null; - @Inject - protected EventDao _eventDao = null; - @Inject - protected VMInstanceDao _vmDao = null; - @Inject - protected VolumeDao _volDao = null; - @Inject - protected CapacityDao _capacityDao = null; - @Inject - protected ConfigurationDao _configDao = null; - @Inject - protected StoragePoolDao _storagePoolDao = null; - @Inject - protected StoragePoolHostDao _storagePoolHostDao = null; - @Inject - protected GuestOSCategoryDao _guestOSCategoryDao = null; - @Inject - protected DetailsDao _hostDetailsDao = null; - @Inject - protected ClusterDao _clusterDao = null; - @Inject - protected ClusterDetailsDao _clusterDetailsDao = null; - @Inject + protected NioServer _connection; + @Inject + protected HostDao _hostDao = null; + @Inject + protected UserStatisticsDao _userStatsDao = null; + @Inject + protected DataCenterDao _dcDao = null; + @Inject + protected VlanDao _vlanDao = null; + @Inject + protected DataCenterIpAddressDao _privateIPAddressDao = null; + @Inject + protected IPAddressDao _publicIPAddressDao = null; + @Inject + protected HostPodDao _podDao = null; + protected Adapters _hostAllocators = null; + protected Adapters _podAllocators = null; + @Inject + protected EventDao _eventDao = null; + @Inject + protected VMInstanceDao _vmDao = null; + @Inject + protected VolumeDao _volDao = null; + @Inject + protected CapacityDao _capacityDao = null; + @Inject + protected ConfigurationDao _configDao = null; + @Inject + protected StoragePoolDao _storagePoolDao = null; + @Inject + protected StoragePoolHostDao _storagePoolHostDao = null; + @Inject + protected GuestOSCategoryDao _guestOSCategoryDao = null; + @Inject + protected DetailsDao _hostDetailsDao = null; + @Inject + protected ClusterDao _clusterDao 
= null; + @Inject + protected ClusterDetailsDao _clusterDetailsDao = null; + @Inject protected HostTagsDao _hostTagsDao = null; - - @Inject(adapter = DeploymentPlanner.class) - private Adapters _planners; - protected Adapters _discoverers = null; - protected int _port; + @Inject(adapter = DeploymentPlanner.class) + private Adapters _planners; - @Inject - protected HighAvailabilityManager _haMgr = null; - @Inject - protected AlertManager _alertMgr = null; + protected Adapters _discoverers = null; + protected int _port; - @Inject - protected NetworkManager _networkMgr = null; + @Inject + protected HighAvailabilityManager _haMgr = null; + @Inject + protected AlertManager _alertMgr = null; - @Inject - protected UpgradeManager _upgradeMgr = null; + @Inject + protected NetworkManager _networkMgr = null; + + @Inject + protected UpgradeManager _upgradeMgr = null; + + @Inject + protected StorageManager _storageMgr = null; - @Inject - protected StorageManager _storageMgr = null; - @Inject protected AccountManager _accountMgr = null; - protected int _retry = 2; + @Inject + protected VirtualMachineManager _vmMgr = null; - protected String _name; - protected String _instance; + protected int _retry = 2; - protected int _wait; - protected int _updateWait; - protected int _alertWait; - protected long _nodeId = -1; - protected float _overProvisioningFactor = 1; - protected float _cpuOverProvisioningFactor = 1; + protected String _name; + protected String _instance; - protected Random _rand = new Random(System.currentTimeMillis()); + protected int _wait; + protected int _updateWait; + protected int _alertWait; + protected long _nodeId = -1; + protected float _overProvisioningFactor = 1; + protected float _cpuOverProvisioningFactor = 1; - protected int _pingInterval; - protected long _pingTimeout; - protected AgentMonitor _monitor = null; + protected Random _rand = new Random(System.currentTimeMillis()); - protected ExecutorService _executor; + protected int _pingInterval; + 
protected long _pingTimeout; + protected AgentMonitor _monitor = null; - @Inject - protected VMTemplateDao _tmpltDao; - @Inject - protected VMTemplateHostDao _vmTemplateHostDao; + protected ExecutorService _executor; - @Override - public boolean configure(final String name, final Map params) - throws ConfigurationException { - _name = name; + @Inject + protected VMTemplateDao _tmpltDao; + @Inject + protected VMTemplateHostDao _vmTemplateHostDao; - Request.initBuilder(); + @Override + public boolean configure(final String name, final Map params) throws ConfigurationException { + _name = name; - final ComponentLocator locator = ComponentLocator.getCurrentLocator(); - ConfigurationDao configDao = locator.getDao(ConfigurationDao.class); - if (configDao == null) { - throw new ConfigurationException( - "Unable to get the configuration dao."); - } + Request.initBuilder(); - final Map configs = configDao.getConfiguration( - "AgentManager", params); - _port = NumbersUtil.parseInt(configs.get("port"), 8250); - final int workers = NumbersUtil.parseInt(configs.get("workers"), 5); + final ComponentLocator locator = ComponentLocator.getCurrentLocator(); + ConfigurationDao configDao = locator.getDao(ConfigurationDao.class); + if (configDao == null) { + throw new ConfigurationException("Unable to get the configuration dao."); + } - String value = configs.get("ping.interval"); - _pingInterval = NumbersUtil.parseInt(value, 60); + final Map configs = configDao.getConfiguration("AgentManager", params); + _port = NumbersUtil.parseInt(configs.get("port"), 8250); + final int workers = NumbersUtil.parseInt(configs.get("workers"), 5); - value = configs.get("wait"); - _wait = NumbersUtil.parseInt(value, 1800) * 1000; + String value = configs.get("ping.interval"); + _pingInterval = NumbersUtil.parseInt(value, 60); - value = configs.get("alert.wait"); - _alertWait = NumbersUtil.parseInt(value, 1800); + value = configs.get("wait"); + _wait = NumbersUtil.parseInt(value, 1800) * 1000; - value = 
configs.get("update.wait"); - _updateWait = NumbersUtil.parseInt(value, 600); + value = configs.get("alert.wait"); + _alertWait = NumbersUtil.parseInt(value, 1800); - value = configs.get("ping.timeout"); - final float multiplier = value != null ? Float.parseFloat(value) : 2.5f; - _pingTimeout = (long) (multiplier * _pingInterval); + value = configs.get("update.wait"); + _updateWait = NumbersUtil.parseInt(value, 600); - s_logger.info("Ping Timeout is " + _pingTimeout); + value = configs.get("ping.timeout"); + final float multiplier = value != null ? Float.parseFloat(value) : 2.5f; + _pingTimeout = (long) (multiplier * _pingInterval); - _instance = configs.get("instance.name"); - if (_instance == null) { - _instance = "DEFAULT"; - } + s_logger.info("Ping Timeout is " + _pingTimeout); - _hostAllocators = locator.getAdapters(HostAllocator.class); - if (_hostAllocators == null || !_hostAllocators.isSet()) { - throw new ConfigurationException( - "Unable to find an host allocator."); - } + _instance = configs.get("instance.name"); + if (_instance == null) { + _instance = "DEFAULT"; + } - _podAllocators = locator.getAdapters(PodAllocator.class); - if (_podAllocators == null || !_podAllocators.isSet()) { - throw new ConfigurationException("Unable to find an pod allocator."); - } + _hostAllocators = locator.getAdapters(HostAllocator.class); + if (_hostAllocators == null || !_hostAllocators.isSet()) { + throw new ConfigurationException("Unable to find an host allocator."); + } - _discoverers = locator.getAdapters(Discoverer.class); + _podAllocators = locator.getAdapters(PodAllocator.class); + if (_podAllocators == null || !_podAllocators.isSet()) { + throw new ConfigurationException("Unable to find an pod allocator."); + } - if (_nodeId == -1) { - // FIXME: We really should not do this like this. It should be done - // at config time and is stored as a config variable. 
- _nodeId = MacAddress.getMacAddress().toLong(); - } + _discoverers = locator.getAdapters(Discoverer.class); - _hostDao.markHostsAsDisconnected(_nodeId, Status.Up, Status.Connecting, - Status.Updating, Status.Disconnected, Status.Down); + if (_nodeId == -1) { + // FIXME: We really should not do this like this. It should be done + // at config time and is stored as a config variable. + _nodeId = MacAddress.getMacAddress().toLong(); + } - _monitor = new AgentMonitor(_nodeId, _hostDao, _volDao, _vmDao, _dcDao, - _podDao, this, _alertMgr, _pingTimeout); - registerForHostEvents(_monitor, true, true, false); + _hostDao.markHostsAsDisconnected(_nodeId, Status.Up, Status.Connecting, Status.Updating, Status.Disconnected, Status.Down); - _executor = new ThreadPoolExecutor(10, 100, 60l, TimeUnit.SECONDS, - new LinkedBlockingQueue(), new NamedThreadFactory( - "AgentTaskPool")); + _monitor = new AgentMonitor(_nodeId, _hostDao, _volDao, _vmDao, _dcDao, _podDao, this, _alertMgr, _pingTimeout); + registerForHostEvents(_monitor, true, true, false); - String overProvisioningFactorStr = configs - .get("storage.overprovisioning.factor"); - _overProvisioningFactor = NumbersUtil.parseFloat( - overProvisioningFactorStr, 1); + _executor = new ThreadPoolExecutor(10, 100, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentTaskPool")); - String cpuOverProvisioningFactorStr = configs - .get("cpu.overprovisioning.factor"); - _cpuOverProvisioningFactor = NumbersUtil.parseFloat( - cpuOverProvisioningFactorStr, 1); - if (_cpuOverProvisioningFactor < 1) { - _cpuOverProvisioningFactor = 1; - } + String overProvisioningFactorStr = configs.get("storage.overprovisioning.factor"); + _overProvisioningFactor = NumbersUtil.parseFloat(overProvisioningFactorStr, 1); - _connection = new NioServer("AgentManager", _port, workers + 10, this); + String cpuOverProvisioningFactorStr = configs.get("cpu.overprovisioning.factor"); + _cpuOverProvisioningFactor = 
NumbersUtil.parseFloat(cpuOverProvisioningFactorStr, 1); + if (_cpuOverProvisioningFactor < 1) { + _cpuOverProvisioningFactor = 1; + } - s_logger.info("Listening on " + _port + " with " + workers + " workers"); - return true; - } + _connection = new NioServer("AgentManager", _port, workers + 10, this); - @Override - public boolean isHostNativeHAEnabled(long hostId) { + s_logger.info("Listening on " + _port + " with " + workers + " workers"); + return true; + } + + @Override + public boolean isHostNativeHAEnabled(long hostId) { HostVO host = _hostDao.findById(hostId); if (host.getClusterId() != null) { ClusterDetailsVO detail = _clusterDetailsDao.findDetail(host.getClusterId(), "NativeHA"); @@ -408,3091 +391,2673 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, ResourceS } return false; } - - @Override - public Task create(Task.Type type, Link link, byte[] data) { - return new AgentHandler(type, link, data); - } - @Override - public int registerForHostEvents(final Listener listener, - boolean connections, boolean commands, boolean priority) { - synchronized (_hostMonitors) { - _monitorId++; - if (connections) { - if (priority) { - _hostMonitors.add(0, new Pair( - _monitorId, listener)); - } else { - _hostMonitors.add(new Pair(_monitorId, - listener)); - } - } - if (commands) { - if (priority) { - _cmdMonitors.add(0, new Pair(_monitorId, - listener)); - } else { - _cmdMonitors.add(new Pair(_monitorId, - listener)); - } - } - if (s_logger.isDebugEnabled()) { - s_logger.debug("Registering listener " - + listener.getClass().getSimpleName() + " with id " - + _monitorId); - } - return _monitorId; - } - } - - @Override - public void unregisterForHostEvents(final int id) { - s_logger.debug("Deregistering " + id); - _hostMonitors.remove(id); - } - - private AgentControlAnswer handleControlCommand(AgentAttache attache, - final AgentControlCommand cmd) { - AgentControlAnswer answer = null; - - for (Pair listener : _cmdMonitors) { - answer = 
listener.second().processControlCommand(attache.getId(), - cmd); - - if (answer != null) { - return answer; - } - } - - s_logger.warn("No handling of agent control command: " + cmd.toString() - + " sent from " + attache.getId()); - return new AgentControlAnswer(cmd); - } - - public void handleCommands(AgentAttache attache, final long sequence, - final Command[] cmds) { - for (Pair listener : _cmdMonitors) { - boolean processed = listener.second().processCommands( - attache.getId(), sequence, cmds); - if (s_logger.isTraceEnabled()) { - s_logger.trace("SeqA " + attache.getId() + "-" + sequence - + ": " + (processed ? "processed" : "not processed") - + " by " + listener.getClass()); - } - } - } - - public void notifyAnswersToMonitors(long agentId, long seq, Answer[] answers) { - for (Pair listener : _cmdMonitors) { - listener.second().processAnswers(agentId, seq, answers); - } + @Override + public Task create(Task.Type type, Link link, byte[] data) { + return new AgentHandler(type, link, data); } - public AgentAttache findAttache(long hostId) { - return _agents.get(hostId); - } + @Override + public int registerForHostEvents(final Listener listener, boolean connections, boolean commands, boolean priority) { + synchronized (_hostMonitors) { + _monitorId++; + if (connections) { + if (priority) { + _hostMonitors.add(0, new Pair(_monitorId, listener)); + } else { + _hostMonitors.add(new Pair(_monitorId, listener)); + } + } + if (commands) { + if (priority) { + _cmdMonitors.add(0, new Pair(_monitorId, listener)); + } else { + _cmdMonitors.add(new Pair(_monitorId, listener)); + } + } + if (s_logger.isDebugEnabled()) { + s_logger.debug("Registering listener " + listener.getClass().getSimpleName() + " with id " + _monitorId); + } + return _monitorId; + } + } - @Override - public Set getConnectedHosts() { - // make the returning set be safe for concurrent iteration - final HashSet result = new HashSet(); + @Override + public void unregisterForHostEvents(final int id) { + 
s_logger.debug("Deregistering " + id); + _hostMonitors.remove(id); + } - synchronized (_agents) { - final Set s = _agents.keySet(); - for (final Long id : s) { - result.add(id); - } - } - return result; - } + private AgentControlAnswer handleControlCommand(AgentAttache attache, final AgentControlCommand cmd) { + AgentControlAnswer answer = null; - @Override - public Host findHost(final Host.Type type, final DataCenterVO dc, - final HostPodVO pod, final StoragePoolVO sp, - final ServiceOfferingVO offering, final VMTemplateVO template, - VMInstanceVO vm, Host currentHost, final Set avoid) { - VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl( - vm, template, offering, null, null); - DeployDestination dest = null; - DataCenterDeployment plan = new DataCenterDeployment(dc.getId(), - pod.getId(), sp.getClusterId(), null, null); - ExcludeList avoids = new ExcludeList(); - for (Host h : avoid) { - avoids.addHost(h.getId()); - } + for (Pair listener : _cmdMonitors) { + answer = listener.second().processControlCommand(attache.getId(), cmd); - for (DeploymentPlanner planner : _planners) { - try { - dest = planner.plan(vmProfile, plan, avoids); - if (dest != null) { - return dest.getHost(); - } - } catch (InsufficientServerCapacityException e) { + if (answer != null) { + return answer; + } + } - } - } + s_logger.warn("No handling of agent control command: " + cmd.toString() + " sent from " + attache.getId()); + return new AgentControlAnswer(cmd); + } - s_logger.warn("findHost() could not find a non-null host."); - return null; - } + public void handleCommands(AgentAttache attache, final long sequence, final Command[] cmds) { + for (Pair listener : _cmdMonitors) { + boolean processed = listener.second().processCommands(attache.getId(), sequence, cmds); + if (s_logger.isTraceEnabled()) { + s_logger.trace("SeqA " + attache.getId() + "-" + sequence + ": " + (processed ? 
"processed" : "not processed") + " by " + listener.getClass()); + } + } + } - @Override - public List listByDataCenter(long dcId) { - List pods = _podDao.listByDataCenterId(dcId); - ArrayList pcs = new ArrayList(); - for (HostPodVO pod : pods) { - List clusters = _clusterDao.listByPodId(pod.getId()); - if (clusters.size() == 0) { - pcs.add(new PodCluster(pod, null)); - } else { - for (ClusterVO cluster : clusters) { - pcs.add(new PodCluster(pod, cluster)); - } - } - } - return pcs; - } + @Override + public void notifyAnswersToMonitors(long agentId, long seq, Answer[] answers) { + for (Pair listener : _cmdMonitors) { + listener.second().processAnswers(agentId, seq, answers); + } + } - @Override - public List listByPod(long podId) { - ArrayList pcs = new ArrayList(); - HostPodVO pod = _podDao.findById(podId); - if (pod == null) { - return pcs; - } - List clusters = _clusterDao.listByPodId(pod.getId()); - if (clusters.size() == 0) { - pcs.add(new PodCluster(pod, null)); - } else { - for (ClusterVO cluster : clusters) { - pcs.add(new PodCluster(pod, cluster)); - } - } - return pcs; - } + public AgentAttache findAttache(long hostId) { + return _agents.get(hostId); + } - protected AgentAttache handleDirectConnect(ServerResource resource, - StartupCommand[] startup, Map details, boolean old, List hostTags, String allocationState) - throws ConnectionException { - if (startup == null) { - return null; - } - HostVO server = createHost(startup, resource, details, old, hostTags, allocationState); - if (server == null) { - return null; - } + @Override + public Set getConnectedHosts() { + // make the returning set be safe for concurrent iteration + final HashSet result = new HashSet(); - long id = server.getId(); + synchronized (_agents) { + final Set s = _agents.keySet(); + for (final Long id : s) { + result.add(id); + } + } + return result; + } - AgentAttache attache = createAttache(id, server, resource); + @Override + public Host findHost(final Host.Type type, final 
DataCenterVO dc, final HostPodVO pod, final StoragePoolVO sp, final ServiceOfferingVO offering, final VMTemplateVO template, VMInstanceVO vm, + Host currentHost, final Set avoid) { + VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, offering, null, null); + DeployDestination dest = null; + DataCenterDeployment plan = new DataCenterDeployment(dc.getId(), pod.getId(), sp.getClusterId(), null, null); + ExcludeList avoids = new ExcludeList(); + for (Host h : avoid) { + avoids.addHost(h.getId()); + } - attache = notifyMonitorsOfConnection(attache, startup); + for (DeploymentPlanner planner : _planners) { + try { + dest = planner.plan(vmProfile, plan, avoids); + if (dest != null) { + return dest.getHost(); + } + } catch (InsufficientServerCapacityException e) { - return attache; - } + } + } - @Override - public List discoverCluster(AddClusterCmd cmd) - throws IllegalArgumentException, DiscoveryException, - InvalidParameterValueException { - Long dcId = cmd.getZoneId(); - Long podId = cmd.getPodId(); - String clusterName = cmd.getClusterName(); - String url = cmd.getUrl(); - String username = cmd.getUsername(); - String password = cmd.getPassword(); - - URI uri = null; + s_logger.warn("findHost() could not find a non-null host."); + return null; + } - // Check if the zone exists in the system - DataCenterVO zone = _dcDao.findById(dcId); - if (zone == null) { - throw new InvalidParameterValueException("Can't find zone by id " - + dcId); - } - - Account account = UserContext.current().getCaller(); - if(Grouping.AllocationState.Disabled == zone.getAllocationState() && !_accountMgr.isRootAdmin(account.getType())){ - throw new PermissionDeniedException("Cannot perform this operation, Zone is currently disabled: "+ dcId ); - } - - // Check if the pod exists in the system - if (podId != null) { - if (_podDao.findById(podId) == null) { - throw new InvalidParameterValueException( - "Can't find pod by id " + podId); - } - // check if pod belongs to 
the zone - HostPodVO pod = _podDao.findById(podId); - if (!Long.valueOf(pod.getDataCenterId()).equals(dcId)) { - throw new InvalidParameterValueException("Pod " + podId - + " doesn't belong to the zone " + dcId); - } - } + @Override + public List listByDataCenter(long dcId) { + List pods = _podDao.listByDataCenterId(dcId); + ArrayList pcs = new ArrayList(); + for (HostPodVO pod : pods) { + List clusters = _clusterDao.listByPodId(pod.getId()); + if (clusters.size() == 0) { + pcs.add(new PodCluster(pod, null)); + } else { + for (ClusterVO cluster : clusters) { + pcs.add(new PodCluster(pod, cluster)); + } + } + } + return pcs; + } - // Verify cluster information and create a new cluster if needed - if (clusterName == null || clusterName.isEmpty()) { - throw new InvalidParameterValueException( - "Please specify cluster name"); - } + @Override + public List listByPod(long podId) { + ArrayList pcs = new ArrayList(); + HostPodVO pod = _podDao.findById(podId); + if (pod == null) { + return pcs; + } + List clusters = _clusterDao.listByPodId(pod.getId()); + if (clusters.size() == 0) { + pcs.add(new PodCluster(pod, null)); + } else { + for (ClusterVO cluster : clusters) { + pcs.add(new PodCluster(pod, cluster)); + } + } + return pcs; + } - if (cmd.getHypervisor() == null || cmd.getHypervisor().isEmpty()) { - throw new InvalidParameterValueException( - "Please specify a hypervisor"); - } + protected AgentAttache handleDirectConnect(ServerResource resource, StartupCommand[] startup, Map details, boolean old, List hostTags, String allocationState) + throws ConnectionException { + if (startup == null) { + return null; + } + HostVO server = createHost(startup, resource, details, old, hostTags, allocationState); + if (server == null) { + return null; + } - Hypervisor.HypervisorType hypervisorType = Hypervisor.HypervisorType.getType(cmd.getHypervisor()); - if (hypervisorType == null) { - s_logger.error("Unable to resolve " + cmd.getHypervisor() + " to a valid supported hypervisor 
type"); - throw new InvalidParameterValueException("Unable to resolve " + cmd.getHypervisor() + " to a supported "); - } + long id = server.getId(); - Cluster.ClusterType clusterType = null; - if (cmd.getClusterType() != null && !cmd.getClusterType().isEmpty()) { - clusterType = Cluster.ClusterType.valueOf(cmd.getClusterType()); - } - if (clusterType == null) { - clusterType = Cluster.ClusterType.CloudManaged; - } - - Grouping.AllocationState allocationState = null; - if (cmd.getAllocationState() != null && !cmd.getAllocationState().isEmpty()) { - try{ - allocationState = Grouping.AllocationState.valueOf(cmd.getAllocationState()); - }catch(IllegalArgumentException ex){ - throw new InvalidParameterValueException("Unable to resolve Allocation State '" + cmd.getAllocationState() + "' to a supported state"); - } - } - if (allocationState == null) { - allocationState = Grouping.AllocationState.Enabled; - } - - Discoverer discoverer = getMatchingDiscover(hypervisorType); - if (discoverer == null) { - - throw new InvalidParameterValueException("Could not find corresponding resource manager for " + cmd.getHypervisor()); - } + AgentAttache attache = createAttache(id, server, resource); - List result = new ArrayList(); + attache = notifyMonitorsOfConnection(attache, startup); - long clusterId = 0; - ClusterVO cluster = new ClusterVO(dcId, podId, clusterName); - cluster.setHypervisorType(cmd.getHypervisor()); + return attache; + } - cluster.setClusterType(clusterType); - cluster.setAllocationState(allocationState); - try { - cluster = _clusterDao.persist(cluster); - } catch (Exception e) { - // no longer tolerate exception during the cluster creation phase - throw new CloudRuntimeException("Unable to create cluster " - + clusterName + " in pod " + podId + " and data center " - + dcId, e); - } - clusterId = cluster.getId(); - result.add(cluster); + @Override + public List discoverCluster(AddClusterCmd cmd) throws IllegalArgumentException, DiscoveryException, 
InvalidParameterValueException { + Long dcId = cmd.getZoneId(); + Long podId = cmd.getPodId(); + String clusterName = cmd.getClusterName(); + String url = cmd.getUrl(); + String username = cmd.getUsername(); + String password = cmd.getPassword(); - if (clusterType == Cluster.ClusterType.CloudManaged) { - return result; - } + URI uri = null; - // save cluster details for later cluster/host cross-checking - Map details = new HashMap(); - details.put("url", url); - details.put("username", username); - details.put("password", password); - _clusterDetailsDao.persist(cluster.getId(), details); + // Check if the zone exists in the system + DataCenterVO zone = _dcDao.findById(dcId); + if (zone == null) { + throw new InvalidParameterValueException("Can't find zone by id " + dcId); + } - boolean success = false; - try { - try { - uri = new URI(UriUtils.encodeURIComponent(url)); - if (uri.getScheme() == null) { - throw new InvalidParameterValueException( - "uri.scheme is null " + url + ", add http:// as a prefix"); - } else if (uri.getScheme().equalsIgnoreCase("http")) { - if (uri.getHost() == null - || uri.getHost().equalsIgnoreCase("") - || uri.getPath() == null - || uri.getPath().equalsIgnoreCase("")) { - throw new InvalidParameterValueException( - "Your host and/or path is wrong. 
Make sure it's of the format http://hostname/path"); - } - } - } catch (URISyntaxException e) { - throw new InvalidParameterValueException(url - + " is not a valid uri"); - } + Account account = UserContext.current().getCaller(); + if (Grouping.AllocationState.Disabled == zone.getAllocationState() && !_accountMgr.isRootAdmin(account.getType())) { + throw new PermissionDeniedException("Cannot perform this operation, Zone is currently disabled: " + dcId); + } - List hosts = new ArrayList(); - Map> resources = null; + // Check if the pod exists in the system + if (podId != null) { + if (_podDao.findById(podId) == null) { + throw new InvalidParameterValueException("Can't find pod by id " + podId); + } + // check if pod belongs to the zone + HostPodVO pod = _podDao.findById(podId); + if (!Long.valueOf(pod.getDataCenterId()).equals(dcId)) { + throw new InvalidParameterValueException("Pod " + podId + " doesn't belong to the zone " + dcId); + } + } - try { - resources = discoverer.find(dcId, podId, clusterId, uri, username, password); - } catch (Exception e) { - s_logger.info("Exception in external cluster discovery process with discoverer: " - + discoverer.getName()); - } - if (resources != null) { - for (Map.Entry> entry : resources.entrySet()) { - ServerResource resource = entry.getKey(); + // Verify cluster information and create a new cluster if needed + if (clusterName == null || clusterName.isEmpty()) { + throw new InvalidParameterValueException("Please specify cluster name"); + } - // For Hyper-V, we are here means agent have already started and connected to management server - if (hypervisorType == Hypervisor.HypervisorType.Hyperv) { - break; - } + if (cmd.getHypervisor() == null || cmd.getHypervisor().isEmpty()) { + throw new InvalidParameterValueException("Please specify a hypervisor"); + } - AgentAttache attache = simulateStart(resource, entry.getValue(), true, null, null); - if (attache != null) { - hosts.add(_hostDao.findById(attache.getId())); - } - 
discoverer.postDiscovery(hosts, _nodeId); - } - s_logger.info("External cluster has been successfully discovered by " + discoverer.getName()); - success = true; - return result; - } + Hypervisor.HypervisorType hypervisorType = Hypervisor.HypervisorType.getType(cmd.getHypervisor()); + if (hypervisorType == null) { + s_logger.error("Unable to resolve " + cmd.getHypervisor() + " to a valid supported hypervisor type"); + throw new InvalidParameterValueException("Unable to resolve " + cmd.getHypervisor() + " to a supported "); + } - s_logger.warn("Unable to find the server resources at " + url); - throw new DiscoveryException("Unable to add the external cluster"); - } catch(Throwable e) { - s_logger.error("Unexpected exception ", e); - throw new DiscoveryException("Unable to add the external cluster due to unhandled exception"); - } finally { - if (!success) { - _clusterDetailsDao.deleteDetails(clusterId); - _clusterDao.remove(clusterId); - } - } - } + Cluster.ClusterType clusterType = null; + if (cmd.getClusterType() != null && !cmd.getClusterType().isEmpty()) { + clusterType = Cluster.ClusterType.valueOf(cmd.getClusterType()); + } + if (clusterType == null) { + clusterType = Cluster.ClusterType.CloudManaged; + } - private Discoverer getMatchingDiscover( - Hypervisor.HypervisorType hypervisorType) { - Enumeration en = _discoverers.enumeration(); - while (en.hasMoreElements()) { - Discoverer discoverer = en.nextElement(); - if (discoverer.getHypervisorType() == hypervisorType) { - return discoverer; - } - } - return null; - } + Grouping.AllocationState allocationState = null; + if (cmd.getAllocationState() != null && !cmd.getAllocationState().isEmpty()) { + try { + allocationState = Grouping.AllocationState.valueOf(cmd.getAllocationState()); + } catch (IllegalArgumentException ex) { + throw new InvalidParameterValueException("Unable to resolve Allocation State '" + cmd.getAllocationState() + "' to a supported state"); + } + } + if (allocationState == null) { + 
allocationState = Grouping.AllocationState.Enabled; + } - @Override - public List discoverHosts(AddHostCmd cmd) - throws IllegalArgumentException, DiscoveryException, - InvalidParameterValueException { - Long dcId = cmd.getZoneId(); - Long podId = cmd.getPodId(); - Long clusterId = cmd.getClusterId(); - String clusterName = cmd.getClusterName(); - String url = cmd.getUrl(); - String username = cmd.getUsername(); - String password = cmd.getPassword(); - Long memCapacity = cmd.getMemCapacity(); - Long cpuSpeed = cmd.getCpuSpeed(); - Long cpuNum = cmd.getCpuNum(); - String mac = cmd.getMac(); - List hostTags = cmd.getHostTags(); - MapbareMetalParams = new HashMap(); - - dcId = _accountMgr.checkAccessAndSpecifyAuthority(UserContext.current().getCaller(), dcId); - - // this is for standalone option - if (clusterName == null && clusterId == null) { - clusterName = "Standalone-" + url; - } - - if (cmd.getHypervisor().equalsIgnoreCase(Hypervisor.HypervisorType.BareMetal.toString())) { - if (memCapacity == null) { - memCapacity = Long.valueOf(0); - } - if (cpuSpeed == null) { - cpuSpeed = Long.valueOf(0); - } - if (cpuNum == null) { - cpuNum = Long.valueOf(0); - } - if (mac == null) { - mac = "unknown"; - } - - bareMetalParams.put("cpuNum", cpuNum.toString()); - bareMetalParams.put("cpuCapacity", cpuSpeed.toString()); - bareMetalParams.put("memCapacity", memCapacity.toString()); - bareMetalParams.put("mac", mac); - if (hostTags != null) { - bareMetalParams.put("hostTag", hostTags.get(0)); - } - } - - String allocationState = cmd.getAllocationState(); - if (allocationState == null) { - allocationState = Host.HostAllocationState.Enabled.toString(); - } - - return discoverHostsFull(dcId, podId, clusterId, clusterName, url, - username, password, cmd.getHypervisor(), hostTags, bareMetalParams, allocationState); - } + Discoverer discoverer = getMatchingDiscover(hypervisorType); + if (discoverer == null) { - @Override - public List discoverHosts(AddSecondaryStorageCmd cmd) - 
throws IllegalArgumentException, DiscoveryException, - InvalidParameterValueException { - Long dcId = cmd.getZoneId(); - String url = cmd.getUrl(); - return discoverHosts(dcId, null, null, null, url, null, null, - "SecondaryStorage", null); - } + throw new InvalidParameterValueException("Could not find corresponding resource manager for " + cmd.getHypervisor()); + } - @Override - public List discoverHosts(Long dcId, Long podId, Long clusterId, - String clusterName, String url, String username, String password, - String hypervisorType, List hostTags) throws IllegalArgumentException, - DiscoveryException, InvalidParameterValueException { - return discoverHostsFull(dcId, podId, clusterId, clusterName, url, username, password, hypervisorType, hostTags, null, null); - } - - - private List discoverHostsFull(Long dcId, Long podId, Long clusterId, - String clusterName, String url, String username, String password, - String hypervisorType, ListhostTags, Mapparams, String allocationState) throws IllegalArgumentException, - DiscoveryException, InvalidParameterValueException { - URI uri = null; + List result = new ArrayList(); - // Check if the zone exists in the system - DataCenterVO zone = _dcDao.findById(dcId); - if (zone == null) { - throw new InvalidParameterValueException("Can't find zone by id " - + dcId); - } - - Account account = UserContext.current().getCaller(); - if(Grouping.AllocationState.Disabled == zone.getAllocationState() && !_accountMgr.isRootAdmin(account.getType())){ - throw new PermissionDeniedException("Cannot perform this operation, Zone is currently disabled: "+ dcId ); - } + long clusterId = 0; + ClusterVO cluster = new ClusterVO(dcId, podId, clusterName); + cluster.setHypervisorType(cmd.getHypervisor()); - // Check if the pod exists in the system - if (podId != null) { - if (_podDao.findById(podId) == null) { - throw new InvalidParameterValueException( - "Can't find pod by id " + podId); - } - // check if pod belongs to the zone - HostPodVO pod = 
_podDao.findById(podId); - if (!Long.valueOf(pod.getDataCenterId()).equals(dcId)) { - throw new InvalidParameterValueException("Pod " + podId - + " doesn't belong to the zone " + dcId); - } - } + cluster.setClusterType(clusterType); + cluster.setAllocationState(allocationState); + try { + cluster = _clusterDao.persist(cluster); + } catch (Exception e) { + // no longer tolerate exception during the cluster creation phase + throw new CloudRuntimeException("Unable to create cluster " + clusterName + " in pod " + podId + " and data center " + dcId, e); + } + clusterId = cluster.getId(); + result.add(cluster); - // Deny to add a secondary storage multiple times for the same zone - if ((username == null) - && (_hostDao.findSecondaryStorageHost(dcId) != null)) { - throw new InvalidParameterValueException( - "A secondary storage host already exists in the specified zone"); - } + if (clusterType == Cluster.ClusterType.CloudManaged) { + return result; + } - // Verify cluster information and create a new cluster if needed - if (clusterName != null && clusterId != null) { - throw new InvalidParameterValueException( - "Can't specify cluster by both id and name"); - } + // save cluster details for later cluster/host cross-checking + Map details = new HashMap(); + details.put("url", url); + details.put("username", username); + details.put("password", password); + _clusterDetailsDao.persist(cluster.getId(), details); - if (hypervisorType == null || hypervisorType.isEmpty()) { - throw new InvalidParameterValueException( - "Need to specify Hypervisor Type"); - } + boolean success = false; + try { + try { + uri = new URI(UriUtils.encodeURIComponent(url)); + if (uri.getScheme() == null) { + throw new InvalidParameterValueException("uri.scheme is null " + url + ", add http:// as a prefix"); + } else if (uri.getScheme().equalsIgnoreCase("http")) { + if (uri.getHost() == null || uri.getHost().equalsIgnoreCase("") || uri.getPath() == null || uri.getPath().equalsIgnoreCase("")) { + throw 
new InvalidParameterValueException("Your host and/or path is wrong. Make sure it's of the format http://hostname/path"); + } + } + } catch (URISyntaxException e) { + throw new InvalidParameterValueException(url + " is not a valid uri"); + } - if ((clusterName != null || clusterId != null) && podId == null) { - throw new InvalidParameterValueException( - "Can't specify cluster without specifying the pod"); - } + List hosts = new ArrayList(); + Map> resources = null; - if (clusterId != null) { - if (_clusterDao.findById(clusterId) == null) { - throw new InvalidParameterValueException( - "Can't find cluster by id " + clusterId); - } - } + try { + resources = discoverer.find(dcId, podId, clusterId, uri, username, password); + } catch (Exception e) { + s_logger.info("Exception in external cluster discovery process with discoverer: " + discoverer.getName()); + } + if (resources != null) { + for (Map.Entry> entry : resources.entrySet()) { + ServerResource resource = entry.getKey(); - if (clusterName != null) { - ClusterVO cluster = new ClusterVO(dcId, podId, clusterName); - cluster.setHypervisorType(hypervisorType); - try { - cluster = _clusterDao.persist(cluster); - } catch (Exception e) { - cluster = _clusterDao.findBy(clusterName, podId); - if (cluster == null) { - throw new CloudRuntimeException("Unable to create cluster " - + clusterName + " in pod " + podId - + " and data center " + dcId, e); - } - } - clusterId = cluster.getId(); - } + // For Hyper-V, we are here means agent have already started and connected to management server + if (hypervisorType == Hypervisor.HypervisorType.Hyperv) { + break; + } - try { - uri = new URI(UriUtils.encodeURIComponent(url)); - if (uri.getScheme() == null) { - throw new InvalidParameterValueException("uri.scheme is null " - + url + ", add nfs:// as a prefix"); - } else if (uri.getScheme().equalsIgnoreCase("nfs")) { - if (uri.getHost() == null || uri.getHost().equalsIgnoreCase("") - || uri.getPath() == null - || 
uri.getPath().equalsIgnoreCase("")) { - throw new InvalidParameterValueException( - "Your host and/or path is wrong. Make sure it's of the format nfs://hostname/path"); - } - } - } catch (URISyntaxException e) { - throw new InvalidParameterValueException(url - + " is not a valid uri"); - } + AgentAttache attache = simulateStart(resource, entry.getValue(), true, null, null); + if (attache != null) { + hosts.add(_hostDao.findById(attache.getId())); + } + discoverer.postDiscovery(hosts, _nodeId); + } + s_logger.info("External cluster has been successfully discovered by " + discoverer.getName()); + success = true; + return result; + } - List hosts = new ArrayList(); - s_logger.info("Trying to add a new host at " + url + " in data center " - + dcId); - Enumeration en = _discoverers.enumeration(); - boolean isHypervisorTypeSupported = false; - while (en.hasMoreElements()) { - Discoverer discoverer = en.nextElement(); - if (params != null) { - discoverer.putParam(params); - } - - if (!discoverer.matchHypervisor(hypervisorType)) { - continue; - } - isHypervisorTypeSupported = true; - Map> resources = null; + s_logger.warn("Unable to find the server resources at " + url); + throw new DiscoveryException("Unable to add the external cluster"); + } catch (Throwable e) { + s_logger.error("Unexpected exception ", e); + throw new DiscoveryException("Unable to add the external cluster due to unhandled exception"); + } finally { + if (!success) { + _clusterDetailsDao.deleteDetails(clusterId); + _clusterDao.remove(clusterId); + } + } + } - try { - resources = discoverer.find(dcId, podId, clusterId, uri, - username, password); - } catch (DiscoveredWithErrorException e){ - throw e; - }catch (Exception e) { - s_logger.info("Exception in host discovery process with discoverer: " - + discoverer.getName() - + ", skip to another discoverer if there is any"); - } - if (resources != null) { - for (Map.Entry> entry : resources - .entrySet()) { - ServerResource resource = entry.getKey(); - /* - 
* For KVM, if we go to here, that means kvm agent is - * already connected to mgt svr. - */ - if (resource instanceof KvmDummyResourceBase) { - Map details = entry.getValue(); - String guid = details.get("guid"); - List kvmHosts = _hostDao.listBy( - Host.Type.Routing, clusterId, podId, dcId); - for (HostVO host : kvmHosts) { - if (host.getGuid().equalsIgnoreCase(guid)) { - hosts.add(host); - return hosts; - } - } - return null; - } - AgentAttache attache = simulateStart(resource, - entry.getValue(), true, hostTags, allocationState); - if (attache != null) { - hosts.add(_hostDao.findById(attache.getId())); - } - discoverer.postDiscovery(hosts, _nodeId); + private Discoverer getMatchingDiscover(Hypervisor.HypervisorType hypervisorType) { + Enumeration en = _discoverers.enumeration(); + while (en.hasMoreElements()) { + Discoverer discoverer = en.nextElement(); + if (discoverer.getHypervisorType() == hypervisorType) { + return discoverer; + } + } + return null; + } - } - s_logger.info("server resources successfully discovered by " - + discoverer.getName()); - return hosts; - } - } - if (!isHypervisorTypeSupported) { - String msg = "Do not support HypervisorType " + hypervisorType - + " for " + url; - s_logger.warn(msg); - throw new DiscoveryException(msg); - } - s_logger.warn("Unable to find the server resources at " + url); - throw new DiscoveryException("Unable to add the host"); - } + @Override + public List discoverHosts(AddHostCmd cmd) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException { + Long dcId = cmd.getZoneId(); + Long podId = cmd.getPodId(); + Long clusterId = cmd.getClusterId(); + String clusterName = cmd.getClusterName(); + String url = cmd.getUrl(); + String username = cmd.getUsername(); + String password = cmd.getPassword(); + Long memCapacity = cmd.getMemCapacity(); + Long cpuSpeed = cmd.getCpuSpeed(); + Long cpuNum = cmd.getCpuNum(); + String mac = cmd.getMac(); + List hostTags = cmd.getHostTags(); + Map 
bareMetalParams = new HashMap(); - @Override - @DB - public boolean deleteCluster(DeleteClusterCmd cmd) - throws InvalidParameterValueException { - Transaction txn = Transaction.currentTxn(); - try { - txn.start(); - ClusterVO cluster = _clusterDao.lockRow(cmd.getId(), true); - if (cluster == null) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Cluster: " + cmd.getId() - + " does not even exist. Delete call is ignored."); - } - txn.rollback(); - return true; - } + dcId = _accountMgr.checkAccessAndSpecifyAuthority(UserContext.current().getCaller(), dcId); - List hosts = _hostDao.listByCluster(cmd.getId()); - if (hosts.size() > 0) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Cluster: " + cmd.getId() - + " still has hosts"); - } - txn.rollback(); - return false; - } + // this is for standalone option + if (clusterName == null && clusterId == null) { + clusterName = "Standalone-" + url; + } - _clusterDao.remove(cmd.getId()); + if (cmd.getHypervisor().equalsIgnoreCase(Hypervisor.HypervisorType.BareMetal.toString())) { + if (memCapacity == null) { + memCapacity = Long.valueOf(0); + } + if (cpuSpeed == null) { + cpuSpeed = Long.valueOf(0); + } + if (cpuNum == null) { + cpuNum = Long.valueOf(0); + } + if (mac == null) { + mac = "unknown"; + } - txn.commit(); - return true; - } catch (Throwable t) { - s_logger.error("Unable to delete cluster: " + cmd.getId(), t); - txn.rollback(); - return false; - } - } - - @Override - @DB - public Cluster updateCluster(Cluster clusterToUpdate, String clusterType, String hypervisor, String allocationState) - throws InvalidParameterValueException { - - ClusterVO cluster = (ClusterVO)clusterToUpdate; - // Verify cluster information and update the cluster if needed - boolean doUpdate = false; + bareMetalParams.put("cpuNum", cpuNum.toString()); + bareMetalParams.put("cpuCapacity", cpuSpeed.toString()); + bareMetalParams.put("memCapacity", memCapacity.toString()); + bareMetalParams.put("mac", mac); + if (hostTags != null) { + 
bareMetalParams.put("hostTag", hostTags.get(0)); + } + } - if (hypervisor != null && !hypervisor.isEmpty()) { - Hypervisor.HypervisorType hypervisorType = Hypervisor.HypervisorType.getType(hypervisor); - if (hypervisorType == null) { - s_logger.error("Unable to resolve " + hypervisor + " to a valid supported hypervisor type"); - throw new InvalidParameterValueException("Unable to resolve " + hypervisor + " to a supported type"); - }else{ - cluster.setHypervisorType(hypervisor); - doUpdate = true; - } - } + String allocationState = cmd.getAllocationState(); + if (allocationState == null) { + allocationState = Host.HostAllocationState.Enabled.toString(); + } + return discoverHostsFull(dcId, podId, clusterId, clusterName, url, username, password, cmd.getHypervisor(), hostTags, bareMetalParams, allocationState); + } - Cluster.ClusterType newClusterType = null; - if (clusterType != null && !clusterType.isEmpty()) { - try{ - newClusterType = Cluster.ClusterType.valueOf(clusterType); - }catch(IllegalArgumentException ex){ - throw new InvalidParameterValueException("Unable to resolve " + clusterType + " to a supported type"); - } - if (newClusterType == null) { - s_logger.error("Unable to resolve " + clusterType + " to a valid supported cluster type"); - throw new InvalidParameterValueException("Unable to resolve " + clusterType + " to a supported type"); - }else{ - cluster.setClusterType(newClusterType); - doUpdate = true; - } - } - - Grouping.AllocationState newAllocationState = null; - if (allocationState != null && !allocationState.isEmpty()) { - try{ - newAllocationState = Grouping.AllocationState.valueOf(allocationState); - }catch(IllegalArgumentException ex){ - throw new InvalidParameterValueException("Unable to resolve Allocation State '" + allocationState + "' to a supported state"); - } - if (newAllocationState == null) { - s_logger.error("Unable to resolve " + allocationState + " to a valid supported allocation State"); - throw new 
InvalidParameterValueException("Unable to resolve " + allocationState + " to a supported state"); - }else{ - cluster.setAllocationState(newAllocationState); - doUpdate = true; - } - } - if(doUpdate){ - Transaction txn = Transaction.currentTxn(); - try { - txn.start(); - _clusterDao.update(cluster.getId(), cluster); - txn.commit(); - } catch(Exception e) { - s_logger.error("Unable to update cluster due to " + e.getMessage(), e); - throw new CloudRuntimeException("Failed to update cluster. Please contact Cloud Support."); - } - } - return cluster; - } + @Override + public List discoverHosts(AddSecondaryStorageCmd cmd) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException { + Long dcId = cmd.getZoneId(); + String url = cmd.getUrl(); + return discoverHosts(dcId, null, null, null, url, null, null, "SecondaryStorage", null); + } - @Override - public Cluster getCluster(Long clusterId){ - return _clusterDao.findById(clusterId); - } - - @Override + @Override + public List discoverHosts(Long dcId, Long podId, Long clusterId, String clusterName, String url, String username, String password, String hypervisorType, List hostTags) + throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException { + return discoverHostsFull(dcId, podId, clusterId, clusterName, url, username, password, hypervisorType, hostTags, null, null); + } + + private List discoverHostsFull(Long dcId, Long podId, Long clusterId, String clusterName, String url, String username, String password, String hypervisorType, List hostTags, + Map params, String allocationState) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException { + URI uri = null; + + // Check if the zone exists in the system + DataCenterVO zone = _dcDao.findById(dcId); + if (zone == null) { + throw new InvalidParameterValueException("Can't find zone by id " + dcId); + } + + Account account = UserContext.current().getCaller(); + if (Grouping.AllocationState.Disabled == 
zone.getAllocationState() && !_accountMgr.isRootAdmin(account.getType())) { + throw new PermissionDeniedException("Cannot perform this operation, Zone is currently disabled: " + dcId); + } + + // Check if the pod exists in the system + if (podId != null) { + if (_podDao.findById(podId) == null) { + throw new InvalidParameterValueException("Can't find pod by id " + podId); + } + // check if pod belongs to the zone + HostPodVO pod = _podDao.findById(podId); + if (!Long.valueOf(pod.getDataCenterId()).equals(dcId)) { + throw new InvalidParameterValueException("Pod " + podId + " doesn't belong to the zone " + dcId); + } + } + + // Deny to add a secondary storage multiple times for the same zone + if ((username == null) && (_hostDao.findSecondaryStorageHost(dcId) != null)) { + throw new InvalidParameterValueException("A secondary storage host already exists in the specified zone"); + } + + // Verify cluster information and create a new cluster if needed + if (clusterName != null && clusterId != null) { + throw new InvalidParameterValueException("Can't specify cluster by both id and name"); + } + + if (hypervisorType == null || hypervisorType.isEmpty()) { + throw new InvalidParameterValueException("Need to specify Hypervisor Type"); + } + + if ((clusterName != null || clusterId != null) && podId == null) { + throw new InvalidParameterValueException("Can't specify cluster without specifying the pod"); + } + + if (clusterId != null) { + if (_clusterDao.findById(clusterId) == null) { + throw new InvalidParameterValueException("Can't find cluster by id " + clusterId); + } + } + + if (clusterName != null) { + ClusterVO cluster = new ClusterVO(dcId, podId, clusterName); + cluster.setHypervisorType(hypervisorType); + try { + cluster = _clusterDao.persist(cluster); + } catch (Exception e) { + cluster = _clusterDao.findBy(clusterName, podId); + if (cluster == null) { + throw new CloudRuntimeException("Unable to create cluster " + clusterName + " in pod " + podId + " and data 
center " + dcId, e); + } + } + clusterId = cluster.getId(); + } + + try { + uri = new URI(UriUtils.encodeURIComponent(url)); + if (uri.getScheme() == null) { + throw new InvalidParameterValueException("uri.scheme is null " + url + ", add nfs:// as a prefix"); + } else if (uri.getScheme().equalsIgnoreCase("nfs")) { + if (uri.getHost() == null || uri.getHost().equalsIgnoreCase("") || uri.getPath() == null || uri.getPath().equalsIgnoreCase("")) { + throw new InvalidParameterValueException("Your host and/or path is wrong. Make sure it's of the format nfs://hostname/path"); + } + } + } catch (URISyntaxException e) { + throw new InvalidParameterValueException(url + " is not a valid uri"); + } + + List hosts = new ArrayList(); + s_logger.info("Trying to add a new host at " + url + " in data center " + dcId); + Enumeration en = _discoverers.enumeration(); + boolean isHypervisorTypeSupported = false; + while (en.hasMoreElements()) { + Discoverer discoverer = en.nextElement(); + if (params != null) { + discoverer.putParam(params); + } + + if (!discoverer.matchHypervisor(hypervisorType)) { + continue; + } + isHypervisorTypeSupported = true; + Map> resources = null; + + try { + resources = discoverer.find(dcId, podId, clusterId, uri, username, password); + } catch (DiscoveredWithErrorException e) { + throw e; + } catch (Exception e) { + s_logger.info("Exception in host discovery process with discoverer: " + discoverer.getName() + ", skip to another discoverer if there is any"); + } + if (resources != null) { + for (Map.Entry> entry : resources.entrySet()) { + ServerResource resource = entry.getKey(); + /* + * For KVM, if we go to here, that means kvm agent is already connected to mgt svr. 
+ */ + if (resource instanceof KvmDummyResourceBase) { + Map details = entry.getValue(); + String guid = details.get("guid"); + List kvmHosts = _hostDao.listBy(Host.Type.Routing, clusterId, podId, dcId); + for (HostVO host : kvmHosts) { + if (host.getGuid().equalsIgnoreCase(guid)) { + hosts.add(host); + return hosts; + } + } + return null; + } + AgentAttache attache = simulateStart(resource, entry.getValue(), true, hostTags, allocationState); + if (attache != null) { + hosts.add(_hostDao.findById(attache.getId())); + } + discoverer.postDiscovery(hosts, _nodeId); + + } + s_logger.info("server resources successfully discovered by " + discoverer.getName()); + return hosts; + } + } + if (!isHypervisorTypeSupported) { + String msg = "Do not support HypervisorType " + hypervisorType + " for " + url; + s_logger.warn(msg); + throw new DiscoveryException(msg); + } + s_logger.warn("Unable to find the server resources at " + url); + throw new DiscoveryException("Unable to add the host"); + } + + @Override + @DB + public boolean deleteCluster(DeleteClusterCmd cmd) throws InvalidParameterValueException { + Transaction txn = Transaction.currentTxn(); + try { + txn.start(); + ClusterVO cluster = _clusterDao.lockRow(cmd.getId(), true); + if (cluster == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Cluster: " + cmd.getId() + " does not even exist. 
Delete call is ignored."); + } + txn.rollback(); + return true; + } + + List hosts = _hostDao.listByCluster(cmd.getId()); + if (hosts.size() > 0) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Cluster: " + cmd.getId() + " still has hosts"); + } + txn.rollback(); + return false; + } + + _clusterDao.remove(cmd.getId()); + + txn.commit(); + return true; + } catch (Throwable t) { + s_logger.error("Unable to delete cluster: " + cmd.getId(), t); + txn.rollback(); + return false; + } + } + + @Override + @DB + public Cluster updateCluster(Cluster clusterToUpdate, String clusterType, String hypervisor, String allocationState) throws InvalidParameterValueException { + + ClusterVO cluster = (ClusterVO) clusterToUpdate; + // Verify cluster information and update the cluster if needed + boolean doUpdate = false; + + if (hypervisor != null && !hypervisor.isEmpty()) { + Hypervisor.HypervisorType hypervisorType = Hypervisor.HypervisorType.getType(hypervisor); + if (hypervisorType == null) { + s_logger.error("Unable to resolve " + hypervisor + " to a valid supported hypervisor type"); + throw new InvalidParameterValueException("Unable to resolve " + hypervisor + " to a supported type"); + } else { + cluster.setHypervisorType(hypervisor); + doUpdate = true; + } + } + + Cluster.ClusterType newClusterType = null; + if (clusterType != null && !clusterType.isEmpty()) { + try { + newClusterType = Cluster.ClusterType.valueOf(clusterType); + } catch (IllegalArgumentException ex) { + throw new InvalidParameterValueException("Unable to resolve " + clusterType + " to a supported type"); + } + if (newClusterType == null) { + s_logger.error("Unable to resolve " + clusterType + " to a valid supported cluster type"); + throw new InvalidParameterValueException("Unable to resolve " + clusterType + " to a supported type"); + } else { + cluster.setClusterType(newClusterType); + doUpdate = true; + } + } + + Grouping.AllocationState newAllocationState = null; + if (allocationState != null && 
!allocationState.isEmpty()) { + try { + newAllocationState = Grouping.AllocationState.valueOf(allocationState); + } catch (IllegalArgumentException ex) { + throw new InvalidParameterValueException("Unable to resolve Allocation State '" + allocationState + "' to a supported state"); + } + if (newAllocationState == null) { + s_logger.error("Unable to resolve " + allocationState + " to a valid supported allocation State"); + throw new InvalidParameterValueException("Unable to resolve " + allocationState + " to a supported state"); + } else { + cluster.setAllocationState(newAllocationState); + doUpdate = true; + } + } + if (doUpdate) { + Transaction txn = Transaction.currentTxn(); + try { + txn.start(); + _clusterDao.update(cluster.getId(), cluster); + txn.commit(); + } catch (Exception e) { + s_logger.error("Unable to update cluster due to " + e.getMessage(), e); + throw new CloudRuntimeException("Failed to update cluster. Please contact Cloud Support."); + } + } + return cluster; + } + + @Override + public Cluster getCluster(Long clusterId) { + return _clusterDao.findById(clusterId); + } + + @Override public Answer sendTo(Long dcId, HypervisorType type, Command cmd) { List clusters = _clusterDao.listByDcHyType(dcId, type.toString()); int retry = 0; - for( ClusterVO cluster : clusters ) { + for (ClusterVO cluster : clusters) { List hosts = _hostDao.listBy(Host.Type.Routing, cluster.getId(), null, dcId); - for ( HostVO host : hosts ) { + for (HostVO host : hosts) { retry++; - if ( retry > _retry ) { + if (retry > _retry) { return null; } Answer answer = null; try { - answer = easySend( host.getId(), cmd); - } catch (Exception e ) { + answer = easySend(host.getId(), cmd); + } catch (Exception e) { } - if ( answer != null ) { + if (answer != null) { return answer; } } - } + } return null; } - - @Override - @DB - public boolean deleteHost(long hostId) { - - //Check if there are vms running/starting/stopping on this host - List vms = _vmDao.listByHostId(hostId); - - if 
(!vms.isEmpty()) { - throw new CloudRuntimeException("Unable to delete the host as there are vms in " + vms.get(0).getState() + " state using this host"); - } - - Transaction txn = Transaction.currentTxn(); - try { - HostVO host = _hostDao.findById(hostId); - if (host == null) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Host: " + hostId - + " does not even exist. Delete call is ignored."); - } - return true; - } - if (host.getType() == Type.SecondaryStorage) { - return deleteSecondaryStorageHost(host); - } - if (s_logger.isDebugEnabled()) { - s_logger.debug("Delete Host: " + hostId + " Guid:" - + host.getGuid()); - } - if (host.getType() == Type.Routing) { - if (host.getHypervisorType() == HypervisorType.XenServer) { - if (host.getClusterId() != null) { - List hosts = _hostDao.listBy(Type.Routing, - host.getClusterId(), host.getPodId(), - host.getDataCenterId()); - hosts.add(host); - boolean success = true; - for (HostVO thost : hosts) { - long thostId = thost.getId(); - PoolEjectCommand eject = new PoolEjectCommand( - host.getGuid()); - Answer answer = easySend(thostId, eject); - if (answer != null && answer.getResult()) { - s_logger.debug("Eject Host: " + hostId + " from " - + thostId + " Succeed"); - success = true; - break; - } else { - success = false; - s_logger.debug("Eject Host: " - + hostId - + " from " - + thostId - + " failed due to " - + (answer != null ? 
answer.getDetails() - : "no answer")); - } - } - if (!success) { - String msg = "Unable to eject host " - + host.getGuid() - + " due to there is no host up in this cluster, please execute xe pool-eject host-uuid=" - + host.getGuid() + "in this host " - + host.getPrivateIpAddress(); - s_logger.info(msg); - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, - host.getDataCenterId(), host.getPodId(), - "Unable to eject host " + host.getGuid(), msg); - } - } - } else if (host.getHypervisorType() == HypervisorType.KVM) { - try { - ShutdownCommand cmd = new ShutdownCommand(ShutdownCommand.DeleteHost, null); - send(host.getId(), cmd); - } catch (AgentUnavailableException e) { - s_logger.debug("Sending ShutdownCommand failed: " + e.toString()); - } catch (OperationTimedoutException e) { - s_logger.debug("Sending ShutdownCommand failed: " + e.toString()); - } - } - } - txn.start(); - - _dcDao.releasePrivateIpAddress(host.getPrivateIpAddress(), - host.getDataCenterId(), null); - AgentAttache attache = findAttache(hostId); - if (attache != null) { - handleDisconnect(attache, Status.Event.Remove, false); - } - // delete host details - _hostDetailsDao.deleteDetails(hostId); - - host.setGuid(null); - Long clusterId = host.getClusterId(); - host.setClusterId(null); - _hostDao.update(host.getId(), host); - - _hostDao.remove(hostId); - if (clusterId != null) { - List hosts = _hostDao.listByCluster(clusterId); - if (hosts.size() == 0) { - ClusterVO cluster = _clusterDao.findById(clusterId); - cluster.setGuid(null); - _clusterDao.update(clusterId, cluster); - } - - } - - // delete the associated primary storage from db - ComponentLocator locator = ComponentLocator - .getLocator(ManagementServer.Name); - _storagePoolHostDao = locator.getDao(StoragePoolHostDao.class); - if (_storagePoolHostDao == null) { - throw new ConfigurationException( - "Unable to get storage pool host dao: " - + StoragePoolHostDao.class); - } - // 1. 
Get the pool_ids from the host ref table - ArrayList pool_ids = _storagePoolHostDao.getPoolIds(hostId); - - // 2.Delete the associated entries in host ref table - _storagePoolHostDao.deletePrimaryRecordsForHost(hostId); - - // 3.For pool ids you got, delete entries in pool table where - // type='FileSystem' || 'LVM' - for (Long poolId : pool_ids) { - StoragePoolVO storagePool = _storagePoolDao.findById(poolId); - if (storagePool.isLocal()) { - storagePool.setUuid(null); - storagePool.setClusterId(null); - _storagePoolDao.update(poolId, storagePool); - _storagePoolDao.remove(poolId); - } - } - - //delete the op_host_capacity entry - Object[] capacityTypes = {Capacity.CAPACITY_TYPE_CPU,Capacity.CAPACITY_TYPE_MEMORY}; - SearchCriteria hostCapacitySC = _capacityDao.createSearchCriteria(); - hostCapacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, hostId); - hostCapacitySC.addAnd("capacityType", SearchCriteria.Op.IN, capacityTypes); - _capacityDao.remove(hostCapacitySC); - - txn.commit(); - return true; - } catch (Throwable t) { - s_logger.error("Unable to delete host: " + hostId, t); - return false; - } - } - - @Override - public boolean deleteHost(DeleteHostCmd cmd) - throws InvalidParameterValueException { - Long id = cmd.getId(); - - // Verify that host exists - HostVO host = _hostDao.findById(id); - if (host == null) { - throw new InvalidParameterValueException("Host with id " - + id.toString() + " doesn't exist"); - } - _accountMgr.checkAccessAndSpecifyAuthority(UserContext.current().getCaller(), host.getDataCenterId()); - return deleteHost(id); - } - - @DB - protected boolean deleteSecondaryStorageHost(HostVO secStorageHost) { - long zoneId = secStorageHost.getDataCenterId(); - long hostId = secStorageHost.getId(); - Transaction txn = Transaction.currentTxn(); - try { - - List allVmsInZone = _vmDao.listByZoneId(zoneId); - if (!allVmsInZone.isEmpty()) { - s_logger.warn("Cannot delete secondary storage host when there are " - + allVmsInZone.size() + " vms in 
zone " + zoneId); - return false; - } - txn.start(); - - if (!_hostDao.updateStatus(secStorageHost, - Event.MaintenanceRequested, _nodeId)) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Unable to take host " + hostId - + " into maintenance mode. Delete call is ignored"); - } - return false; - } - if (!_hostDao.updateStatus(secStorageHost, - Event.PreparationComplete, _nodeId)) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Unable to take host " + hostId - + " into maintenance mode. Delete call is ignored"); - } - return false; - } - - AgentAttache attache = findAttache(hostId); - if (attache != null) { - handleDisconnect(attache, Status.Event.Remove, false); - } - // now delete the host - secStorageHost.setGuid(null); - _hostDao.update(secStorageHost.getId(), secStorageHost); - _hostDao.remove(secStorageHost.getId()); - - // delete the templates associated with this host - SearchCriteria templateHostSC = _vmTemplateHostDao - .createSearchCriteria(); - templateHostSC.addAnd("hostId", SearchCriteria.Op.EQ, - secStorageHost.getId()); - _vmTemplateHostDao.remove(templateHostSC); - - //delete the op_host_capacity entry - SearchCriteria secStorageCapacitySC = _capacityDao.createSearchCriteria(); - secStorageCapacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, - secStorageHost.getId()); - secStorageCapacitySC.addAnd("capacityType", SearchCriteria.Op.EQ, - Capacity.CAPACITY_TYPE_SECONDARY_STORAGE); - _capacityDao.remove(secStorageCapacitySC); - - - /* Disconnected agent needs special handling here */ - secStorageHost.setGuid(null); - - txn.commit(); - return true; - } catch (Throwable t) { - s_logger.error("Unable to delete sec storage host: " - + secStorageHost.getId(), t); - return false; - } - } - - @Override - public boolean isVirtualMachineUpgradable(final UserVm vm, - final ServiceOffering offering) { - Enumeration en = _hostAllocators.enumeration(); - boolean isMachineUpgradable = true; - while (isMachineUpgradable && en.hasMoreElements()) { - 
final HostAllocator allocator = en.nextElement(); - isMachineUpgradable = allocator.isVirtualMachineUpgradable(vm, - offering); - } - - return isMachineUpgradable; - } - - protected int getPingInterval() { - return _pingInterval; - } - - @Override - public Answer send(Long hostId, Command cmd, int timeout) - throws AgentUnavailableException, OperationTimedoutException { - Commands cmds = new Commands(OnError.Revert); - cmds.addCommand(cmd); - send(hostId, cmds, timeout); - Answer[] answers = cmds.getAnswers(); - if (answers != null && !(answers[0] instanceof UnsupportedAnswer)) { - return answers[0]; - } - - if (answers != null && (answers[0] instanceof UnsupportedAnswer)) { - s_logger.warn("Unsupported Command: " + answers[0].getDetails()); - return answers[0]; - } - - return null; - } - - @DB - protected boolean noDbTxn() { - Transaction txn = Transaction.currentTxn(); - return !txn.dbTxnStarted(); - } - - @Override - public Answer[] send(Long hostId, Commands commands, int timeout) - throws AgentUnavailableException, OperationTimedoutException { - assert hostId != null : "Who's not checking the agent id before sending? ... (finger wagging)"; - if (hostId == null) { - throw new AgentUnavailableException(-1); - } - - //assert noDbTxn() : "I know, I know. Why are we so strict as to not allow txn across an agent call? ... Why are we so cruel ... Why are we such a dictator .... Too bad... Sorry...but NO AGENT COMMANDS WRAPPED WITHIN DB TRANSACTIONS!"; - - Command[] cmds = commands.toCommands(); - - assert cmds.length > 0 : "Ask yourself this about a hundred times. 
Why am I sending zero length commands?"; - - if (cmds.length == 0) { - commands.setAnswers(new Answer[0]); - } - - final AgentAttache agent = getAttache(hostId); - if (agent == null || agent.isClosed()) { - throw new AgentUnavailableException("agent not logged into this management server", hostId); - } - - long seq = _hostDao.getNextSequence(hostId); - Request req = new Request(seq, hostId, _nodeId, cmds, - commands.stopOnError(), true, commands.revertOnError()); - Answer[] answers = agent.send(req, timeout); - notifyAnswersToMonitors(hostId, seq, answers); - commands.setAnswers(answers); - return answers; - } - - protected Status investigate(AgentAttache agent) { - Long hostId = agent.getId(); - if (s_logger.isDebugEnabled()) { - s_logger.debug("checking if agent (" + hostId + ") is alive"); - } - - try { - long seq = _hostDao.getNextSequence(hostId); - Request req = new Request(seq, hostId, _nodeId, - new CheckHealthCommand(), true); - Answer[] answers = agent.send(req, 50 * 1000); - if (answers != null && answers[0] != null) { - Status status = answers[0].getResult() ? 
Status.Up - : Status.Down; - if (s_logger.isDebugEnabled()) { - s_logger.debug("agent (" - + hostId - + ") responded to checkHeathCommand, reporting that agent is " - + status); - } - return status; - } - } catch (AgentUnavailableException e) { - s_logger.debug("Agent is unavailable so we move on."); - } catch (OperationTimedoutException e) { - s_logger.debug("Timed Out " + e.getMessage()); - } - - return _haMgr.investigate(hostId); - } - - protected AgentAttache getAttache(final Long hostId) - throws AgentUnavailableException { - assert (hostId != null) : "Who didn't check their id value?"; - if (hostId == null) { - return null; - } - AgentAttache agent = findAttache(hostId); - if (agent == null) { - s_logger.debug("Unable to find agent for " + hostId); - throw new AgentUnavailableException("Unable to find agent ", hostId); - } - - return agent; - } - - @Override - public long send(Long hostId, Commands commands, Listener listener) - throws AgentUnavailableException { - final AgentAttache agent = getAttache(hostId); - if (agent.isClosed()) { - return -1; - } - - Command[] cmds = commands.toCommands(); - - assert cmds.length > 0 : "Why are you sending zero length commands?"; - if (cmds.length == 0) { - return -1; - } - long seq = _hostDao.getNextSequence(hostId); - Request req = new Request(seq, hostId, _nodeId, cmds, - commands.stopOnError(), true, commands.revertOnError()); - agent.send(req, listener); - return seq; - } - - @Override - public long gatherStats(final Long hostId, final Command cmd, - final Listener listener) { - try { - return send(hostId, new Commands(cmd), listener); - } catch (final AgentUnavailableException e) { - return -1; - } - } - - public void removeAgent(AgentAttache attache, Status nextState) { - if (attache == null) { - return; - } - long hostId = attache.getId(); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Remove Agent : " + hostId); - } - AgentAttache removed = null; - boolean conflict = false; - synchronized (_agents) { - 
removed = _agents.remove(hostId); - if (removed != null && removed != attache) { - conflict = true; - _agents.put(hostId, removed); - removed = attache; - } - } - if (conflict) { - s_logger.debug("Agent for host " + hostId - + " is created when it is being disconnected"); - } - if (removed != null) { - removed.disconnect(nextState); - } - } - - @Override - public void disconnect(final long hostId, final Status.Event event, - final boolean investigate) { - AgentAttache attache = findAttache(hostId); - - if (attache != null) { - disconnect(attache, event, investigate); - } else { - HostVO host = _hostDao.findById(hostId); - if (host != null && host.getRemoved() == null) { - if (event != null && event.equals(Event.Remove)) { - host.setGuid(null); - host.setClusterId(null); - } - _hostDao.updateStatus(host, event, _nodeId); - } - } - } - - public void disconnect(AgentAttache attache, final Status.Event event, - final boolean investigate) { - _executor.submit(new DisconnectTask(attache, event, investigate)); - } - - protected boolean handleDisconnect(AgentAttache attache, - Status.Event event, boolean investigate) { - if (attache == null) { - return true; - } - - long hostId = attache.getId(); - - s_logger.info("Host " + hostId + " is disconnecting with event " - + event.toString()); - - HostVO host = _hostDao.findById(hostId); - if (host == null) { - s_logger.warn("Can't find host with " + hostId); - removeAgent(attache, Status.Removed); - return true; - - } - final Status currentState = host.getStatus(); - if (currentState == Status.Down || currentState == Status.Alert - || currentState == Status.Removed - || currentState == Status.PrepareForMaintenance) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Host " + hostId + " is already " - + currentState.toString()); - } - if (currentState != Status.PrepareForMaintenance) { - removeAgent(attache, currentState); - } - return true; - } - Status nextState = currentState.getNextStatus(event); - if (nextState == null) { 
- if (!(attache instanceof DirectAgentAttache)) { - return false; - } - - s_logger.debug("There is no transition from state " - + currentState.toString() + " and event " - + event.toString()); - assert false : "How did we get here. Look at the FSM"; - return false; - } - - if (s_logger.isDebugEnabled()) { - s_logger.debug("The next state is " + nextState.toString() - + ", current state is " + currentState); - } - - // Now we go and correctly diagnose what the actual situation is - if (nextState == Status.Alert && investigate) { - s_logger.info("Investigating why host " + hostId - + " has disconnected with event " + event.toString()); - - final Status determinedState = investigate(attache); - s_logger.info("The state determined is " - + (determinedState != null ? determinedState.toString() - : "undeterminable")); - - if (determinedState == null || determinedState == Status.Down) { - s_logger.error("Host is down: " + host.getId() + "-" - + host.getName() + ". Starting HA on the VMs"); - - event = Event.HostDown; - } else if (determinedState == Status.Up) { - // we effectively pinged from the server here. 
- s_logger.info("Agent is determined to be up and running"); - _hostDao.updateStatus(host, Event.Ping, _nodeId); - return false; - } else if (determinedState == Status.Disconnected) { - s_logger.warn("Agent is disconnected but the host is still up: " - + host.getId() + "-" + host.getName()); - if (currentState == Status.Disconnected) { - if (((System.currentTimeMillis() >> 10) - host - .getLastPinged()) > _alertWait) { - s_logger.warn("Host " - + host.getId() - + " has been disconnected pass the time it should be disconnected."); - event = Event.WaitedTooLong; - } else { - s_logger.debug("Host has been determined to be disconnected but it hasn't passed the wait time yet."); - return false; - } - } else if (currentState == Status.Updating) { - if (((System.currentTimeMillis() >> 10) - host - .getLastPinged()) > _updateWait) { - s_logger.warn("Host " + host.getId() - + " has been updating for too long"); - - event = Event.WaitedTooLong; - } else { - s_logger.debug("Host has been determined to be disconnected but it hasn't passed the wait time yet."); - return false; - } - } else if (currentState == Status.Up) { - DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); - HostPodVO podVO = _podDao.findById(host.getPodId()); - String hostDesc = "name: " + host.getName() + " (id:" - + host.getId() + "), availability zone: " - + dcVO.getName() + ", pod: " + podVO.getName(); - if((host.getType() != Host.Type.SecondaryStorage) && (host.getType() != Host.Type.ConsoleProxy)){ - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, - host.getDataCenterId(), host.getPodId(), - "Host disconnected, " + hostDesc, - "If the agent for host [" + hostDesc - + "] is not restarted within " + _alertWait - + " seconds, HA will begin on the VMs"); - } - event = Event.AgentDisconnected; - } - } else { - // if we end up here we are in alert state, send an alert - DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); - HostPodVO podVO = _podDao.findById(host.getPodId()); - String 
hostDesc = "name: " + host.getName() + " (id:" - + host.getId() + "), availability zone: " - + dcVO.getName() + ", pod: " + podVO.getName(); - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, - host.getDataCenterId(), host.getPodId(), - "Host in ALERT state, " + hostDesc, - "In availability zone " + host.getDataCenterId() - + ", host is in alert state: " + host.getId() - + "-" + host.getName()); - } - } - - if (s_logger.isDebugEnabled()) { - s_logger.debug("Deregistering link for " + hostId + " with state " - + nextState); - } - - _hostDao.disconnect(host, event, _nodeId); - - removeAgent(attache, nextState); - - host = _hostDao.findById(host.getId()); - if (host.getStatus() == Status.Alert || host.getStatus() == Status.Down) { - _haMgr.scheduleRestartForVmsOnHost(host); - } - - for (Pair monitor : _hostMonitors) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Sending Disconnect to listener: " - + monitor.second().getClass().getName()); - } - monitor.second().processDisconnect(hostId, nextState); - } - - return true; - } - - protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, - final StartupCommand[] cmd) throws ConnectionException { - long hostId = attache.getId(); - HostVO host = _hostDao.findById(hostId); - for (Pair monitor : _hostMonitors) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Sending Connect to listener: " - + monitor.second().getClass().getSimpleName()); - } - for (int i = 0; i < cmd.length; i++) { - try { - monitor.second().processConnect(host, cmd[i]); - } catch (ConnectionException e) { - if (e.isSetupError()) { - s_logger.warn("Monitor " - + monitor.second().getClass().getSimpleName() - + " says there is an error in the connect process for " - + hostId + " due to " + e.getMessage()); - handleDisconnect(attache, Event.AgentDisconnected, - false); - throw e; - } else { - s_logger.info("Monitor " - + monitor.second().getClass().getSimpleName() - + " says not to continue the connect process for " - + hostId + " 
due to " + e.getMessage()); - handleDisconnect(attache, Event.ShutdownRequested, - false); - return attache; - } - } - } - } - - Long dcId = host.getDataCenterId(); - ReadyCommand ready = new ReadyCommand(dcId); - Answer answer = easySend(hostId, ready); - if (answer == null || !answer.getResult()) { - // this is tricky part for secondary storage - // make it as disconnected, wait for secondary storage VM to be up - // return the attache instead of null, even it is disconnectede - handleDisconnect(attache, Event.AgentDisconnected, false); - } - - _hostDao.updateStatus(host, Event.Ready, _nodeId); - attache.ready(); - return attache; - } - - @Override - public boolean start() { - startDirectlyConnectedHosts(); - if (_monitor != null) { - _monitor.start(); - } - if (_connection != null) { - _connection.start(); - } - - return true; - } - - public void startDirectlyConnectedHosts() { - List hosts = _hostDao.findDirectlyConnectedHosts(); - for (HostVO host : hosts) { - loadDirectlyConnectedHost(host); - } - } - - @SuppressWarnings("rawtypes") - protected void loadDirectlyConnectedHost(HostVO host) { - String resourceName = host.getResource(); - ServerResource resource = null; - try { - Class clazz = Class.forName(resourceName); - Constructor constructor = clazz.getConstructor(); - resource = (ServerResource) constructor.newInstance(); - } catch (ClassNotFoundException e) { - s_logger.warn("Unable to find class " + host.getResource(), e); - return; - } catch (InstantiationException e) { - s_logger.warn("Unablet to instantiate class " + host.getResource(), - e); - return; - } catch (IllegalAccessException e) { - s_logger.warn("Illegal access " + host.getResource(), e); - return; - } catch (SecurityException e) { - s_logger.warn("Security error on " + host.getResource(), e); - return; - } catch (NoSuchMethodException e) { - s_logger.warn( - "NoSuchMethodException error on " + host.getResource(), e); - return; - } catch (IllegalArgumentException e) { - s_logger.warn( - 
"IllegalArgumentException error on " + host.getResource(), - e); - return; - } catch (InvocationTargetException e) { - s_logger.warn( - "InvocationTargetException error on " + host.getResource(), - e); - return; - } - - _hostDao.loadDetails(host); - - HashMap params = new HashMap(host.getDetails().size() + 5); - params.putAll(host.getDetails()); - - params.put("guid", host.getGuid()); - params.put("zone", Long.toString(host.getDataCenterId())); - if (host.getPodId() != null) { - params.put("pod", Long.toString(host.getPodId())); - } - if (host.getClusterId() != null) { - params.put("cluster", Long.toString(host.getClusterId())); - String guid = null; - ClusterVO cluster = _clusterDao.findById(host.getClusterId()); - if (cluster.getGuid() == null) { - guid = host.getDetail("pool"); - } else { - guid = cluster.getGuid(); - } - if (guid == null || guid.isEmpty()) { - throw new CloudRuntimeException( - "Can not find guid for cluster " + cluster.getId() + " name " + cluster.getName()); - } - params.put("pool", guid); - } - - params.put("ipaddress", host.getPrivateIpAddress()); - params.put("secondary.storage.vm", "false"); - params.put("max.template.iso.size", - _configDao.getValue("max.template.iso.size")); - - try { - resource.configure(host.getName(), params); - } catch (ConfigurationException e) { - s_logger.warn("Unable to configure resource due to ", e); - return; - } - - if (!resource.start()) { - s_logger.warn("Unable to start the resource"); - return; - } - host.setLastPinged(System.currentTimeMillis() >> 10); - host.setManagementServerId(_nodeId); - _hostDao.update(host.getId(), host); - _executor.execute(new SimulateStartTask(host.getId(), resource, host - .getDetails(), null)); - } - - protected AgentAttache simulateStart(ServerResource resource, - Map details, boolean old, List hostTags, String allocationState) - throws IllegalArgumentException { - StartupCommand[] cmds = resource.initialize(); - if (cmds == null) { - return null; - } - - AgentAttache 
attache = null; - if (s_logger.isDebugEnabled()) { - new Request(0l, -1l, -1l, cmds, true, false, true).log(-1, "Startup request from directly connected host: "); -// s_logger.debug("Startup request from directly connected host: " -// + new Request(0l, -1l, -1l, cmds, true, false, true) -// .toString()); - } - try { - attache = handleDirectConnect(resource, cmds, details, old, hostTags, allocationState); - } catch (IllegalArgumentException ex) { - s_logger.warn("Unable to connect due to ", ex); - throw ex; - } catch (Exception e) { - s_logger.warn("Unable to connect due to ", e); - } - - if (attache == null) { - resource.disconnected(); - return null; - } - if (attache.isReady()) { - StartupAnswer[] answers = new StartupAnswer[cmds.length]; - for (int i = 0; i < answers.length; i++) { - answers[i] = new StartupAnswer(cmds[i], attache.getId(), - _pingInterval); - } - - attache.process(answers); - } - return attache; - } - - @Override - public boolean stop() { - if (_monitor != null) { - _monitor.signalStop(); - } - if (_connection != null) { - _connection.stop(); - } - - s_logger.info("Disconnecting agents: " + _agents.size()); - synchronized (_agents) { - for (final AgentAttache agent : _agents.values()) { - final HostVO host = _hostDao.findById(agent.getId()); - if (host == null) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Cant not find host " + agent.getId()); - } - } else { - _hostDao.updateStatus(host, Event.ManagementServerDown, - _nodeId); - } - } - } - return true; - } - - @Override - public Pair findPod(final VirtualMachineTemplate template, - ServiceOfferingVO offering, final DataCenterVO dc, - final long accountId, Set avoids) { - final Enumeration en = _podAllocators.enumeration(); - while (en.hasMoreElements()) { - final PodAllocator allocator = (PodAllocator) en.nextElement(); - final Pair pod = allocator.allocateTo(template, - offering, dc, accountId, avoids); - if (pod != null) { - return pod; - } - } - return null; - } - - @Override - 
public HostStats getHostStatistics(long hostId) { - Answer answer = easySend(hostId, new GetHostStatsCommand(_hostDao - .findById(hostId).getGuid(), _hostDao.findById(hostId) - .getName(), hostId)); - - if (answer != null && (answer instanceof UnsupportedAnswer)) { - return null; - } - - if (answer == null || !answer.getResult()) { - String msg = "Unable to obtain host " + hostId + " statistics. "; - s_logger.warn(msg); - return null; - } else { - - // now construct the result object - if (answer instanceof GetHostStatsAnswer) { - return ((GetHostStatsAnswer) answer).getHostStats(); - } - } - return null; - } - - @Override - public Long getGuestOSCategoryId(long hostId) { - HostVO host = _hostDao.findById(hostId); - if (host == null) { - return null; - } else { - _hostDao.loadDetails(host); - DetailVO detail = _hostDetailsDao.findDetail(hostId, - "guest.os.category.id"); - if (detail == null) { - return null; - } else { - return Long.parseLong(detail.getValue()); - } - } - } - - @Override - public String getHostTags(long hostId){ - List hostTags = _hostTagsDao.gethostTags(hostId); - if (hostTags == null) { - return null; - } else { - return StringUtils.listToCsvTags(hostTags); - } + @Override + @DB + public boolean deleteHost(long hostId, boolean isForced, User caller) { + + // Check if the host exists + HostVO host = _hostDao.findById(hostId); + if (host == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Host: " + hostId + " does not even exist. 
Delete call is ignored."); + } + return true; + } + + if (host.getType() == Type.SecondaryStorage) { + return deleteSecondaryStorageHost(host); + } + + AgentAttache attache = findAttache(hostId); + + try { + + if (host.getType() == Type.Routing) { + // Check if host is ready for removal + Status currentState = host.getStatus(); + Status nextState = currentState.getNextStatus(Status.Event.Remove); + if (nextState == null) { + if (!(attache instanceof DirectAgentAttache)) { + return false; + } + s_logger.debug("There is no transition from state " + currentState.toString() + " to state " + Status.Event.Remove.toString()); + return false; + } + + if (s_logger.isDebugEnabled()) { + s_logger.debug("Deleting Host: " + hostId + " Guid:" + host.getGuid()); + } + + // Check if there are vms running/starting/stopping on this host + List vms = _vmDao.listByHostId(hostId); + + if (!vms.isEmpty()) { + if (isForced) { + // Stop HA disabled vms and HA enabled vms in Stopping state + // Restart HA enabled vms + for (VMInstanceVO vm : vms) { + if (!vm.isHaEnabled() || vm.getState() == State.Stopping) { + s_logger.debug("Stopping vm: " + vm + " as a part of deleteHost id=" + hostId); + if (!_vmMgr.advanceStop(vm, true, caller, _accountMgr.getAccount(vm.getAccountId()))) { + String errorMsg = "There was an error stopping the vm: " + vm + " as a part of hostDelete id=" + hostId; + s_logger.warn(errorMsg); + throw new CloudRuntimeException(errorMsg); + } + } else if (vm.isHaEnabled() && (vm.getState() == State.Running || vm.getState() == State.Starting)) { + s_logger.debug("Scheduling restart for vm: " + vm + " " + vm.getState() + " on the host id=" + hostId); + _haMgr.scheduleRestart(vm, false); + } + } + } else { + throw new CloudRuntimeException("Unable to delete the host as there are vms in " + vms.get(0).getState() + " state using this host and isForced=false specified"); + } + } + + if (host.getHypervisorType() == HypervisorType.XenServer) { + if (host.getClusterId() != null) { + 
List hosts = _hostDao.listBy(Type.Routing, host.getClusterId(), host.getPodId(), host.getDataCenterId()); + hosts.add(host); + boolean success = true; + for (HostVO thost : hosts) { + long thostId = thost.getId(); + PoolEjectCommand eject = new PoolEjectCommand(host.getGuid()); + Answer answer = easySend(thostId, eject); + if (answer != null && answer.getResult()) { + s_logger.debug("Eject Host: " + hostId + " from " + thostId + " Succeed"); + success = true; + break; + } else { + success = false; + s_logger.warn("Eject Host: " + hostId + " from " + thostId + " failed due to " + (answer != null ? answer.getDetails() : "no answer")); + } + } + if (!success) { + String msg = "Unable to eject host " + host.getGuid() + " due to there is no host up in this cluster, please execute xe pool-eject host-uuid=" + host.getGuid() + + "in this host " + host.getPrivateIpAddress(); + s_logger.warn(msg); + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Unable to eject host " + host.getGuid(), msg); + } + } + } else if (host.getHypervisorType() == HypervisorType.KVM) { + try { + ShutdownCommand cmd = new ShutdownCommand(ShutdownCommand.DeleteHost, null); + send(host.getId(), cmd); + } catch (AgentUnavailableException e) { + s_logger.warn("Sending ShutdownCommand failed: ", e); + } catch (OperationTimedoutException e) { + s_logger.warn("Sending ShutdownCommand failed: ", e); + } + } + } + + Transaction txn = Transaction.currentTxn(); + txn.start(); + + _dcDao.releasePrivateIpAddress(host.getPrivateIpAddress(), host.getDataCenterId(), null); + + if (attache != null) { + handleDisconnect(attache, Status.Event.Remove, false); + } + // delete host details + _hostDetailsDao.deleteDetails(hostId); + + host.setGuid(null); + Long clusterId = host.getClusterId(); + host.setClusterId(null); + _hostDao.update(host.getId(), host); + + _hostDao.remove(hostId); + if (clusterId != null) { + List hosts = _hostDao.listByCluster(clusterId); + if 
(hosts.size() == 0) { + ClusterVO cluster = _clusterDao.findById(clusterId); + cluster.setGuid(null); + _clusterDao.update(clusterId, cluster); + } + } + + // delete the associated primary storage from db + ComponentLocator locator = ComponentLocator.getLocator(ManagementServer.Name); + _storagePoolHostDao = locator.getDao(StoragePoolHostDao.class); + if (_storagePoolHostDao == null) { + throw new ConfigurationException("Unable to get storage pool host dao: " + StoragePoolHostDao.class); + } + // 1. Get the pool_ids from the host ref table + ArrayList pool_ids = _storagePoolHostDao.getPoolIds(hostId); + + // 2.Delete the associated entries in host ref table + _storagePoolHostDao.deletePrimaryRecordsForHost(hostId); + + // 3.For pool ids you got, delete entries in pool table where + // type='FileSystem' || 'LVM' + for (Long poolId : pool_ids) { + StoragePoolVO storagePool = _storagePoolDao.findById(poolId); + if (storagePool.isLocal()) { + storagePool.setUuid(null); + storagePool.setClusterId(null); + _storagePoolDao.update(poolId, storagePool); + _storagePoolDao.remove(poolId); + } + } + + // delete the op_host_capacity entry + Object[] capacityTypes = { Capacity.CAPACITY_TYPE_CPU, Capacity.CAPACITY_TYPE_MEMORY }; + SearchCriteria hostCapacitySC = _capacityDao.createSearchCriteria(); + hostCapacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, hostId); + hostCapacitySC.addAnd("capacityType", SearchCriteria.Op.IN, capacityTypes); + _capacityDao.remove(hostCapacitySC); + + txn.commit(); + return true; + } catch (Throwable t) { + s_logger.error("Unable to delete host: " + hostId, t); + return false; + } } - - @Override - public String getName() { - return _name; - } - - protected class DisconnectTask implements Runnable { - AgentAttache _attache; - Status.Event _event; - boolean _investigate; - - DisconnectTask(final AgentAttache attache, final Status.Event event, - final boolean investigate) { - _attache = attache; - _event = event; - _investigate = investigate; - } 
- - @Override - public void run() { - try { - handleDisconnect(_attache, _event, _investigate); - } catch (final Exception e) { - s_logger.error("Exception caught while handling disconnect: ", - e); - } finally { - StackMaid.current().exitCleanup(); - } - } - } - - @Override - public Answer easySend(final Long hostId, final Command cmd) { - return easySend(hostId, cmd, _wait); - } - - @Override - public Answer easySend(final Long hostId, final Command cmd, int timeout) { - try { - Host h = _hostDao.findById(hostId); - if (h == null || h.getRemoved() != null) { - s_logger.debug("Host with id " + hostId.toString() - + " doesn't exist"); - return null; - } - Status status = h.getStatus(); - if (!status.equals(Status.Up) && !status.equals(Status.Connecting)) { - return null; - } - final Answer answer = send(hostId, cmd, timeout); - if (answer == null) { - s_logger.warn("send returns null answer"); - return null; - } - - if (!answer.getResult()) { - s_logger.warn("Unable to execute command: " + cmd.toString() - + " due to " + answer.getDetails()); - return null; - } - - if (s_logger.isDebugEnabled() && answer.getDetails() != null) { - s_logger.debug("Details from executing " - + cmd.getClass().toString() + ": " - + answer.getDetails()); - } - - return answer; - - } catch (final AgentUnavailableException e) { - s_logger.warn(e.getMessage()); - return null; - } catch (final OperationTimedoutException e) { - s_logger.warn("Operation timed out: " + e.getMessage()); - return null; - } catch (final Exception e) { - s_logger.warn("Exception while sending", e); - return null; - } - } - - @Override - public Answer send(final Long hostId, final Command cmd) - throws AgentUnavailableException, OperationTimedoutException { - return send(hostId, cmd, _wait); - } - - @Override - public Answer[] send(final Long hostId, Commands cmds) - throws AgentUnavailableException, OperationTimedoutException { - return send(hostId, cmds, _wait); - } - - @Override - public Host 
reconnectHost(ReconnectHostCmd cmd) - throws AgentUnavailableException { - Long hostId = cmd.getId(); - - HostVO host = _hostDao.findById(hostId); - if (host == null) { - throw new InvalidParameterValueException("Host with id " - + hostId.toString() + " doesn't exist"); - } - - boolean result = reconnect(hostId); - if (result) { - return host; - } - throw new CloudRuntimeException("Failed to reconnect host with id " + hostId.toString() - + ", internal error."); - } - - @Override - public boolean reconnect(final long hostId) - throws AgentUnavailableException { - HostVO host; - - host = _hostDao.findById(hostId); - if (host == null || host.getRemoved() != null) { - s_logger.warn("Unable to find host " + hostId); - return false; - } - - if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) { - s_logger.info("Unable to disconnect host because it is not in the correct state: host=" - + hostId + "; Status=" + host.getStatus()); - return false; - } - - AgentAttache attache = findAttache(hostId); - if (attache == null) { - s_logger.info("Unable to disconnect host because it is not connected to this server: " - + hostId); - return false; - } - - disconnect(attache, Event.ShutdownRequested, false); - return true; - } - - @Override - public boolean cancelMaintenance(final long hostId) { - - HostVO host; - host = _hostDao.findById(hostId); - if (host == null || host.getRemoved() != null) { - s_logger.warn("Unable to find host " + hostId); - return true; - } - - if (host.getStatus() != Status.PrepareForMaintenance - && host.getStatus() != Status.Maintenance - && host.getStatus() != Status.ErrorInMaintenance) { - return true; - } - - _haMgr.cancelScheduledMigrations(host); - List vms = _haMgr.findTakenMigrationWork(); - for (VMInstanceVO vm : vms) { - if (vm.getHostId() != null && vm.getHostId() == hostId) { - s_logger.info("Unable to cancel migration because the vm is being migrated: " - + vm.toString()); - return false; - } - } - disconnect(hostId, 
Event.ResetRequested, false); - return true; - } - - @Override - public Host cancelMaintenance(CancelMaintenanceCmd cmd) - throws InvalidParameterValueException { - Long hostId = cmd.getId(); - - // verify input parameters - HostVO host = _hostDao.findById(hostId); - if (host == null || host.getRemoved() != null) { - throw new InvalidParameterValueException("Host with id " - + hostId.toString() + " doesn't exist"); - } - - boolean success = cancelMaintenance(hostId); - if (!success) { - throw new CloudRuntimeException( - "Internal error cancelling maintenance."); - } - return host; - } - - @Override - public boolean executeUserRequest(long hostId, Event event) - throws AgentUnavailableException { - if (event == Event.MaintenanceRequested) { - return maintain(hostId); - } else if (event == Event.ResetRequested) { - return cancelMaintenance(hostId); - } else if (event == Event.Remove) { - return deleteHost(hostId); - } else if (event == Event.AgentDisconnected) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Received agent disconnect event for host " - + hostId); - } - AgentAttache attache = null; - attache = findAttache(hostId); - if (attache != null) { - handleDisconnect(attache, Event.AgentDisconnected, false); - } - return true; - } else if (event == Event.ShutdownRequested) { - return reconnect(hostId); - } - return false; - } - - @Override - public boolean maintain(final long hostId) throws AgentUnavailableException { - HostVO host = _hostDao.findById(hostId); - Status state; - - Answer answer = easySend(hostId, new MaintainCommand()); - if (answer == null || !answer.getResult()) { - s_logger.warn("Unable to put host in maintainance mode: " + hostId); - return false; - } - - // Let's put this guy in maintenance state - do { - host = _hostDao.findById(hostId); - if (host == null) { - s_logger.debug("Unable to find host " + hostId); - return false; - } - state = host.getStatus(); - if (state == Status.Disconnected || state == Status.Updating) { - 
s_logger.debug("Unable to put host " + hostId - + " in matinenance mode because it is currently in " - + state.toString()); - throw new AgentUnavailableException( - "Agent is in " - + state.toString() - + " state. Please wait for it to become Alert state try again.", - hostId); - } - } while (!_hostDao.updateStatus(host, Event.MaintenanceRequested, - _nodeId)); - - AgentAttache attache = findAttache(hostId); - if (attache != null) { - attache.setMaintenanceMode(true); - } - - if (attache != null) { - // Now cancel all of the commands except for the active one. - attache.cancelAllCommands(Status.PrepareForMaintenance, false); - } - - final Host.Type type = host.getType(); - - if (type == Host.Type.Routing) { - - final List vms = _vmDao.listByHostId(hostId); - if (vms.size() == 0) { - return true; - } - - List hosts = _hostDao.listBy(host.getClusterId(), host.getPodId(), host.getDataCenterId()); - - for (final VMInstanceVO vm : vms) { - if( hosts == null || hosts.size() <= 1) { - // for the last host in this cluster, stop all the VMs - _haMgr.scheduleStop(vm, hostId, WorkType.ForceStop); - } else { - _haMgr.scheduleMigration(vm); - } - } - } - - return true; - } - - @Override - public Host maintain(PrepareForMaintenanceCmd cmd) - throws InvalidParameterValueException { - Long hostId = cmd.getId(); - HostVO host = _hostDao.findById(hostId); - - if (host == null) { - s_logger.debug("Unable to find host " + hostId); - throw new InvalidParameterValueException("Unable to find host with ID: " + hostId + ". Please specify a valid host ID."); - } - - if (_hostDao.countBy(host.getClusterId(), Status.PrepareForMaintenance, Status.ErrorInMaintenance) > 0) { - throw new InvalidParameterValueException("There are other servers in PrepareForMaintenance OR ErrorInMaintenance STATUS in cluster " + host.getClusterId()); - } - - if (_storageMgr.isLocalStorageActiveOnHost(host)) { - throw new InvalidParameterValueException( - "There are active VMs using the host's local storage pool. 
Please stop all VMs on this host that use local storage."); - } - - try { - if (maintain(hostId)) { - return _hostDao.findById(hostId); - } else { - throw new CloudRuntimeException( - "Unable to prepare for maintenance host " + hostId); - } - } catch (AgentUnavailableException e) { - throw new CloudRuntimeException( - "Unable to prepare for maintenance host " + hostId); - } - } - - public boolean checkCIDR(Host.Type type, HostPodVO pod, - String serverPrivateIP, String serverPrivateNetmask) { - if (serverPrivateIP == null) { - return true; - } - // Get the CIDR address and CIDR size - String cidrAddress = pod.getCidrAddress(); - long cidrSize = pod.getCidrSize(); - - // If the server's private IP address is not in the same subnet as the - // pod's CIDR, return false - String cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); - String serverSubnet = NetUtils.getSubNet(serverPrivateIP, - serverPrivateNetmask); - if (!cidrSubnet.equals(serverSubnet)) { - return false; - } - - // If the server's private netmask is less inclusive than the pod's CIDR - // netmask, return false - String cidrNetmask = NetUtils - .getCidrSubNet("255.255.255.255", cidrSize); - long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); - long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); - if (serverNetmaskNumeric > cidrNetmaskNumeric) { - return false; - } - return true; - } - - protected void checkCIDR(Host.Type type, HostPodVO pod, DataCenterVO dc, - String serverPrivateIP, String serverPrivateNetmask) - throws IllegalArgumentException { - // Skip this check for Storage Agents and Console Proxies - if (type == Host.Type.Storage || type == Host.Type.ConsoleProxy) { - return; - } - - if (serverPrivateIP == null) { - return; - } - // Get the CIDR address and CIDR size - String cidrAddress = pod.getCidrAddress(); - long cidrSize = pod.getCidrSize(); - - // If the server's private IP address is not in the same subnet as the - // pod's CIDR, return false - String 
cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); - String serverSubnet = NetUtils.getSubNet(serverPrivateIP, - serverPrivateNetmask); - if (!cidrSubnet.equals(serverSubnet)) { - s_logger.warn("The private ip address of the server (" - + serverPrivateIP - + ") is not compatible with the CIDR of pod: " - + pod.getName() + " and zone: " + dc.getName()); - throw new IllegalArgumentException( - "The private ip address of the server (" + serverPrivateIP - + ") is not compatible with the CIDR of pod: " - + pod.getName() + " and zone: " + dc.getName()); - } - - // If the server's private netmask is less inclusive than the pod's CIDR - // netmask, return false - String cidrNetmask = NetUtils - .getCidrSubNet("255.255.255.255", cidrSize); - long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); - long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); - if (serverNetmaskNumeric > cidrNetmaskNumeric) { - throw new IllegalArgumentException( - "The private ip address of the server (" + serverPrivateIP - + ") is not compatible with the CIDR of pod: " - + pod.getName() + " and zone: " + dc.getName()); - } - - } - - public void checkIPConflicts(Host.Type type, HostPodVO pod, - DataCenterVO dc, String serverPrivateIP, - String serverPrivateNetmask, String serverPublicIP, - String serverPublicNetmask) { - // If the server's private IP is the same as is public IP, this host has - // a host-only private network. Don't check for conflicts with the - // private IP address table. 
- if (serverPrivateIP != serverPublicIP) { - if (!_privateIPAddressDao.mark(dc.getId(), pod.getId(), - serverPrivateIP)) { - // If the server's private IP address is already in the - // database, return false - List existingPrivateIPs = _privateIPAddressDao - .listByPodIdDcIdIpAddress(pod.getId(), dc.getId(), - serverPrivateIP); - - assert existingPrivateIPs.size() <= 1 : " How can we get more than one ip address with " - + serverPrivateIP; - if (existingPrivateIPs.size() > 1) { - throw new IllegalArgumentException( - "The private ip address of the server (" - + serverPrivateIP - + ") is already in use in pod: " - + pod.getName() + " and zone: " - + dc.getName()); - } - if (existingPrivateIPs.size() == 1) { - DataCenterIpAddressVO vo = existingPrivateIPs.get(0); - if (vo.getInstanceId() != null) { - throw new IllegalArgumentException( - "The private ip address of the server (" - + serverPrivateIP - + ") is already in use in pod: " - + pod.getName() + " and zone: " - + dc.getName()); - } - } - } - } - - if (serverPublicIP != null - && !_publicIPAddressDao - .mark(dc.getId(), new Ip(serverPublicIP))) { - // If the server's public IP address is already in the database, - // return false - List existingPublicIPs = _publicIPAddressDao - .listByDcIdIpAddress(dc.getId(), serverPublicIP); - if (existingPublicIPs.size() > 0) { - throw new IllegalArgumentException( - "The public ip address of the server (" - + serverPublicIP - + ") is already in use in zone: " - + dc.getName()); - } - } - } - - @Override - public Host addHost(long zoneId, ServerResource resource, Type hostType, - Map hostDetails) { - // Check if the zone exists in the system - if (_dcDao.findById(zoneId) == null) { - throw new InvalidParameterValueException("Can't find zone with id " - + zoneId); - } - - Map details = hostDetails; - String guid = details.get("guid"); - List currentHosts = _hostDao.listBy(hostType, zoneId); - for (HostVO currentHost : currentHosts) { - if (currentHost.getGuid().equals(guid)) 
{ - return currentHost; - } - } - - AgentAttache attache = simulateStart(resource, hostDetails, true, null, null); - return _hostDao.findById(attache.getId()); - } - - public HostVO createHost(final StartupCommand startup, - ServerResource resource, Map details, - boolean directFirst, List hostTags, String allocationState) throws IllegalArgumentException { - Host.Type type = null; - - if (startup instanceof StartupStorageCommand) { - StartupStorageCommand ssCmd = ((StartupStorageCommand) startup); - if(ssCmd.getHostType() == Host.Type.SecondaryStorageCmdExecutor) { - type = ssCmd.getHostType(); - } else { - if (ssCmd.getResourceType() == Storage.StorageResourceType.SECONDARY_STORAGE) { - type = Host.Type.SecondaryStorage; - if (resource != null - && resource instanceof DummySecondaryStorageResource) { - resource = null; - } - } else { - type = Host.Type.Storage; - } - final Map hostDetails = ssCmd.getHostDetails(); - if (hostDetails != null) { - if (details != null) { - details.putAll(hostDetails); - } else { - details = hostDetails; - } - } - } - } else if (startup instanceof StartupRoutingCommand) { - StartupRoutingCommand ssCmd = ((StartupRoutingCommand) startup); - type = Host.Type.Routing; - final Map hostDetails = ssCmd.getHostDetails(); - if (hostDetails != null) { - if (details != null) { - details.putAll(hostDetails); - } else { - details = hostDetails; - } - } - } else if (startup instanceof StartupProxyCommand) { - type = Host.Type.ConsoleProxy; - } else if (startup instanceof StartupRoutingCommand) { - type = Host.Type.Routing; - } else if (startup instanceof StartupExternalFirewallCommand) { - type = Host.Type.ExternalFirewall; - } else if (startup instanceof StartupExternalLoadBalancerCommand) { - type = Host.Type.ExternalLoadBalancer; - } else if (startup instanceof StartupPxeServerCommand) { - type = Host.Type.PxeServer; - } else if (startup instanceof StartupExternalDhcpCommand) { - type = Host.Type.ExternalDhcp; - } else { - assert false : "Did 
someone add a new Startup command?"; - } - - Long id = null; - HostVO server = _hostDao.findByGuid(startup.getGuid()); - if (server == null) { - server = _hostDao.findByGuid(startup.getGuidWithoutResource()); - } - if (server != null && server.getRemoved() == null) { - id = server.getId(); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Found the host " + id + " by guid: " - + startup.getGuid()); - } - if (directFirst) { - s_logger.debug("Old host reconnected as new"); - return null; - } - } else { - server = new HostVO(startup.getGuid()); - } - - server.setDetails(details); - server.setHostTags(hostTags); - - if(allocationState != null){ - try{ - HostAllocationState hostAllocationState = Host.HostAllocationState.valueOf(allocationState); - if(hostAllocationState != null){ - server.setHostAllocationState(hostAllocationState); - } - }catch(IllegalArgumentException ex){ - s_logger.error("Unable to resolve " + allocationState + " to a valid supported host allocation State, defaulting to 'Enabled'"); - server.setHostAllocationState(Host.HostAllocationState.Enabled); - } - }else{ - server.setHostAllocationState(Host.HostAllocationState.Enabled); - } - - updateHost(server, startup, type, _nodeId); - if (resource != null) { - server.setResource(resource.getClass().getName()); - } - if (id == null) { - /* - * // ignore integrity check for agent-simulator - * if(!"0.0.0.0".equals(startup.getPrivateIpAddress()) && - * !"0.0.0.0".equals(startup.getStorageIpAddress())) { if - * (_hostDao.findByPrivateIpAddressInDataCenter - * (server.getDataCenterId(), startup.getPrivateIpAddress()) != - * null) { throw newIllegalArgumentException( - * "The private ip address is already in used: " + - * startup.getPrivateIpAddress()); } - * - * if - * (_hostDao.findByPrivateIpAddressInDataCenter(server.getDataCenterId - * (), startup.getStorageIpAddress()) != null) { throw new - * IllegalArgumentException - * ("The private ip address is already in used: " + - * 
startup.getStorageIpAddress()); } } - */ - - if (startup instanceof StartupProxyCommand) { - server.setProxyPort(((StartupProxyCommand) startup) - .getProxyPort()); - } - - server = _hostDao.persist(server); - id = server.getId(); - - s_logger.info("New " + server.getType().toString() - + " host connected w/ guid " + startup.getGuid() - + " and id is " + id); - } else { - if (!_hostDao.connect(server, _nodeId)) { - throw new CloudRuntimeException( - "Agent cannot connect because the current state is " - + server.getStatus().toString()); - } - s_logger.info("Old " + server.getType().toString() - + " host reconnected w/ id =" + id); - } - createCapacityEntry(startup, server); - - return server; - } - - public HostVO createHost(final StartupCommand[] startup, - ServerResource resource, Map details, - boolean directFirst, List hostTags, String allocationState) throws IllegalArgumentException { - StartupCommand firstCmd = startup[0]; - HostVO result = createHost(firstCmd, resource, details, directFirst, hostTags, allocationState); - if (result == null) { - return null; - } - return result; - } - - public AgentAttache handleConnect(final Link link, - final StartupCommand[] startup) throws IllegalArgumentException, - ConnectionException { - HostVO server = createHost(startup, null, null, false, null, null); - if (server == null) { - return null; - } - long id = server.getId(); - - AgentAttache attache = createAttache(id, server, link); - - attache = notifyMonitorsOfConnection(attache, startup); - - return attache; - } - - public AgentAttache findAgent(long hostId) { - synchronized (_agents) { - return _agents.get(hostId); - } - } - - protected AgentAttache createAttache(long id, HostVO server, Link link) { - s_logger.debug("create ConnectedAgentAttache for " + id); - final AgentAttache attache = new ConnectedAgentAttache(this, id, link, - server.getStatus() == Status.Maintenance - || server.getStatus() == Status.ErrorInMaintenance - || server.getStatus() == 
Status.PrepareForMaintenance); - link.attach(attache); - AgentAttache old = null; - synchronized (_agents) { - old = _agents.get(id); - _agents.put(id, attache); - } - if (old != null) { - old.disconnect(Status.Removed); - } - return attache; - } - - protected AgentAttache createAttache(long id, HostVO server, - ServerResource resource) { - if (resource instanceof DummySecondaryStorageResource - || resource instanceof KvmDummyResourceBase) { - return new DummyAttache(this, id, false); - } - s_logger.debug("create DirectAgentAttache for " + id); - final DirectAgentAttache attache = new DirectAgentAttache(this, id, resource, - server.getStatus() == Status.Maintenance - || server.getStatus() == Status.ErrorInMaintenance - || server.getStatus() == Status.PrepareForMaintenance, - this); - AgentAttache old = null; - synchronized (_agents) { - old = _agents.get(id); - _agents.put(id, attache); - } - if (old != null) { - old.disconnect(Status.Removed); - } - return attache; - } - - @Override - public boolean maintenanceFailed(long hostId) { - HostVO host = _hostDao.findById(hostId); - if (host == null) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Cant not find host " + hostId); - } - return false; - } else { - return _hostDao.updateStatus(host, Event.UnableToMigrate, _nodeId); - } - } - - @Override - public Host updateHost(UpdateHostCmd cmd) - throws InvalidParameterValueException { - Long hostId = cmd.getId(); - Long guestOSCategoryId = cmd.getOsCategoryId(); - - if (guestOSCategoryId != null) { - - // Verify that the host exists - HostVO host = _hostDao.findById(hostId); - if (host == null) { - throw new InvalidParameterValueException("Host with id " - + hostId + " doesn't exist"); - } - - // Verify that the guest OS Category exists - if (guestOSCategoryId > 0) { - if (_guestOSCategoryDao.findById(guestOSCategoryId) == null) { - throw new InvalidParameterValueException( - "Please specify a valid guest OS category."); - } - } - - GuestOSCategoryVO 
guestOSCategory = _guestOSCategoryDao - .findById(guestOSCategoryId); - Map hostDetails = _hostDetailsDao - .findDetails(hostId); - - if (guestOSCategory != null) { - // Save a new entry for guest.os.category.id - hostDetails.put("guest.os.category.id", - String.valueOf(guestOSCategory.getId())); - } else { - // Delete any existing entry for guest.os.category.id - hostDetails.remove("guest.os.category.id"); - } - _hostDetailsDao.persist(hostId, hostDetails); - } - - String allocationState = cmd.getAllocationState(); - if(allocationState != null){ - // Verify that the host exists - HostVO host = _hostDao.findById(hostId); - if (host == null) { - throw new InvalidParameterValueException("Host with id " - + hostId + " doesn't exist"); - } - - try{ - HostAllocationState newAllocationState = Host.HostAllocationState.valueOf(allocationState); - if (newAllocationState == null) { - s_logger.error("Unable to resolve " + allocationState + " to a valid supported allocation State"); - throw new InvalidParameterValueException("Unable to resolve " + allocationState + " to a supported state"); - }else{ - host.setHostAllocationState(newAllocationState); - } - }catch(IllegalArgumentException ex){ - s_logger.error("Unable to resolve " + allocationState + " to a valid supported allocation State"); - throw new InvalidParameterValueException("Unable to resolve " + allocationState + " to a supported state"); - } - - _hostDao.update(hostId, host); - } - - HostVO updatedHost = _hostDao.findById(hostId); - return updatedHost; - } - - protected void updateHost(final HostVO host, final StartupCommand startup, - final Host.Type type, final long msId) - throws IllegalArgumentException { - s_logger.debug("updateHost() called"); - - String dataCenter = startup.getDataCenter(); - String pod = startup.getPod(); - String cluster = startup.getCluster(); - - if (pod != null && dataCenter != null - && pod.equalsIgnoreCase("default") - && dataCenter.equalsIgnoreCase("default")) { - List pods = 
_podDao.listAllIncludingRemoved(); - for (HostPodVO hpv : pods) { - if (checkCIDR(type, hpv, startup.getPrivateIpAddress(), - startup.getPrivateNetmask())) { - pod = hpv.getName(); - dataCenter = _dcDao.findById(hpv.getDataCenterId()) - .getName(); - break; - } - } - } - long dcId = -1; - DataCenterVO dc = _dcDao.findByName(dataCenter); - if (dc == null) { - try { - dcId = Long.parseLong(dataCenter); - dc = _dcDao.findById(dcId); - } catch (final NumberFormatException e) { - } - } - if (dc == null) { - throw new IllegalArgumentException("Host " - + startup.getPrivateIpAddress() - + " sent incorrect data center: " + dataCenter); - } - dcId = dc.getId(); - - HostPodVO p = _podDao.findByName(pod, dcId); - if (p == null) { - try { - final long podId = Long.parseLong(pod); - p = _podDao.findById(podId); - } catch (final NumberFormatException e) { - } - } - Long podId = null; - if (p == null) { - if (type != Host.Type.SecondaryStorage - && type != Host.Type.ExternalFirewall - && type != Host.Type.ExternalLoadBalancer) { - - /* - * s_logger.info("Unable to find the pod so we are creating one." 
- * ); p = createPod(pod, dcId, startup.getPrivateIpAddress(), - * NetUtils.getCidrSize(startup.getPrivateNetmask())); podId = - * p.getId(); - */ - s_logger.error("Host " + startup.getPrivateIpAddress() - + " sent incorrect pod: " + pod + " in " + dataCenter); - throw new IllegalArgumentException("Host " - + startup.getPrivateIpAddress() - + " sent incorrect pod: " + pod + " in " + dataCenter); - } - } else { - podId = p.getId(); - } - - Long clusterId = null; - if (cluster != null) { - try { - clusterId = Long.valueOf(cluster); - } catch (NumberFormatException e) { - ClusterVO c = _clusterDao.findBy(cluster, podId); - if (c == null) { - c = new ClusterVO(dcId, podId, cluster); - c = _clusterDao.persist(c); - } - clusterId = c.getId(); - } - } - - if (type == Host.Type.Routing) { - StartupRoutingCommand scc = (StartupRoutingCommand) startup; - - HypervisorType hypervisorType = scc.getHypervisorType(); - boolean doCidrCheck = true; - - ClusterVO clusterVO = _clusterDao.findById(clusterId); - if (clusterVO.getHypervisorType() != scc.getHypervisorType()) { - throw new IllegalArgumentException( - "Can't add host whose hypervisor type is: " - + scc.getHypervisorType() + " into cluster: " - + clusterId + " whose hypervisor type is: " - + clusterVO.getHypervisorType()); - } - - /* - * KVM:Enforcement that all the hosts in the cluster have the same - * os type, for migration - */ - if (scc.getHypervisorType() == HypervisorType.KVM) { - List hostsInCluster = _hostDao.listByCluster(clusterId); - if (!hostsInCluster.isEmpty()) { - HostVO oneHost = hostsInCluster.get(0); - _hostDao.loadDetails(oneHost); - String hostOsInCluster = oneHost.getDetail("Host.OS"); - String hostOs = scc.getHostDetails().get("Host.OS"); - if (!hostOsInCluster.equalsIgnoreCase(hostOs)) { - throw new IllegalArgumentException("Can't add host: " - + startup.getPrivateIpAddress() - + " with hostOS: " + hostOs - + " into a cluster," + "in which there are " - + hostOsInCluster + " hosts added"); - } - } - 
} - - // If this command is from the agent simulator, don't do the CIDR - // check - if (scc.getAgentTag() != null - && startup.getAgentTag() - .equalsIgnoreCase("vmops-simulator")) { - doCidrCheck = false; - } - - // If this command is from a KVM agent, or from an agent that has a - // null hypervisor type, don't do the CIDR check - if (hypervisorType == null || hypervisorType == HypervisorType.KVM - || hypervisorType == HypervisorType.VMware || hypervisorType == HypervisorType.BareMetal || hypervisorType == HypervisorType.Simulator) { - doCidrCheck = false; - } - - if (doCidrCheck) { - s_logger.info("Host: " + host.getName() - + " connected with hypervisor type: " + hypervisorType - + ". Checking CIDR..."); - } else { - s_logger.info("Host: " + host.getName() - + " connected with hypervisor type: " + hypervisorType - + ". Skipping CIDR check..."); - } - - if (doCidrCheck) { - checkCIDR(type, p, dc, scc.getPrivateIpAddress(), - scc.getPrivateNetmask()); - } - - // Check if the private/public IPs of the server are already in the - // private/public IP address tables - checkIPConflicts(type, p, dc, scc.getPrivateIpAddress(), - scc.getPublicIpAddress(), scc.getPublicIpAddress(), - scc.getPublicNetmask()); - } - - host.setDataCenterId(dc.getId()); - host.setPodId(podId); - host.setClusterId(clusterId); - host.setPrivateIpAddress(startup.getPrivateIpAddress()); - host.setPrivateNetmask(startup.getPrivateNetmask()); - host.setPrivateMacAddress(startup.getPrivateMacAddress()); - host.setPublicIpAddress(startup.getPublicIpAddress()); - host.setPublicMacAddress(startup.getPublicMacAddress()); - host.setPublicNetmask(startup.getPublicNetmask()); - host.setStorageIpAddress(startup.getStorageIpAddress()); - host.setStorageMacAddress(startup.getStorageMacAddress()); - host.setStorageNetmask(startup.getStorageNetmask()); - host.setVersion(startup.getVersion()); - host.setName(startup.getName()); - host.setType(type); - host.setManagementServerId(msId); - 
host.setStorageUrl(startup.getIqn()); - host.setLastPinged(System.currentTimeMillis() >> 10); - if (startup instanceof StartupRoutingCommand) { - final StartupRoutingCommand scc = (StartupRoutingCommand) startup; - host.setCaps(scc.getCapabilities()); - host.setCpus(scc.getCpus()); - host.setTotalMemory(scc.getMemory()); - host.setSpeed(scc.getSpeed()); - HypervisorType hyType = scc.getHypervisorType(); - host.setHypervisorType(hyType); - - } else if (startup instanceof StartupStorageCommand) { - final StartupStorageCommand ssc = (StartupStorageCommand) startup; - host.setParent(ssc.getParent()); - host.setTotalSize(ssc.getTotalSize()); - host.setHypervisorType(HypervisorType.None); - if (ssc.getNfsShare() != null) { - host.setStorageUrl(ssc.getNfsShare()); - } - } - if (startup.getStorageIpAddressDeux() != null) { - host.setStorageIpAddressDeux(startup.getStorageIpAddressDeux()); - host.setStorageMacAddressDeux(startup.getStorageMacAddressDeux()); - host.setStorageNetmaskDeux(startup.getStorageNetmaskDeux()); - } - - } - - @Override - public Host getHost(long hostId){ - return _hostDao.findById(hostId); - } - - // create capacity entries if none exist for this server - private void createCapacityEntry(final StartupCommand startup, HostVO server) { - SearchCriteria capacitySC = _capacityDao - .createSearchCriteria(); - capacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, server.getId()); - capacitySC.addAnd("dataCenterId", SearchCriteria.Op.EQ, - server.getDataCenterId()); - capacitySC.addAnd("podId", SearchCriteria.Op.EQ, server.getPodId()); - List capacities = _capacityDao.search(capacitySC, null); - - // remove old entries, we'll recalculate them anyway - if (startup instanceof StartupStorageCommand) { - if ((capacities != null) && !capacities.isEmpty()) { - for (CapacityVO capacity : capacities) { - _capacityDao.remove(capacity.getId()); - } - } - } - - if (startup instanceof StartupStorageCommand) { - StartupStorageCommand ssCmd = (StartupStorageCommand) 
startup; - if (ssCmd.getResourceType() == Storage.StorageResourceType.STORAGE_HOST) { - CapacityVO capacity = new CapacityVO(server.getId(), - server.getDataCenterId(), server.getPodId(), server.getClusterId(), 0L, - (long)(server.getTotalSize() * _overProvisioningFactor), - CapacityVO.CAPACITY_TYPE_STORAGE_ALLOCATED); - _capacityDao.persist(capacity); - } - } else if (startup instanceof StartupRoutingCommand) { - SearchCriteria capacityCPU = _capacityDao - .createSearchCriteria(); - capacityCPU.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, - server.getId()); - capacityCPU.addAnd("dataCenterId", SearchCriteria.Op.EQ, - server.getDataCenterId()); - capacityCPU - .addAnd("podId", SearchCriteria.Op.EQ, server.getPodId()); - capacityCPU.addAnd("capacityType", SearchCriteria.Op.EQ, - CapacityVO.CAPACITY_TYPE_CPU); - List capacityVOCpus = _capacityDao.search(capacitySC, - null); - - if (capacityVOCpus != null && !capacityVOCpus.isEmpty()) { - CapacityVO CapacityVOCpu = capacityVOCpus.get(0); - long newTotalCpu = (long) (server.getCpus().longValue() - * server.getSpeed().longValue() * _cpuOverProvisioningFactor); - if ((CapacityVOCpu.getTotalCapacity() <= newTotalCpu) - || ((CapacityVOCpu.getUsedCapacity() + CapacityVOCpu - .getReservedCapacity()) <= newTotalCpu)) { - CapacityVOCpu.setTotalCapacity(newTotalCpu); - } else if ((CapacityVOCpu.getUsedCapacity() - + CapacityVOCpu.getReservedCapacity() > newTotalCpu) - && (CapacityVOCpu.getUsedCapacity() < newTotalCpu)) { - CapacityVOCpu.setReservedCapacity(0); - CapacityVOCpu.setTotalCapacity(newTotalCpu); - } else { - s_logger.debug("What? 
new cpu is :" + newTotalCpu - + ", old one is " + CapacityVOCpu.getUsedCapacity() - + "," + CapacityVOCpu.getReservedCapacity() + "," - + CapacityVOCpu.getTotalCapacity()); - } - _capacityDao.update(CapacityVOCpu.getId(), CapacityVOCpu); - } else { - CapacityVO capacity = new CapacityVO( - server.getId(), - server.getDataCenterId(), - server.getPodId(), - server.getClusterId(), - 0L, - (long) (server.getCpus().longValue() - * server.getSpeed().longValue() * _cpuOverProvisioningFactor), - CapacityVO.CAPACITY_TYPE_CPU); - _capacityDao.persist(capacity); - } - - SearchCriteria capacityMem = _capacityDao - .createSearchCriteria(); - capacityMem.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, - server.getId()); - capacityMem.addAnd("dataCenterId", SearchCriteria.Op.EQ, - server.getDataCenterId()); - capacityMem - .addAnd("podId", SearchCriteria.Op.EQ, server.getPodId()); - capacityMem.addAnd("capacityType", SearchCriteria.Op.EQ, - CapacityVO.CAPACITY_TYPE_MEMORY); - List capacityVOMems = _capacityDao.search(capacityMem, - null); - - if (capacityVOMems != null && !capacityVOMems.isEmpty()) { - CapacityVO CapacityVOMem = capacityVOMems.get(0); - long newTotalMem = server.getTotalMemory(); - if (CapacityVOMem.getTotalCapacity() <= newTotalMem - || (CapacityVOMem.getUsedCapacity() - + CapacityVOMem.getReservedCapacity() <= newTotalMem)) { - CapacityVOMem.setTotalCapacity(newTotalMem); - } else if (CapacityVOMem.getUsedCapacity() - + CapacityVOMem.getReservedCapacity() > newTotalMem - && CapacityVOMem.getUsedCapacity() < newTotalMem) { - CapacityVOMem.setReservedCapacity(0); - CapacityVOMem.setTotalCapacity(newTotalMem); - } else { - s_logger.debug("What? 
new cpu is :" + newTotalMem - + ", old one is " + CapacityVOMem.getUsedCapacity() - + "," + CapacityVOMem.getReservedCapacity() + "," - + CapacityVOMem.getTotalCapacity()); - } - _capacityDao.update(CapacityVOMem.getId(), CapacityVOMem); - } else { - CapacityVO capacity = new CapacityVO(server.getId(), - server.getDataCenterId(), server.getPodId(), server.getClusterId(), 0L, - server.getTotalMemory(), - CapacityVO.CAPACITY_TYPE_MEMORY); - _capacityDao.persist(capacity); - } - } - - } - - // protected void upgradeAgent(final Link link, final byte[] request, final - // String reason) { - // - // if (reason == UnsupportedVersionException.IncompatibleVersion) { - // final UpgradeResponse response = new UpgradeResponse(request, - // _upgradeMgr.getAgentUrl()); - // try { - // s_logger.info("Asking for the agent to update due to incompatible version: " - // + response.toString()); - // link.send(response.toBytes()); - // } catch (final ClosedChannelException e) { - // s_logger.warn("Unable to send response due to connection closed: " + - // response.toString()); - // } - // return; - // } - // - // assert (reason == UnsupportedVersionException.UnknownVersion) : - // "Unknown reason: " + reason; - // final UpgradeResponse response = new UpgradeResponse(request, - // _upgradeMgr.getAgentUrl()); - // try { - // s_logger.info("Asking for the agent to update due to unknown version: " + - // response.toString()); - // link.send(response.toBytes()); - // } catch (final ClosedChannelException e) { - // s_logger.warn("Unable to send response due to connection closed: " + - // response.toString()); - // } - // } - - protected class SimulateStartTask implements Runnable { - ServerResource resource; - Map details; - long id; - ActionDelegate actionDelegate; - - public SimulateStartTask(long id, ServerResource resource, - Map details, ActionDelegate actionDelegate) { - this.id = id; - this.resource = resource; - this.details = details; - this.actionDelegate = actionDelegate; - } - - 
@Override - public void run() { + + @Override + public boolean deleteHost(long hostId, boolean isForced) { + User caller = _accountMgr.getActiveUser(UserContext.current().getCallerUserId()); + // Verify that host exists + HostVO host = _hostDao.findById(hostId); + if (host == null) { + throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); + } + _accountMgr.checkAccessAndSpecifyAuthority(UserContext.current().getCaller(), host.getDataCenterId()); + return deleteHost(hostId, isForced, caller); + } + + @DB + protected boolean deleteSecondaryStorageHost(HostVO secStorageHost) { + long zoneId = secStorageHost.getDataCenterId(); + long hostId = secStorageHost.getId(); + Transaction txn = Transaction.currentTxn(); + try { + + List allVmsInZone = _vmDao.listByZoneId(zoneId); + if (!allVmsInZone.isEmpty()) { + s_logger.warn("Cannot delete secondary storage host when there are " + allVmsInZone.size() + " vms in zone " + zoneId); + return false; + } + txn.start(); + + if (!_hostDao.updateStatus(secStorageHost, Event.MaintenanceRequested, _nodeId)) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Unable to take host " + hostId + " into maintenance mode. Delete call is ignored"); + } + return false; + } + if (!_hostDao.updateStatus(secStorageHost, Event.PreparationComplete, _nodeId)) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Unable to take host " + hostId + " into maintenance mode. 
Delete call is ignored"); + } + return false; + } + + AgentAttache attache = findAttache(hostId); + if (attache != null) { + handleDisconnect(attache, Status.Event.Remove, false); + } + // now delete the host + secStorageHost.setGuid(null); + _hostDao.update(secStorageHost.getId(), secStorageHost); + _hostDao.remove(secStorageHost.getId()); + + // delete the templates associated with this host + SearchCriteria templateHostSC = _vmTemplateHostDao.createSearchCriteria(); + templateHostSC.addAnd("hostId", SearchCriteria.Op.EQ, secStorageHost.getId()); + _vmTemplateHostDao.remove(templateHostSC); + + // delete the op_host_capacity entry + SearchCriteria secStorageCapacitySC = _capacityDao.createSearchCriteria(); + secStorageCapacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, secStorageHost.getId()); + secStorageCapacitySC.addAnd("capacityType", SearchCriteria.Op.EQ, Capacity.CAPACITY_TYPE_SECONDARY_STORAGE); + _capacityDao.remove(secStorageCapacitySC); + + /* Disconnected agent needs special handling here */ + secStorageHost.setGuid(null); + + txn.commit(); + return true; + } catch (Throwable t) { + s_logger.error("Unable to delete sec storage host: " + secStorageHost.getId(), t); + return false; + } + } + + @Override + public boolean isVirtualMachineUpgradable(final UserVm vm, final ServiceOffering offering) { + Enumeration en = _hostAllocators.enumeration(); + boolean isMachineUpgradable = true; + while (isMachineUpgradable && en.hasMoreElements()) { + final HostAllocator allocator = en.nextElement(); + isMachineUpgradable = allocator.isVirtualMachineUpgradable(vm, offering); + } + + return isMachineUpgradable; + } + + protected int getPingInterval() { + return _pingInterval; + } + + @Override + public Answer send(Long hostId, Command cmd, int timeout) throws AgentUnavailableException, OperationTimedoutException { + Commands cmds = new Commands(OnError.Revert); + cmds.addCommand(cmd); + send(hostId, cmds, timeout); + Answer[] answers = cmds.getAnswers(); + if 
(answers != null && !(answers[0] instanceof UnsupportedAnswer)) { + return answers[0]; + } + + if (answers != null && (answers[0] instanceof UnsupportedAnswer)) { + s_logger.warn("Unsupported Command: " + answers[0].getDetails()); + return answers[0]; + } + + return null; + } + + @DB + protected boolean noDbTxn() { + Transaction txn = Transaction.currentTxn(); + return !txn.dbTxnStarted(); + } + + @Override + public Answer[] send(Long hostId, Commands commands, int timeout) throws AgentUnavailableException, OperationTimedoutException { + assert hostId != null : "Who's not checking the agent id before sending? ... (finger wagging)"; + if (hostId == null) { + throw new AgentUnavailableException(-1); + } + + // assert noDbTxn() : + // "I know, I know. Why are we so strict as to not allow txn across an agent call? ... Why are we so cruel ... Why are we such a dictator .... Too bad... Sorry...but NO AGENT COMMANDS WRAPPED WITHIN DB TRANSACTIONS!"; + + Command[] cmds = commands.toCommands(); + + assert cmds.length > 0 : "Ask yourself this about a hundred times. 
Why am I sending zero length commands?"; + + if (cmds.length == 0) { + commands.setAnswers(new Answer[0]); + } + + final AgentAttache agent = getAttache(hostId); + if (agent == null || agent.isClosed()) { + throw new AgentUnavailableException("agent not logged into this management server", hostId); + } + + long seq = _hostDao.getNextSequence(hostId); + Request req = new Request(seq, hostId, _nodeId, cmds, commands.stopOnError(), true, commands.revertOnError()); + Answer[] answers = agent.send(req, timeout); + notifyAnswersToMonitors(hostId, seq, answers); + commands.setAnswers(answers); + return answers; + } + + protected Status investigate(AgentAttache agent) { + Long hostId = agent.getId(); + if (s_logger.isDebugEnabled()) { + s_logger.debug("checking if agent (" + hostId + ") is alive"); + } + + try { + long seq = _hostDao.getNextSequence(hostId); + Request req = new Request(seq, hostId, _nodeId, new CheckHealthCommand(), true); + Answer[] answers = agent.send(req, 50 * 1000); + if (answers != null && answers[0] != null) { + Status status = answers[0].getResult() ? 
Status.Up : Status.Down; + if (s_logger.isDebugEnabled()) { + s_logger.debug("agent (" + hostId + ") responded to checkHeathCommand, reporting that agent is " + status); + } + return status; + } + } catch (AgentUnavailableException e) { + s_logger.debug("Agent is unavailable so we move on."); + } catch (OperationTimedoutException e) { + s_logger.debug("Timed Out " + e.getMessage()); + } + + return _haMgr.investigate(hostId); + } + + protected AgentAttache getAttache(final Long hostId) throws AgentUnavailableException { + assert (hostId != null) : "Who didn't check their id value?"; + if (hostId == null) { + return null; + } + AgentAttache agent = findAttache(hostId); + if (agent == null) { + s_logger.debug("Unable to find agent for " + hostId); + throw new AgentUnavailableException("Unable to find agent ", hostId); + } + + return agent; + } + + @Override + public long send(Long hostId, Commands commands, Listener listener) throws AgentUnavailableException { + final AgentAttache agent = getAttache(hostId); + if (agent.isClosed()) { + return -1; + } + + Command[] cmds = commands.toCommands(); + + assert cmds.length > 0 : "Why are you sending zero length commands?"; + if (cmds.length == 0) { + return -1; + } + long seq = _hostDao.getNextSequence(hostId); + Request req = new Request(seq, hostId, _nodeId, cmds, commands.stopOnError(), true, commands.revertOnError()); + agent.send(req, listener); + return seq; + } + + @Override + public long gatherStats(final Long hostId, final Command cmd, final Listener listener) { + try { + return send(hostId, new Commands(cmd), listener); + } catch (final AgentUnavailableException e) { + return -1; + } + } + + public void removeAgent(AgentAttache attache, Status nextState) { + if (attache == null) { + return; + } + long hostId = attache.getId(); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Remove Agent : " + hostId); + } + AgentAttache removed = null; + boolean conflict = false; + synchronized (_agents) { + removed = 
_agents.remove(hostId); + if (removed != null && removed != attache) { + conflict = true; + _agents.put(hostId, removed); + removed = attache; + } + } + if (conflict) { + s_logger.debug("Agent for host " + hostId + " is created when it is being disconnected"); + } + if (removed != null) { + removed.disconnect(nextState); + } + } + + @Override + public void disconnect(final long hostId, final Status.Event event, final boolean investigate) { + AgentAttache attache = findAttache(hostId); + + if (attache != null) { + disconnect(attache, event, investigate); + } else { + HostVO host = _hostDao.findById(hostId); + if (host != null && host.getRemoved() == null) { + if (event != null && event.equals(Event.Remove)) { + host.setGuid(null); + host.setClusterId(null); + } + _hostDao.updateStatus(host, event, _nodeId); + } + } + } + + public void disconnect(AgentAttache attache, final Status.Event event, final boolean investigate) { + _executor.submit(new DisconnectTask(attache, event, investigate)); + } + + protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate) { + if (attache == null) { + return true; + } + + long hostId = attache.getId(); + + s_logger.info("Host " + hostId + " is disconnecting with event " + event.toString()); + + HostVO host = _hostDao.findById(hostId); + if (host == null) { + s_logger.warn("Can't find host with " + hostId); + removeAgent(attache, Status.Removed); + return true; + + } + final Status currentState = host.getStatus(); + if (currentState == Status.Down || currentState == Status.Alert || currentState == Status.Removed || currentState == Status.PrepareForMaintenance) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Host " + hostId + " is already " + currentState.toString()); + } + if (currentState != Status.PrepareForMaintenance) { + removeAgent(attache, currentState); + } + return true; + } + Status nextState = currentState.getNextStatus(event); + if (nextState == null) { + if (!(attache 
instanceof DirectAgentAttache)) { + return false; + } + + s_logger.debug("There is no transition from state " + currentState.toString() + " and event " + event.toString()); + assert false : "How did we get here. Look at the FSM"; + return false; + } + + if (s_logger.isDebugEnabled()) { + s_logger.debug("The next state is " + nextState.toString() + ", current state is " + currentState); + } + + // Now we go and correctly diagnose what the actual situation is + if (nextState == Status.Alert && investigate) { + s_logger.info("Investigating why host " + hostId + " has disconnected with event " + event.toString()); + + final Status determinedState = investigate(attache); + s_logger.info("The state determined is " + (determinedState != null ? determinedState.toString() : "undeterminable")); + + if (determinedState == null || determinedState == Status.Down) { + s_logger.error("Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs"); + + event = Event.HostDown; + } else if (determinedState == Status.Up) { + // we effectively pinged from the server here. 
+ s_logger.info("Agent is determined to be up and running"); + _hostDao.updateStatus(host, Event.Ping, _nodeId); + return false; + } else if (determinedState == Status.Disconnected) { + s_logger.warn("Agent is disconnected but the host is still up: " + host.getId() + "-" + host.getName()); + if (currentState == Status.Disconnected) { + if (((System.currentTimeMillis() >> 10) - host.getLastPinged()) > _alertWait) { + s_logger.warn("Host " + host.getId() + " has been disconnected pass the time it should be disconnected."); + event = Event.WaitedTooLong; + } else { + s_logger.debug("Host has been determined to be disconnected but it hasn't passed the wait time yet."); + return false; + } + } else if (currentState == Status.Updating) { + if (((System.currentTimeMillis() >> 10) - host.getLastPinged()) > _updateWait) { + s_logger.warn("Host " + host.getId() + " has been updating for too long"); + + event = Event.WaitedTooLong; + } else { + s_logger.debug("Host has been determined to be disconnected but it hasn't passed the wait time yet."); + return false; + } + } else if (currentState == Status.Up) { + DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); + HostPodVO podVO = _podDao.findById(host.getPodId()); + String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); + if ((host.getType() != Host.Type.SecondaryStorage) && (host.getType() != Host.Type.ConsoleProxy)) { + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host disconnected, " + hostDesc, "If the agent for host [" + hostDesc + + "] is not restarted within " + _alertWait + " seconds, HA will begin on the VMs"); + } + event = Event.AgentDisconnected; + } + } else { + // if we end up here we are in alert state, send an alert + DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); + HostPodVO podVO = _podDao.findById(host.getPodId()); + String hostDesc = "name: " + 
host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host in ALERT state, " + hostDesc, "In availability zone " + host.getDataCenterId() + + ", host is in alert state: " + host.getId() + "-" + host.getName()); + } + } + + if (s_logger.isDebugEnabled()) { + s_logger.debug("Deregistering link for " + hostId + " with state " + nextState); + } + + _hostDao.disconnect(host, event, _nodeId); + + removeAgent(attache, nextState); + + host = _hostDao.findById(host.getId()); + if (host.getStatus() == Status.Alert || host.getStatus() == Status.Down) { + _haMgr.scheduleRestartForVmsOnHost(host); + } + + for (Pair monitor : _hostMonitors) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName()); + } + monitor.second().processDisconnect(hostId, nextState); + } + + return true; + } + + protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, final StartupCommand[] cmd) throws ConnectionException { + long hostId = attache.getId(); + HostVO host = _hostDao.findById(hostId); + for (Pair monitor : _hostMonitors) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Sending Connect to listener: " + monitor.second().getClass().getSimpleName()); + } + for (int i = 0; i < cmd.length; i++) { + try { + monitor.second().processConnect(host, cmd[i]); + } catch (ConnectionException e) { + if (e.isSetupError()) { + s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage()); + handleDisconnect(attache, Event.AgentDisconnected, false); + throw e; + } else { + s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + e.getMessage()); + handleDisconnect(attache, 
Event.ShutdownRequested, false); + return attache; + } + } + } + } + + Long dcId = host.getDataCenterId(); + ReadyCommand ready = new ReadyCommand(dcId); + Answer answer = easySend(hostId, ready); + if (answer == null || !answer.getResult()) { + // this is tricky part for secondary storage + // make it as disconnected, wait for secondary storage VM to be up + // return the attache instead of null, even if it is disconnected + handleDisconnect(attache, Event.AgentDisconnected, false); + } + + _hostDao.updateStatus(host, Event.Ready, _nodeId); + attache.ready(); + return attache; + } + + @Override + public boolean start() { + startDirectlyConnectedHosts(); + if (_monitor != null) { + _monitor.start(); + } + if (_connection != null) { + _connection.start(); + } + + return true; + } + + public void startDirectlyConnectedHosts() { + List hosts = _hostDao.findDirectlyConnectedHosts(); + for (HostVO host : hosts) { + loadDirectlyConnectedHost(host); + } + } + + @SuppressWarnings("rawtypes") + protected void loadDirectlyConnectedHost(HostVO host) { + String resourceName = host.getResource(); + ServerResource resource = null; + try { + Class clazz = Class.forName(resourceName); + Constructor constructor = clazz.getConstructor(); + resource = (ServerResource) constructor.newInstance(); + } catch (ClassNotFoundException e) { + s_logger.warn("Unable to find class " + host.getResource(), e); + return; + } catch (InstantiationException e) { + s_logger.warn("Unable to instantiate class " + host.getResource(), e); + return; + } catch (IllegalAccessException e) { + s_logger.warn("Illegal access " + host.getResource(), e); + return; + } catch (SecurityException e) { + s_logger.warn("Security error on " + host.getResource(), e); + return; + } catch (NoSuchMethodException e) { + s_logger.warn("NoSuchMethodException error on " + host.getResource(), e); + return; + } catch (IllegalArgumentException e) { + s_logger.warn("IllegalArgumentException error on " + host.getResource(), e); + 
return; + } catch (InvocationTargetException e) { + s_logger.warn("InvocationTargetException error on " + host.getResource(), e); + return; + } + + _hostDao.loadDetails(host); + + HashMap params = new HashMap(host.getDetails().size() + 5); + params.putAll(host.getDetails()); + + params.put("guid", host.getGuid()); + params.put("zone", Long.toString(host.getDataCenterId())); + if (host.getPodId() != null) { + params.put("pod", Long.toString(host.getPodId())); + } + if (host.getClusterId() != null) { + params.put("cluster", Long.toString(host.getClusterId())); + String guid = null; + ClusterVO cluster = _clusterDao.findById(host.getClusterId()); + if (cluster.getGuid() == null) { + guid = host.getDetail("pool"); + } else { + guid = cluster.getGuid(); + } + if (guid == null || guid.isEmpty()) { + throw new CloudRuntimeException("Can not find guid for cluster " + cluster.getId() + " name " + cluster.getName()); + } + params.put("pool", guid); + } + + params.put("ipaddress", host.getPrivateIpAddress()); + params.put("secondary.storage.vm", "false"); + params.put("max.template.iso.size", _configDao.getValue("max.template.iso.size")); + + try { + resource.configure(host.getName(), params); + } catch (ConfigurationException e) { + s_logger.warn("Unable to configure resource due to ", e); + return; + } + + if (!resource.start()) { + s_logger.warn("Unable to start the resource"); + return; + } + host.setLastPinged(System.currentTimeMillis() >> 10); + host.setManagementServerId(_nodeId); + _hostDao.update(host.getId(), host); + _executor.execute(new SimulateStartTask(host.getId(), resource, host.getDetails(), null)); + } + + protected AgentAttache simulateStart(ServerResource resource, Map details, boolean old, List hostTags, String allocationState) throws IllegalArgumentException { + StartupCommand[] cmds = resource.initialize(); + if (cmds == null) { + return null; + } + + AgentAttache attache = null; + if (s_logger.isDebugEnabled()) { + new Request(0l, -1l, -1l, cmds, 
true, false, true).log(-1, "Startup request from directly connected host: "); + // s_logger.debug("Startup request from directly connected host: " + // + new Request(0l, -1l, -1l, cmds, true, false, true) + // .toString()); + } + try { + attache = handleDirectConnect(resource, cmds, details, old, hostTags, allocationState); + } catch (IllegalArgumentException ex) { + s_logger.warn("Unable to connect due to ", ex); + throw ex; + } catch (Exception e) { + s_logger.warn("Unable to connect due to ", e); + } + + if (attache == null) { + resource.disconnected(); + return null; + } + if (attache.isReady()) { + StartupAnswer[] answers = new StartupAnswer[cmds.length]; + for (int i = 0; i < answers.length; i++) { + answers[i] = new StartupAnswer(cmds[i], attache.getId(), _pingInterval); + } + + attache.process(answers); + } + return attache; + } + + @Override + public boolean stop() { + if (_monitor != null) { + _monitor.signalStop(); + } + if (_connection != null) { + _connection.stop(); + } + + s_logger.info("Disconnecting agents: " + _agents.size()); + synchronized (_agents) { + for (final AgentAttache agent : _agents.values()) { + final HostVO host = _hostDao.findById(agent.getId()); + if (host == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Cant not find host " + agent.getId()); + } + } else { + _hostDao.updateStatus(host, Event.ManagementServerDown, _nodeId); + } + } + } + return true; + } + + @Override + public Pair findPod(final VirtualMachineTemplate template, ServiceOfferingVO offering, final DataCenterVO dc, final long accountId, Set avoids) { + final Enumeration en = _podAllocators.enumeration(); + while (en.hasMoreElements()) { + final PodAllocator allocator = (PodAllocator) en.nextElement(); + final Pair pod = allocator.allocateTo(template, offering, dc, accountId, avoids); + if (pod != null) { + return pod; + } + } + return null; + } + + @Override + public HostStats getHostStatistics(long hostId) { + Answer answer = easySend(hostId, new 
GetHostStatsCommand(_hostDao.findById(hostId).getGuid(), _hostDao.findById(hostId).getName(), hostId)); + + if (answer != null && (answer instanceof UnsupportedAnswer)) { + return null; + } + + if (answer == null || !answer.getResult()) { + String msg = "Unable to obtain host " + hostId + " statistics. "; + s_logger.warn(msg); + return null; + } else { + + // now construct the result object + if (answer instanceof GetHostStatsAnswer) { + return ((GetHostStatsAnswer) answer).getHostStats(); + } + } + return null; + } + + @Override + public Long getGuestOSCategoryId(long hostId) { + HostVO host = _hostDao.findById(hostId); + if (host == null) { + return null; + } else { + _hostDao.loadDetails(host); + DetailVO detail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id"); + if (detail == null) { + return null; + } else { + return Long.parseLong(detail.getValue()); + } + } + } + + @Override + public String getHostTags(long hostId) { + List hostTags = _hostTagsDao.gethostTags(hostId); + if (hostTags == null) { + return null; + } else { + return StringUtils.listToCsvTags(hostTags); + } + } + + @Override + public String getName() { + return _name; + } + + protected class DisconnectTask implements Runnable { + AgentAttache _attache; + Status.Event _event; + boolean _investigate; + + DisconnectTask(final AgentAttache attache, final Status.Event event, final boolean investigate) { + _attache = attache; + _event = event; + _investigate = investigate; + } + + @Override + public void run() { + try { + handleDisconnect(_attache, _event, _investigate); + } catch (final Exception e) { + s_logger.error("Exception caught while handling disconnect: ", e); + } finally { + StackMaid.current().exitCleanup(); + } + } + } + + @Override + public Answer easySend(final Long hostId, final Command cmd) { + return easySend(hostId, cmd, _wait); + } + + @Override + public Answer easySend(final Long hostId, final Command cmd, int timeout) { + try { + Host h = _hostDao.findById(hostId); + if 
(h == null || h.getRemoved() != null) { + s_logger.debug("Host with id " + hostId.toString() + " doesn't exist"); + return null; + } + Status status = h.getStatus(); + if (!status.equals(Status.Up) && !status.equals(Status.Connecting)) { + return null; + } + final Answer answer = send(hostId, cmd, timeout); + if (answer == null) { + s_logger.warn("send returns null answer"); + return null; + } + + if (!answer.getResult()) { + s_logger.warn("Unable to execute command: " + cmd.toString() + " due to " + answer.getDetails()); + return null; + } + + if (s_logger.isDebugEnabled() && answer.getDetails() != null) { + s_logger.debug("Details from executing " + cmd.getClass().toString() + ": " + answer.getDetails()); + } + + return answer; + + } catch (final AgentUnavailableException e) { + s_logger.warn(e.getMessage()); + return null; + } catch (final OperationTimedoutException e) { + s_logger.warn("Operation timed out: " + e.getMessage()); + return null; + } catch (final Exception e) { + s_logger.warn("Exception while sending", e); + return null; + } + } + + @Override + public Answer send(final Long hostId, final Command cmd) throws AgentUnavailableException, OperationTimedoutException { + return send(hostId, cmd, _wait); + } + + @Override + public Answer[] send(final Long hostId, Commands cmds) throws AgentUnavailableException, OperationTimedoutException { + return send(hostId, cmds, _wait); + } + + @Override + public Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException { + Long hostId = cmd.getId(); + + HostVO host = _hostDao.findById(hostId); + if (host == null) { + throw new InvalidParameterValueException("Host with id " + hostId.toString() + " doesn't exist"); + } + + boolean result = reconnect(hostId); + if (result) { + return host; + } + throw new CloudRuntimeException("Failed to reconnect host with id " + hostId.toString() + ", internal error."); + } + + @Override + public boolean reconnect(final long hostId) throws AgentUnavailableException { 
+ HostVO host; + + host = _hostDao.findById(hostId); + if (host == null || host.getRemoved() != null) { + s_logger.warn("Unable to find host " + hostId); + return false; + } + + if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) { + s_logger.info("Unable to disconnect host because it is not in the correct state: host=" + hostId + "; Status=" + host.getStatus()); + return false; + } + + AgentAttache attache = findAttache(hostId); + if (attache == null) { + s_logger.info("Unable to disconnect host because it is not connected to this server: " + hostId); + return false; + } + + disconnect(attache, Event.ShutdownRequested, false); + return true; + } + + @Override + public boolean cancelMaintenance(final long hostId) { + + HostVO host; + host = _hostDao.findById(hostId); + if (host == null || host.getRemoved() != null) { + s_logger.warn("Unable to find host " + hostId); + return true; + } + + if (host.getStatus() != Status.PrepareForMaintenance && host.getStatus() != Status.Maintenance && host.getStatus() != Status.ErrorInMaintenance) { + return true; + } + + _haMgr.cancelScheduledMigrations(host); + List vms = _haMgr.findTakenMigrationWork(); + for (VMInstanceVO vm : vms) { + if (vm.getHostId() != null && vm.getHostId() == hostId) { + s_logger.info("Unable to cancel migration because the vm is being migrated: " + vm.toString()); + return false; + } + } + disconnect(hostId, Event.ResetRequested, false); + return true; + } + + @Override + public Host cancelMaintenance(CancelMaintenanceCmd cmd) throws InvalidParameterValueException { + Long hostId = cmd.getId(); + + // verify input parameters + HostVO host = _hostDao.findById(hostId); + if (host == null || host.getRemoved() != null) { + throw new InvalidParameterValueException("Host with id " + hostId.toString() + " doesn't exist"); + } + + boolean success = cancelMaintenance(hostId); + if (!success) { + throw new CloudRuntimeException("Internal error cancelling maintenance."); + } + return host; + } + 
+ @Override + public boolean executeUserRequest(long hostId, Event event) throws AgentUnavailableException { + if (event == Event.MaintenanceRequested) { + return maintain(hostId); + } else if (event == Event.ResetRequested) { + return cancelMaintenance(hostId); + } else if (event == Event.Remove) { + User caller = _accountMgr.getActiveUser(User.UID_SYSTEM); + return deleteHost(hostId, false, caller); + } else if (event == Event.AgentDisconnected) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Received agent disconnect event for host " + hostId); + } + AgentAttache attache = null; + attache = findAttache(hostId); + if (attache != null) { + handleDisconnect(attache, Event.AgentDisconnected, false); + } + return true; + } else if (event == Event.ShutdownRequested) { + return reconnect(hostId); + } + return false; + } + + @Override + public boolean maintain(final long hostId) throws AgentUnavailableException { + HostVO host = _hostDao.findById(hostId); + Status state; + + Answer answer = easySend(hostId, new MaintainCommand()); + if (answer == null || !answer.getResult()) { + s_logger.warn("Unable to put host in maintenance mode: " + hostId); + return false; + } + + // Let's put this guy in maintenance state + do { + host = _hostDao.findById(hostId); + if (host == null) { + s_logger.debug("Unable to find host " + hostId); + return false; + } + state = host.getStatus(); + if (state == Status.Disconnected || state == Status.Updating) { + s_logger.debug("Unable to put host " + hostId + " in maintenance mode because it is currently in " + state.toString()); + throw new AgentUnavailableException("Agent is in " + state.toString() + " state. 
Please wait for it to become Alert state try again.", hostId); + } + } while (!_hostDao.updateStatus(host, Event.MaintenanceRequested, _nodeId)); + + AgentAttache attache = findAttache(hostId); + if (attache != null) { + attache.setMaintenanceMode(true); + } + + if (attache != null) { + // Now cancel all of the commands except for the active one. + attache.cancelAllCommands(Status.PrepareForMaintenance, false); + } + + final Host.Type type = host.getType(); + + if (type == Host.Type.Routing) { + + final List vms = _vmDao.listByHostId(hostId); + if (vms.size() == 0) { + return true; + } + + List hosts = _hostDao.listBy(host.getClusterId(), host.getPodId(), host.getDataCenterId()); + + for (final VMInstanceVO vm : vms) { + if (hosts == null || hosts.size() <= 1) { + // for the last host in this cluster, stop all the VMs + _haMgr.scheduleStop(vm, hostId, WorkType.ForceStop); + } else { + _haMgr.scheduleMigration(vm); + } + } + } + + return true; + } + + @Override + public Host maintain(PrepareForMaintenanceCmd cmd) throws InvalidParameterValueException { + Long hostId = cmd.getId(); + HostVO host = _hostDao.findById(hostId); + + if (host == null) { + s_logger.debug("Unable to find host " + hostId); + throw new InvalidParameterValueException("Unable to find host with ID: " + hostId + ". Please specify a valid host ID."); + } + + if (_hostDao.countBy(host.getClusterId(), Status.PrepareForMaintenance, Status.ErrorInMaintenance) > 0) { + throw new InvalidParameterValueException("There are other servers in PrepareForMaintenance OR ErrorInMaintenance STATUS in cluster " + host.getClusterId()); + } + + if (_storageMgr.isLocalStorageActiveOnHost(host)) { + throw new InvalidParameterValueException("There are active VMs using the host's local storage pool. 
Please stop all VMs on this host that use local storage."); + } + + try { + if (maintain(hostId)) { + return _hostDao.findById(hostId); + } else { + throw new CloudRuntimeException("Unable to prepare for maintenance host " + hostId); + } + } catch (AgentUnavailableException e) { + throw new CloudRuntimeException("Unable to prepare for maintenance host " + hostId); + } + } + + public boolean checkCIDR(Host.Type type, HostPodVO pod, String serverPrivateIP, String serverPrivateNetmask) { + if (serverPrivateIP == null) { + return true; + } + // Get the CIDR address and CIDR size + String cidrAddress = pod.getCidrAddress(); + long cidrSize = pod.getCidrSize(); + + // If the server's private IP address is not in the same subnet as the + // pod's CIDR, return false + String cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); + String serverSubnet = NetUtils.getSubNet(serverPrivateIP, serverPrivateNetmask); + if (!cidrSubnet.equals(serverSubnet)) { + return false; + } + + // If the server's private netmask is less inclusive than the pod's CIDR + // netmask, return false + String cidrNetmask = NetUtils.getCidrSubNet("255.255.255.255", cidrSize); + long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); + long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); + if (serverNetmaskNumeric > cidrNetmaskNumeric) { + return false; + } + return true; + } + + protected void checkCIDR(Host.Type type, HostPodVO pod, DataCenterVO dc, String serverPrivateIP, String serverPrivateNetmask) throws IllegalArgumentException { + // Skip this check for Storage Agents and Console Proxies + if (type == Host.Type.Storage || type == Host.Type.ConsoleProxy) { + return; + } + + if (serverPrivateIP == null) { + return; + } + // Get the CIDR address and CIDR size + String cidrAddress = pod.getCidrAddress(); + long cidrSize = pod.getCidrSize(); + + // If the server's private IP address is not in the same subnet as the + // pod's CIDR, return false + String cidrSubnet = 
NetUtils.getCidrSubNet(cidrAddress, cidrSize); + String serverSubnet = NetUtils.getSubNet(serverPrivateIP, serverPrivateNetmask); + if (!cidrSubnet.equals(serverSubnet)) { + s_logger.warn("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " + pod.getName() + " and zone: " + dc.getName()); + throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " + pod.getName() + " and zone: " + dc.getName()); + } + + // If the server's private netmask is less inclusive than the pod's CIDR + // netmask, return false + String cidrNetmask = NetUtils.getCidrSubNet("255.255.255.255", cidrSize); + long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); + long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); + if (serverNetmaskNumeric > cidrNetmaskNumeric) { + throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " + pod.getName() + " and zone: " + dc.getName()); + } + + } + + public void checkIPConflicts(Host.Type type, HostPodVO pod, DataCenterVO dc, String serverPrivateIP, String serverPrivateNetmask, String serverPublicIP, String serverPublicNetmask) { + // If the server's private IP is the same as is public IP, this host has + // a host-only private network. Don't check for conflicts with the + // private IP address table. 
+ if (serverPrivateIP != serverPublicIP) { + if (!_privateIPAddressDao.mark(dc.getId(), pod.getId(), serverPrivateIP)) { + // If the server's private IP address is already in the + // database, return false + List existingPrivateIPs = _privateIPAddressDao.listByPodIdDcIdIpAddress(pod.getId(), dc.getId(), serverPrivateIP); + + assert existingPrivateIPs.size() <= 1 : " How can we get more than one ip address with " + serverPrivateIP; + if (existingPrivateIPs.size() > 1) { + throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is already in use in pod: " + pod.getName() + " and zone: " + dc.getName()); + } + if (existingPrivateIPs.size() == 1) { + DataCenterIpAddressVO vo = existingPrivateIPs.get(0); + if (vo.getInstanceId() != null) { + throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is already in use in pod: " + pod.getName() + " and zone: " + dc.getName()); + } + } + } + } + + if (serverPublicIP != null && !_publicIPAddressDao.mark(dc.getId(), new Ip(serverPublicIP))) { + // If the server's public IP address is already in the database, + // return false + List existingPublicIPs = _publicIPAddressDao.listByDcIdIpAddress(dc.getId(), serverPublicIP); + if (existingPublicIPs.size() > 0) { + throw new IllegalArgumentException("The public ip address of the server (" + serverPublicIP + ") is already in use in zone: " + dc.getName()); + } + } + } + + @Override + public Host addHost(long zoneId, ServerResource resource, Type hostType, Map hostDetails) { + // Check if the zone exists in the system + if (_dcDao.findById(zoneId) == null) { + throw new InvalidParameterValueException("Can't find zone with id " + zoneId); + } + + Map details = hostDetails; + String guid = details.get("guid"); + List currentHosts = _hostDao.listBy(hostType, zoneId); + for (HostVO currentHost : currentHosts) { + if (currentHost.getGuid().equals(guid)) { + return currentHost; + } + } + + AgentAttache 
attache = simulateStart(resource, hostDetails, true, null, null); + return _hostDao.findById(attache.getId()); + } + + public HostVO createHost(final StartupCommand startup, ServerResource resource, Map details, boolean directFirst, List hostTags, String allocationState) + throws IllegalArgumentException { + Host.Type type = null; + + if (startup instanceof StartupStorageCommand) { + StartupStorageCommand ssCmd = ((StartupStorageCommand) startup); + if (ssCmd.getHostType() == Host.Type.SecondaryStorageCmdExecutor) { + type = ssCmd.getHostType(); + } else { + if (ssCmd.getResourceType() == Storage.StorageResourceType.SECONDARY_STORAGE) { + type = Host.Type.SecondaryStorage; + if (resource != null && resource instanceof DummySecondaryStorageResource) { + resource = null; + } + } else { + type = Host.Type.Storage; + } + final Map hostDetails = ssCmd.getHostDetails(); + if (hostDetails != null) { + if (details != null) { + details.putAll(hostDetails); + } else { + details = hostDetails; + } + } + } + } else if (startup instanceof StartupRoutingCommand) { + StartupRoutingCommand ssCmd = ((StartupRoutingCommand) startup); + type = Host.Type.Routing; + final Map hostDetails = ssCmd.getHostDetails(); + if (hostDetails != null) { + if (details != null) { + details.putAll(hostDetails); + } else { + details = hostDetails; + } + } + } else if (startup instanceof StartupProxyCommand) { + type = Host.Type.ConsoleProxy; + } else if (startup instanceof StartupRoutingCommand) { + type = Host.Type.Routing; + } else if (startup instanceof StartupExternalFirewallCommand) { + type = Host.Type.ExternalFirewall; + } else if (startup instanceof StartupExternalLoadBalancerCommand) { + type = Host.Type.ExternalLoadBalancer; + } else if (startup instanceof StartupPxeServerCommand) { + type = Host.Type.PxeServer; + } else if (startup instanceof StartupExternalDhcpCommand) { + type = Host.Type.ExternalDhcp; + } else { + assert false : "Did someone add a new Startup command?"; + } + + Long id = 
null; + HostVO server = _hostDao.findByGuid(startup.getGuid()); + if (server == null) { + server = _hostDao.findByGuid(startup.getGuidWithoutResource()); + } + if (server != null && server.getRemoved() == null) { + id = server.getId(); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found the host " + id + " by guid: " + startup.getGuid()); + } + if (directFirst) { + s_logger.debug("Old host reconnected as new"); + return null; + } + } else { + server = new HostVO(startup.getGuid()); + } + + server.setDetails(details); + server.setHostTags(hostTags); + + if (allocationState != null) { + try { + HostAllocationState hostAllocationState = Host.HostAllocationState.valueOf(allocationState); + if (hostAllocationState != null) { + server.setHostAllocationState(hostAllocationState); + } + } catch (IllegalArgumentException ex) { + s_logger.error("Unable to resolve " + allocationState + " to a valid supported host allocation State, defaulting to 'Enabled'"); + server.setHostAllocationState(Host.HostAllocationState.Enabled); + } + } else { + server.setHostAllocationState(Host.HostAllocationState.Enabled); + } + + updateHost(server, startup, type, _nodeId); + if (resource != null) { + server.setResource(resource.getClass().getName()); + } + if (id == null) { + /* + * // ignore integrity check for agent-simulator if(!"0.0.0.0".equals(startup.getPrivateIpAddress()) && + * !"0.0.0.0".equals(startup.getStorageIpAddress())) { if (_hostDao.findByPrivateIpAddressInDataCenter + * (server.getDataCenterId(), startup.getPrivateIpAddress()) != null) { throw newIllegalArgumentException( + * "The private ip address is already in used: " + startup.getPrivateIpAddress()); } + * + * if (_hostDao.findByPrivateIpAddressInDataCenter(server.getDataCenterId (), startup.getStorageIpAddress()) != + * null) { throw new IllegalArgumentException ("The private ip address is already in used: " + + * startup.getStorageIpAddress()); } } + */ + + if (startup instanceof StartupProxyCommand) { + 
server.setProxyPort(((StartupProxyCommand) startup).getProxyPort()); + } + + server = _hostDao.persist(server); + id = server.getId(); + + s_logger.info("New " + server.getType().toString() + " host connected w/ guid " + startup.getGuid() + " and id is " + id); + } else { + if (!_hostDao.connect(server, _nodeId)) { + throw new CloudRuntimeException("Agent cannot connect because the current state is " + server.getStatus().toString()); + } + s_logger.info("Old " + server.getType().toString() + " host reconnected w/ id =" + id); + } + createCapacityEntry(startup, server); + + return server; + } + + public HostVO createHost(final StartupCommand[] startup, ServerResource resource, Map details, boolean directFirst, List hostTags, String allocationState) + throws IllegalArgumentException { + StartupCommand firstCmd = startup[0]; + HostVO result = createHost(firstCmd, resource, details, directFirst, hostTags, allocationState); + if (result == null) { + return null; + } + return result; + } + + public AgentAttache handleConnect(final Link link, final StartupCommand[] startup) throws IllegalArgumentException, ConnectionException { + HostVO server = createHost(startup, null, null, false, null, null); + if (server == null) { + return null; + } + long id = server.getId(); + + AgentAttache attache = createAttache(id, server, link); + + attache = notifyMonitorsOfConnection(attache, startup); + + return attache; + } + + public AgentAttache findAgent(long hostId) { + synchronized (_agents) { + return _agents.get(hostId); + } + } + + protected AgentAttache createAttache(long id, HostVO server, Link link) { + s_logger.debug("create ConnectedAgentAttache for " + id); + final AgentAttache attache = new ConnectedAgentAttache(this, id, link, server.getStatus() == Status.Maintenance || server.getStatus() == Status.ErrorInMaintenance + || server.getStatus() == Status.PrepareForMaintenance); + link.attach(attache); + AgentAttache old = null; + synchronized (_agents) { + old = 
_agents.get(id); + _agents.put(id, attache); + } + if (old != null) { + old.disconnect(Status.Removed); + } + return attache; + } + + protected AgentAttache createAttache(long id, HostVO server, ServerResource resource) { + if (resource instanceof DummySecondaryStorageResource || resource instanceof KvmDummyResourceBase) { + return new DummyAttache(this, id, false); + } + s_logger.debug("create DirectAgentAttache for " + id); + final DirectAgentAttache attache = new DirectAgentAttache(this, id, resource, server.getStatus() == Status.Maintenance || server.getStatus() == Status.ErrorInMaintenance + || server.getStatus() == Status.PrepareForMaintenance, this); + AgentAttache old = null; + synchronized (_agents) { + old = _agents.get(id); + _agents.put(id, attache); + } + if (old != null) { + old.disconnect(Status.Removed); + } + return attache; + } + + @Override + public boolean maintenanceFailed(long hostId) { + HostVO host = _hostDao.findById(hostId); + if (host == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Cant not find host " + hostId); + } + return false; + } else { + return _hostDao.updateStatus(host, Event.UnableToMigrate, _nodeId); + } + } + + @Override + public Host updateHost(UpdateHostCmd cmd) throws InvalidParameterValueException { + Long hostId = cmd.getId(); + Long guestOSCategoryId = cmd.getOsCategoryId(); + + if (guestOSCategoryId != null) { + + // Verify that the host exists + HostVO host = _hostDao.findById(hostId); + if (host == null) { + throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); + } + + // Verify that the guest OS Category exists + if (guestOSCategoryId > 0) { + if (_guestOSCategoryDao.findById(guestOSCategoryId) == null) { + throw new InvalidParameterValueException("Please specify a valid guest OS category."); + } + } + + GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId); + Map hostDetails = _hostDetailsDao.findDetails(hostId); + + if (guestOSCategory 
!= null) { + // Save a new entry for guest.os.category.id + hostDetails.put("guest.os.category.id", String.valueOf(guestOSCategory.getId())); + } else { + // Delete any existing entry for guest.os.category.id + hostDetails.remove("guest.os.category.id"); + } + _hostDetailsDao.persist(hostId, hostDetails); + } + + String allocationState = cmd.getAllocationState(); + if (allocationState != null) { + // Verify that the host exists + HostVO host = _hostDao.findById(hostId); + if (host == null) { + throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); + } + + try { + HostAllocationState newAllocationState = Host.HostAllocationState.valueOf(allocationState); + if (newAllocationState == null) { + s_logger.error("Unable to resolve " + allocationState + " to a valid supported allocation State"); + throw new InvalidParameterValueException("Unable to resolve " + allocationState + " to a supported state"); + } else { + host.setHostAllocationState(newAllocationState); + } + } catch (IllegalArgumentException ex) { + s_logger.error("Unable to resolve " + allocationState + " to a valid supported allocation State"); + throw new InvalidParameterValueException("Unable to resolve " + allocationState + " to a supported state"); + } + + _hostDao.update(hostId, host); + } + + HostVO updatedHost = _hostDao.findById(hostId); + return updatedHost; + } + + protected void updateHost(final HostVO host, final StartupCommand startup, final Host.Type type, final long msId) throws IllegalArgumentException { + s_logger.debug("updateHost() called"); + + String dataCenter = startup.getDataCenter(); + String pod = startup.getPod(); + String cluster = startup.getCluster(); + + if (pod != null && dataCenter != null && pod.equalsIgnoreCase("default") && dataCenter.equalsIgnoreCase("default")) { + List pods = _podDao.listAllIncludingRemoved(); + for (HostPodVO hpv : pods) { + if (checkCIDR(type, hpv, startup.getPrivateIpAddress(), startup.getPrivateNetmask())) { + pod = 
hpv.getName(); + dataCenter = _dcDao.findById(hpv.getDataCenterId()).getName(); + break; + } + } + } + long dcId = -1; + DataCenterVO dc = _dcDao.findByName(dataCenter); + if (dc == null) { + try { + dcId = Long.parseLong(dataCenter); + dc = _dcDao.findById(dcId); + } catch (final NumberFormatException e) { + } + } + if (dc == null) { + throw new IllegalArgumentException("Host " + startup.getPrivateIpAddress() + " sent incorrect data center: " + dataCenter); + } + dcId = dc.getId(); + + HostPodVO p = _podDao.findByName(pod, dcId); + if (p == null) { + try { + final long podId = Long.parseLong(pod); + p = _podDao.findById(podId); + } catch (final NumberFormatException e) { + } + } + Long podId = null; + if (p == null) { + if (type != Host.Type.SecondaryStorage && type != Host.Type.ExternalFirewall && type != Host.Type.ExternalLoadBalancer) { + + /* + * s_logger.info("Unable to find the pod so we are creating one." ); p = createPod(pod, dcId, + * startup.getPrivateIpAddress(), NetUtils.getCidrSize(startup.getPrivateNetmask())); podId = p.getId(); + */ + s_logger.error("Host " + startup.getPrivateIpAddress() + " sent incorrect pod: " + pod + " in " + dataCenter); + throw new IllegalArgumentException("Host " + startup.getPrivateIpAddress() + " sent incorrect pod: " + pod + " in " + dataCenter); + } + } else { + podId = p.getId(); + } + + Long clusterId = null; + if (cluster != null) { + try { + clusterId = Long.valueOf(cluster); + } catch (NumberFormatException e) { + ClusterVO c = _clusterDao.findBy(cluster, podId); + if (c == null) { + c = new ClusterVO(dcId, podId, cluster); + c = _clusterDao.persist(c); + } + clusterId = c.getId(); + } + } + + if (type == Host.Type.Routing) { + StartupRoutingCommand scc = (StartupRoutingCommand) startup; + + HypervisorType hypervisorType = scc.getHypervisorType(); + boolean doCidrCheck = true; + + ClusterVO clusterVO = _clusterDao.findById(clusterId); + if (clusterVO.getHypervisorType() != scc.getHypervisorType()) { + throw new 
IllegalArgumentException("Can't add host whose hypervisor type is: " + scc.getHypervisorType() + " into cluster: " + clusterId + " whose hypervisor type is: " + + clusterVO.getHypervisorType()); + } + + /* + * KVM:Enforcement that all the hosts in the cluster have the same os type, for migration + */ + if (scc.getHypervisorType() == HypervisorType.KVM) { + List hostsInCluster = _hostDao.listByCluster(clusterId); + if (!hostsInCluster.isEmpty()) { + HostVO oneHost = hostsInCluster.get(0); + _hostDao.loadDetails(oneHost); + String hostOsInCluster = oneHost.getDetail("Host.OS"); + String hostOs = scc.getHostDetails().get("Host.OS"); + if (!hostOsInCluster.equalsIgnoreCase(hostOs)) { + throw new IllegalArgumentException("Can't add host: " + startup.getPrivateIpAddress() + " with hostOS: " + hostOs + " into a cluster," + "in which there are " + + hostOsInCluster + " hosts added"); + } + } + } + + // If this command is from the agent simulator, don't do the CIDR + // check + if (scc.getAgentTag() != null && startup.getAgentTag().equalsIgnoreCase("vmops-simulator")) { + doCidrCheck = false; + } + + // If this command is from a KVM agent, or from an agent that has a + // null hypervisor type, don't do the CIDR check + if (hypervisorType == null || hypervisorType == HypervisorType.KVM || hypervisorType == HypervisorType.VMware || hypervisorType == HypervisorType.BareMetal + || hypervisorType == HypervisorType.Simulator) { + doCidrCheck = false; + } + + if (doCidrCheck) { + s_logger.info("Host: " + host.getName() + " connected with hypervisor type: " + hypervisorType + ". Checking CIDR..."); + } else { + s_logger.info("Host: " + host.getName() + " connected with hypervisor type: " + hypervisorType + ". 
Skipping CIDR check..."); + } + + if (doCidrCheck) { + checkCIDR(type, p, dc, scc.getPrivateIpAddress(), scc.getPrivateNetmask()); + } + + // Check if the private/public IPs of the server are already in the + // private/public IP address tables + checkIPConflicts(type, p, dc, scc.getPrivateIpAddress(), scc.getPublicIpAddress(), scc.getPublicIpAddress(), scc.getPublicNetmask()); + } + + host.setDataCenterId(dc.getId()); + host.setPodId(podId); + host.setClusterId(clusterId); + host.setPrivateIpAddress(startup.getPrivateIpAddress()); + host.setPrivateNetmask(startup.getPrivateNetmask()); + host.setPrivateMacAddress(startup.getPrivateMacAddress()); + host.setPublicIpAddress(startup.getPublicIpAddress()); + host.setPublicMacAddress(startup.getPublicMacAddress()); + host.setPublicNetmask(startup.getPublicNetmask()); + host.setStorageIpAddress(startup.getStorageIpAddress()); + host.setStorageMacAddress(startup.getStorageMacAddress()); + host.setStorageNetmask(startup.getStorageNetmask()); + host.setVersion(startup.getVersion()); + host.setName(startup.getName()); + host.setType(type); + host.setManagementServerId(msId); + host.setStorageUrl(startup.getIqn()); + host.setLastPinged(System.currentTimeMillis() >> 10); + if (startup instanceof StartupRoutingCommand) { + final StartupRoutingCommand scc = (StartupRoutingCommand) startup; + host.setCaps(scc.getCapabilities()); + host.setCpus(scc.getCpus()); + host.setTotalMemory(scc.getMemory()); + host.setSpeed(scc.getSpeed()); + HypervisorType hyType = scc.getHypervisorType(); + host.setHypervisorType(hyType); + + } else if (startup instanceof StartupStorageCommand) { + final StartupStorageCommand ssc = (StartupStorageCommand) startup; + host.setParent(ssc.getParent()); + host.setTotalSize(ssc.getTotalSize()); + host.setHypervisorType(HypervisorType.None); + if (ssc.getNfsShare() != null) { + host.setStorageUrl(ssc.getNfsShare()); + } + } + if (startup.getStorageIpAddressDeux() != null) { + 
host.setStorageIpAddressDeux(startup.getStorageIpAddressDeux()); + host.setStorageMacAddressDeux(startup.getStorageMacAddressDeux()); + host.setStorageNetmaskDeux(startup.getStorageNetmaskDeux()); + } + + } + + @Override + public Host getHost(long hostId) { + return _hostDao.findById(hostId); + } + + // create capacity entries if none exist for this server + private void createCapacityEntry(final StartupCommand startup, HostVO server) { + SearchCriteria capacitySC = _capacityDao.createSearchCriteria(); + capacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, server.getId()); + capacitySC.addAnd("dataCenterId", SearchCriteria.Op.EQ, server.getDataCenterId()); + capacitySC.addAnd("podId", SearchCriteria.Op.EQ, server.getPodId()); + List capacities = _capacityDao.search(capacitySC, null); + + // remove old entries, we'll recalculate them anyway + if (startup instanceof StartupStorageCommand) { + if ((capacities != null) && !capacities.isEmpty()) { + for (CapacityVO capacity : capacities) { + _capacityDao.remove(capacity.getId()); + } + } + } + + if (startup instanceof StartupStorageCommand) { + StartupStorageCommand ssCmd = (StartupStorageCommand) startup; + if (ssCmd.getResourceType() == Storage.StorageResourceType.STORAGE_HOST) { + CapacityVO capacity = new CapacityVO(server.getId(), server.getDataCenterId(), server.getPodId(), server.getClusterId(), 0L, (long) (server.getTotalSize() * _overProvisioningFactor), + CapacityVO.CAPACITY_TYPE_STORAGE_ALLOCATED); + _capacityDao.persist(capacity); + } + } else if (startup instanceof StartupRoutingCommand) { + SearchCriteria capacityCPU = _capacityDao.createSearchCriteria(); + capacityCPU.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, server.getId()); + capacityCPU.addAnd("dataCenterId", SearchCriteria.Op.EQ, server.getDataCenterId()); + capacityCPU.addAnd("podId", SearchCriteria.Op.EQ, server.getPodId()); + capacityCPU.addAnd("capacityType", SearchCriteria.Op.EQ, CapacityVO.CAPACITY_TYPE_CPU); + List capacityVOCpus = 
_capacityDao.search(capacitySC, null); + + if (capacityVOCpus != null && !capacityVOCpus.isEmpty()) { + CapacityVO CapacityVOCpu = capacityVOCpus.get(0); + long newTotalCpu = (long) (server.getCpus().longValue() * server.getSpeed().longValue() * _cpuOverProvisioningFactor); + if ((CapacityVOCpu.getTotalCapacity() <= newTotalCpu) || ((CapacityVOCpu.getUsedCapacity() + CapacityVOCpu.getReservedCapacity()) <= newTotalCpu)) { + CapacityVOCpu.setTotalCapacity(newTotalCpu); + } else if ((CapacityVOCpu.getUsedCapacity() + CapacityVOCpu.getReservedCapacity() > newTotalCpu) && (CapacityVOCpu.getUsedCapacity() < newTotalCpu)) { + CapacityVOCpu.setReservedCapacity(0); + CapacityVOCpu.setTotalCapacity(newTotalCpu); + } else { + s_logger.debug("What? new cpu is :" + newTotalCpu + ", old one is " + CapacityVOCpu.getUsedCapacity() + "," + CapacityVOCpu.getReservedCapacity() + "," + + CapacityVOCpu.getTotalCapacity()); + } + _capacityDao.update(CapacityVOCpu.getId(), CapacityVOCpu); + } else { + CapacityVO capacity = new CapacityVO(server.getId(), server.getDataCenterId(), server.getPodId(), server.getClusterId(), 0L, (long) (server.getCpus().longValue() + * server.getSpeed().longValue() * _cpuOverProvisioningFactor), CapacityVO.CAPACITY_TYPE_CPU); + _capacityDao.persist(capacity); + } + + SearchCriteria capacityMem = _capacityDao.createSearchCriteria(); + capacityMem.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, server.getId()); + capacityMem.addAnd("dataCenterId", SearchCriteria.Op.EQ, server.getDataCenterId()); + capacityMem.addAnd("podId", SearchCriteria.Op.EQ, server.getPodId()); + capacityMem.addAnd("capacityType", SearchCriteria.Op.EQ, CapacityVO.CAPACITY_TYPE_MEMORY); + List capacityVOMems = _capacityDao.search(capacityMem, null); + + if (capacityVOMems != null && !capacityVOMems.isEmpty()) { + CapacityVO CapacityVOMem = capacityVOMems.get(0); + long newTotalMem = server.getTotalMemory(); + if (CapacityVOMem.getTotalCapacity() <= newTotalMem || 
(CapacityVOMem.getUsedCapacity() + CapacityVOMem.getReservedCapacity() <= newTotalMem)) { + CapacityVOMem.setTotalCapacity(newTotalMem); + } else if (CapacityVOMem.getUsedCapacity() + CapacityVOMem.getReservedCapacity() > newTotalMem && CapacityVOMem.getUsedCapacity() < newTotalMem) { + CapacityVOMem.setReservedCapacity(0); + CapacityVOMem.setTotalCapacity(newTotalMem); + } else { + s_logger.debug("What? new cpu is :" + newTotalMem + ", old one is " + CapacityVOMem.getUsedCapacity() + "," + CapacityVOMem.getReservedCapacity() + "," + + CapacityVOMem.getTotalCapacity()); + } + _capacityDao.update(CapacityVOMem.getId(), CapacityVOMem); + } else { + CapacityVO capacity = new CapacityVO(server.getId(), server.getDataCenterId(), server.getPodId(), server.getClusterId(), 0L, server.getTotalMemory(), CapacityVO.CAPACITY_TYPE_MEMORY); + _capacityDao.persist(capacity); + } + } + + } + + // protected void upgradeAgent(final Link link, final byte[] request, final + // String reason) { + // + // if (reason == UnsupportedVersionException.IncompatibleVersion) { + // final UpgradeResponse response = new UpgradeResponse(request, + // _upgradeMgr.getAgentUrl()); + // try { + // s_logger.info("Asking for the agent to update due to incompatible version: " + // + response.toString()); + // link.send(response.toBytes()); + // } catch (final ClosedChannelException e) { + // s_logger.warn("Unable to send response due to connection closed: " + + // response.toString()); + // } + // return; + // } + // + // assert (reason == UnsupportedVersionException.UnknownVersion) : + // "Unknown reason: " + reason; + // final UpgradeResponse response = new UpgradeResponse(request, + // _upgradeMgr.getAgentUrl()); + // try { + // s_logger.info("Asking for the agent to update due to unknown version: " + + // response.toString()); + // link.send(response.toBytes()); + // } catch (final ClosedChannelException e) { + // s_logger.warn("Unable to send response due to connection closed: " + + // 
response.toString()); + // } + // } + + protected class SimulateStartTask implements Runnable { + ServerResource resource; + Map details; + long id; + ActionDelegate actionDelegate; + + public SimulateStartTask(long id, ServerResource resource, Map details, ActionDelegate actionDelegate) { + this.id = id; + this.resource = resource; + this.details = details; + this.actionDelegate = actionDelegate; + } + + @Override + public void run() { AgentAttache at = null; - try { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Simulating start for resource " - + resource.getName() + " id " + id); - } - simulateStart(resource, details, false, null, null); - } catch (Exception e) { - s_logger.warn("Unable to simulate start on resource " + id - + " name " + resource.getName(), e); - } finally { - if (actionDelegate != null) { - actionDelegate.action(new Long(id)); - } - if ( at == null ) { - HostVO host = _hostDao.findById(id); - host.setManagementServerId(null); - _hostDao.update(id, host); - } - StackMaid.current().exitCleanup(); - } - } - } + try { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Simulating start for resource " + resource.getName() + " id " + id); + } + simulateStart(resource, details, false, null, null); + } catch (Exception e) { + s_logger.warn("Unable to simulate start on resource " + id + " name " + resource.getName(), e); + } finally { + if (actionDelegate != null) { + actionDelegate.action(new Long(id)); + } + if (at == null) { + HostVO host = _hostDao.findById(id); + host.setManagementServerId(null); + _hostDao.update(id, host); + } + StackMaid.current().exitCleanup(); + } + } + } - public class AgentHandler extends Task { - public AgentHandler(Task.Type type, Link link, byte[] data) { - super(type, link, data); - } + public class AgentHandler extends Task { + public AgentHandler(Task.Type type, Link link, byte[] data) { + super(type, link, data); + } - protected void processRequest(final Link link, final Request request) { - AgentAttache 
attache = (AgentAttache) link.attachment(); - final Command[] cmds = request.getCommands(); - Command cmd = cmds[0]; - boolean logD = true; + protected void processRequest(final Link link, final Request request) { + AgentAttache attache = (AgentAttache) link.attachment(); + final Command[] cmds = request.getCommands(); + Command cmd = cmds[0]; + boolean logD = true; - Response response = null; - if (attache == null) { - s_logger.debug("Processing sequence " + request.getSequence() - + ": Processing " + request.toString()); - if (!(cmd instanceof StartupCommand)) { - s_logger.warn("Throwing away a request because it came through as the first command on a connect: " - + request.toString()); - return; - } - StartupCommand startup = (StartupCommand) cmd; - // if ((_upgradeMgr.registerForUpgrade(-1, startup.getVersion()) - // == UpgradeManager.State.RequiresUpdate) && - // (_upgradeMgr.getAgentUrl() != null)) { - // final UpgradeCommand upgrade = new - // UpgradeCommand(_upgradeMgr.getAgentUrl()); - // final Request req = new Request(1, -1, -1, new Command[] { - // upgrade }, true, true); - // s_logger.info("Agent requires upgrade: " + req.toString()); - // try { - // link.send(req.toBytes()); - // } catch (ClosedChannelException e) { - // s_logger.warn("Unable to tell agent it should update."); - // } - // return; - // } - try { - StartupCommand[] startups = new StartupCommand[cmds.length]; - for (int i = 0; i < cmds.length; i++) { - startups[i] = (StartupCommand) cmds[i]; - } - attache = handleConnect(link, startups); - } catch (final IllegalArgumentException e) { - _alertMgr.sendAlert( - AlertManager.ALERT_TYPE_HOST, - 0, - new Long(0), - "Agent from " + startup.getPrivateIpAddress() - + " is unable to connect due to " - + e.getMessage(), - "Agent from " + startup.getPrivateIpAddress() - + " is unable to connect with " - + request.toString() + " because of " - + e.getMessage()); - s_logger.warn("Unable to create attache for agent: " - + request.toString(), e); - 
response = new Response(request, new StartupAnswer( - (StartupCommand) cmd, e.getMessage()), _nodeId, -1); - } catch (ConnectionException e) { - _alertMgr.sendAlert( - AlertManager.ALERT_TYPE_HOST, - 0, - new Long(0), - "Agent from " + startup.getPrivateIpAddress() - + " is unable to connect due to " - + e.getMessage(), - "Agent from " + startup.getPrivateIpAddress() - + " is unable to connect with " - + request.toString() + " because of " - + e.getMessage()); - s_logger.warn("Unable to create attache for agent: " - + request.toString(), e); - response = new Response(request, new StartupAnswer( - (StartupCommand) cmd, e.getMessage()), _nodeId, -1); - } catch (final CloudRuntimeException e) { - _alertMgr.sendAlert( - AlertManager.ALERT_TYPE_HOST, - 0, - new Long(0), - "Agent from " + startup.getPrivateIpAddress() - + " is unable to connect due to " - + e.getMessage(), - "Agent from " + startup.getPrivateIpAddress() - + " is unable to connect with " - + request.toString() + " because of " - + e.getMessage()); - s_logger.warn("Unable to create attache for agent: " - + request.toString(), e); - } - if (attache == null) { - if (response == null) { - s_logger.warn("Unable to create attache for agent: " - + request.toString()); - response = new Response(request, new StartupAnswer( - (StartupCommand) request.getCommand(), - "Unable to register this agent"), _nodeId, -1); - } - try { - link.send(response.toBytes(), true); - } catch (final ClosedChannelException e) { - s_logger.warn("Response was not sent: " - + response.toString()); - } - return; - } - } + Response response = null; + if (attache == null) { + s_logger.debug("Processing sequence " + request.getSequence() + ": Processing " + request.toString()); + if (!(cmd instanceof StartupCommand)) { + s_logger.warn("Throwing away a request because it came through as the first command on a connect: " + request.toString()); + return; + } + StartupCommand startup = (StartupCommand) cmd; + // if 
((_upgradeMgr.registerForUpgrade(-1, startup.getVersion()) + // == UpgradeManager.State.RequiresUpdate) && + // (_upgradeMgr.getAgentUrl() != null)) { + // final UpgradeCommand upgrade = new + // UpgradeCommand(_upgradeMgr.getAgentUrl()); + // final Request req = new Request(1, -1, -1, new Command[] { + // upgrade }, true, true); + // s_logger.info("Agent requires upgrade: " + req.toString()); + // try { + // link.send(req.toBytes()); + // } catch (ClosedChannelException e) { + // s_logger.warn("Unable to tell agent it should update."); + // } + // return; + // } + try { + StartupCommand[] startups = new StartupCommand[cmds.length]; + for (int i = 0; i < cmds.length; i++) { + startups[i] = (StartupCommand) cmds[i]; + } + attache = handleConnect(link, startups); + } catch (final IllegalArgumentException e) { + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, 0, new Long(0), "Agent from " + startup.getPrivateIpAddress() + " is unable to connect due to " + e.getMessage(), "Agent from " + + startup.getPrivateIpAddress() + " is unable to connect with " + request.toString() + " because of " + e.getMessage()); + s_logger.warn("Unable to create attache for agent: " + request.toString(), e); + response = new Response(request, new StartupAnswer((StartupCommand) cmd, e.getMessage()), _nodeId, -1); + } catch (ConnectionException e) { + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, 0, new Long(0), "Agent from " + startup.getPrivateIpAddress() + " is unable to connect due to " + e.getMessage(), "Agent from " + + startup.getPrivateIpAddress() + " is unable to connect with " + request.toString() + " because of " + e.getMessage()); + s_logger.warn("Unable to create attache for agent: " + request.toString(), e); + response = new Response(request, new StartupAnswer((StartupCommand) cmd, e.getMessage()), _nodeId, -1); + } catch (final CloudRuntimeException e) { + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, 0, new Long(0), "Agent from " + startup.getPrivateIpAddress() + " is 
unable to connect due to " + e.getMessage(), "Agent from " + + startup.getPrivateIpAddress() + " is unable to connect with " + request.toString() + " because of " + e.getMessage()); + s_logger.warn("Unable to create attache for agent: " + request.toString(), e); + } + if (attache == null) { + if (response == null) { + s_logger.warn("Unable to create attache for agent: " + request.toString()); + response = new Response(request, new StartupAnswer((StartupCommand) request.getCommand(), "Unable to register this agent"), _nodeId, -1); + } + try { + link.send(response.toBytes(), true); + } catch (final ClosedChannelException e) { + s_logger.warn("Response was not sent: " + response.toString()); + } + return; + } + } - final long hostId = attache.getId(); + final long hostId = attache.getId(); - if (s_logger.isDebugEnabled()) { - if (cmd instanceof PingRoutingCommand) { - final PingRoutingCommand ping = (PingRoutingCommand) cmd; - if (ping.getNewStates().size() > 0) { - s_logger.debug("SeqA " + hostId + "-" - + request.getSequence() + ": Processing " - + request.toString()); - } else { - logD = false; - s_logger.debug("Ping from " + hostId); - s_logger.trace("SeqA " + hostId + "-" - + request.getSequence() + ": Processing " - + request.toString()); - } - } else if (cmd instanceof PingCommand) { - logD = false; - s_logger.debug("Ping from " + hostId); - s_logger.trace("SeqA " + attache.getId() + "-" - + request.getSequence() + ": Processing " - + request.toString()); - } else { - s_logger.debug("SeqA " + attache.getId() + "-" - + request.getSequence() + ": Processing " - + request.toString()); - } - } + if (s_logger.isDebugEnabled()) { + if (cmd instanceof PingRoutingCommand) { + final PingRoutingCommand ping = (PingRoutingCommand) cmd; + if (ping.getNewStates().size() > 0) { + s_logger.debug("SeqA " + hostId + "-" + request.getSequence() + ": Processing " + request.toString()); + } else { + logD = false; + s_logger.debug("Ping from " + hostId); + s_logger.trace("SeqA " + 
hostId + "-" + request.getSequence() + ": Processing " + request.toString()); + } + } else if (cmd instanceof PingCommand) { + logD = false; + s_logger.debug("Ping from " + hostId); + s_logger.trace("SeqA " + attache.getId() + "-" + request.getSequence() + ": Processing " + request.toString()); + } else { + s_logger.debug("SeqA " + attache.getId() + "-" + request.getSequence() + ": Processing " + request.toString()); + } + } - final Answer[] answers = new Answer[cmds.length]; - for (int i = 0; i < cmds.length; i++) { - cmd = cmds[i]; - Answer answer = null; - try { - if (cmd instanceof StartupRoutingCommand) { - final StartupRoutingCommand startup = (StartupRoutingCommand) cmd; - answer = new StartupAnswer(startup, attache.getId(), - getPingInterval()); - } else if (cmd instanceof StartupProxyCommand) { - final StartupProxyCommand startup = (StartupProxyCommand) cmd; - answer = new StartupAnswer(startup, attache.getId(), - getPingInterval()); - } else if (cmd instanceof StartupStorageCommand) { - final StartupStorageCommand startup = (StartupStorageCommand) cmd; - answer = new StartupAnswer(startup, attache.getId(), - getPingInterval()); - } else if (cmd instanceof ShutdownCommand) { - final ShutdownCommand shutdown = (ShutdownCommand) cmd; - final String reason = shutdown.getReason(); - s_logger.info("Host " - + attache.getId() - + " has informed us that it is shutting down with reason " - + reason + " and detail " - + shutdown.getDetail()); - if (reason.equals(ShutdownCommand.Update)) { - disconnect(attache, Event.UpdateNeeded, false); - } else if (reason.equals(ShutdownCommand.Requested)) { - disconnect(attache, Event.ShutdownRequested, false); - } - return; - } else if (cmd instanceof AgentControlCommand) { - answer = handleControlCommand(attache, - (AgentControlCommand) cmd); - } else { - handleCommands(attache, request.getSequence(), - new Command[] { cmd }); - if (cmd instanceof PingCommand) { - long cmdHostId = ((PingCommand) cmd).getHostId(); + final 
Answer[] answers = new Answer[cmds.length]; + for (int i = 0; i < cmds.length; i++) { + cmd = cmds[i]; + Answer answer = null; + try { + if (cmd instanceof StartupRoutingCommand) { + final StartupRoutingCommand startup = (StartupRoutingCommand) cmd; + answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); + } else if (cmd instanceof StartupProxyCommand) { + final StartupProxyCommand startup = (StartupProxyCommand) cmd; + answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); + } else if (cmd instanceof StartupStorageCommand) { + final StartupStorageCommand startup = (StartupStorageCommand) cmd; + answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); + } else if (cmd instanceof ShutdownCommand) { + final ShutdownCommand shutdown = (ShutdownCommand) cmd; + final String reason = shutdown.getReason(); + s_logger.info("Host " + attache.getId() + " has informed us that it is shutting down with reason " + reason + " and detail " + shutdown.getDetail()); + if (reason.equals(ShutdownCommand.Update)) { + disconnect(attache, Event.UpdateNeeded, false); + } else if (reason.equals(ShutdownCommand.Requested)) { + disconnect(attache, Event.ShutdownRequested, false); + } + return; + } else if (cmd instanceof AgentControlCommand) { + answer = handleControlCommand(attache, (AgentControlCommand) cmd); + } else { + handleCommands(attache, request.getSequence(), new Command[] { cmd }); + if (cmd instanceof PingCommand) { + long cmdHostId = ((PingCommand) cmd).getHostId(); - // if the router is sending a ping, verify the - // gateway was pingable - if (cmd instanceof PingRoutingCommand) { - boolean gatewayAccessible = ((PingRoutingCommand) cmd) - .isGatewayAccessible(); - HostVO host = _hostDao.findById(Long - .valueOf(cmdHostId)); - if (!gatewayAccessible) { - // alert that host lost connection to - // gateway (cannot ping the default route) - DataCenterVO dcVO = _dcDao.findById(host - .getDataCenterId()); - HostPodVO podVO = 
_podDao.findById(host - .getPodId()); - String hostDesc = "name: " + host.getName() - + " (id:" + host.getId() - + "), availability zone: " - + dcVO.getName() + ", pod: " - + podVO.getName(); + // if the router is sending a ping, verify the + // gateway was pingable + if (cmd instanceof PingRoutingCommand) { + boolean gatewayAccessible = ((PingRoutingCommand) cmd).isGatewayAccessible(); + HostVO host = _hostDao.findById(Long.valueOf(cmdHostId)); + if (!gatewayAccessible) { + // alert that host lost connection to + // gateway (cannot ping the default route) + DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); + HostPodVO podVO = _podDao.findById(host.getPodId()); + String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); - _alertMgr - .sendAlert( - AlertManager.ALERT_TYPE_ROUTING, - host.getDataCenterId(), - host.getPodId(), - "Host lost connection to gateway, " - + hostDesc, - "Host [" - + hostDesc - + "] lost connection to gateway (default route) and is possibly having network connection issues."); - } else { - _alertMgr.clearAlert( - AlertManager.ALERT_TYPE_ROUTING, - host.getDataCenterId(), - host.getPodId()); - } - } - answer = new PingAnswer((PingCommand) cmd); - } else if (cmd instanceof ReadyAnswer) { - HostVO host = _hostDao.findById(attache.getId()); - if (host == null) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Cant not find host " - + attache.getId()); - } - } - answer = new Answer(cmd); - } else { - answer = new Answer(cmd); - } - } - } catch (final Throwable th) { - s_logger.warn("Caught: ", th); - answer = new Answer(cmd, false, th.getMessage()); - } - answers[i] = answer; - } + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId(), "Host lost connection to gateway, " + hostDesc, "Host [" + hostDesc + + "] lost connection to gateway (default route) and is possibly having network connection issues."); + } 
else { + _alertMgr.clearAlert(AlertManager.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId()); + } + } + answer = new PingAnswer((PingCommand) cmd); + } else if (cmd instanceof ReadyAnswer) { + HostVO host = _hostDao.findById(attache.getId()); + if (host == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Cant not find host " + attache.getId()); + } + } + answer = new Answer(cmd); + } else { + answer = new Answer(cmd); + } + } + } catch (final Throwable th) { + s_logger.warn("Caught: ", th); + answer = new Answer(cmd, false, th.getMessage()); + } + answers[i] = answer; + } - response = new Response(request, answers, _nodeId, attache.getId()); - if (s_logger.isDebugEnabled()) { - if (logD) { - s_logger.debug("SeqA " + attache.getId() + "-" - + response.getSequence() + ": Sending " - + response.toString()); - } else { - s_logger.trace("SeqA " + attache.getId() + "-" - + response.getSequence() + ": Sending " - + response.toString()); - } - } - try { - link.send(response.toBytes()); - } catch (final ClosedChannelException e) { - s_logger.warn("Unable to send response because connection is closed: " - + response.toString()); - } - } + response = new Response(request, answers, _nodeId, attache.getId()); + if (s_logger.isDebugEnabled()) { + if (logD) { + s_logger.debug("SeqA " + attache.getId() + "-" + response.getSequence() + ": Sending " + response.toString()); + } else { + s_logger.trace("SeqA " + attache.getId() + "-" + response.getSequence() + ": Sending " + response.toString()); + } + } + try { + link.send(response.toBytes()); + } catch (final ClosedChannelException e) { + s_logger.warn("Unable to send response because connection is closed: " + response.toString()); + } + } - protected void processResponse(final Link link, final Response response) { - final AgentAttache attache = (AgentAttache) link.attachment(); - if (attache == null) { - s_logger.warn("Unable to process: " + response.toString()); - } + protected void processResponse(final 
Link link, final Response response) { + final AgentAttache attache = (AgentAttache) link.attachment(); + if (attache == null) { + s_logger.warn("Unable to process: " + response.toString()); + } - if (!attache.processAnswers(response.getSequence(), response)) { - s_logger.info("Host " + attache.getId() + " - Seq " - + response.getSequence() - + ": Response is not processed: " + response.toString()); - } - } + if (!attache.processAnswers(response.getSequence(), response)) { + s_logger.info("Host " + attache.getId() + " - Seq " + response.getSequence() + ": Response is not processed: " + response.toString()); + } + } - @Override - protected void doTask(final Task task) throws Exception { - Transaction txn = Transaction.open(Transaction.CLOUD_DB); - try { - final Type type = task.getType(); - if (type == Task.Type.DATA) { - final byte[] data = task.getData(); - try { - final Request event = Request.parse(data); - if (event instanceof Response) { - processResponse(task.getLink(), (Response) event); - } else { - processRequest(task.getLink(), event); - } - } catch (final UnsupportedVersionException e) { - s_logger.warn(e.getMessage()); - // upgradeAgent(task.getLink(), data, e.getReason()); - } - } else if (type == Task.Type.CONNECT) { - } else if (type == Task.Type.DISCONNECT) { - final Link link = task.getLink(); - final AgentAttache attache = (AgentAttache) link - .attachment(); - if (attache != null) { - disconnect(attache, Event.AgentDisconnected, true); - } else { - s_logger.info("Connection from " + link.getIpAddress() - + " closed but no cleanup was done."); - link.close(); - link.terminated(); - } - } - } finally { - StackMaid.current().exitCleanup(); - txn.close(); - } - } - } + @Override + protected void doTask(final Task task) throws Exception { + Transaction txn = Transaction.open(Transaction.CLOUD_DB); + try { + final Type type = task.getType(); + if (type == Task.Type.DATA) { + final byte[] data = task.getData(); + try { + final Request event = 
Request.parse(data); + if (event instanceof Response) { + processResponse(task.getLink(), (Response) event); + } else { + processRequest(task.getLink(), event); + } + } catch (final UnsupportedVersionException e) { + s_logger.warn(e.getMessage()); + // upgradeAgent(task.getLink(), data, e.getReason()); + } + } else if (type == Task.Type.CONNECT) { + } else if (type == Task.Type.DISCONNECT) { + final Link link = task.getLink(); + final AgentAttache attache = (AgentAttache) link.attachment(); + if (attache != null) { + disconnect(attache, Event.AgentDisconnected, true); + } else { + s_logger.info("Connection from " + link.getIpAddress() + " closed but no cleanup was done."); + link.close(); + link.terminated(); + } + } + } finally { + StackMaid.current().exitCleanup(); + txn.close(); + } + } + } - protected AgentManagerImpl() { - } + protected AgentManagerImpl() { + } } diff --git a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index a5d617ded6b..e016e39ad93 100644 --- a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -40,6 +40,7 @@ import com.cloud.host.Status.Event; import com.cloud.resource.ResourceService; import com.cloud.resource.ServerResource; import com.cloud.storage.resource.DummySecondaryStorageResource; +import com.cloud.user.User; import com.cloud.utils.component.Inject; import com.cloud.utils.db.DB; import com.cloud.utils.db.GlobalLock; @@ -47,538 +48,544 @@ import com.cloud.utils.db.Transaction; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.utils.nio.Link; import com.cloud.utils.nio.Task; - -@Local(value={AgentManager.class, ResourceService.class}) +@Local(value = { AgentManager.class, ResourceService.class }) public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener { - final static Logger s_logger = 
Logger.getLogger(ClusteredAgentManagerImpl.class); - - public final static long STARTUP_DELAY = 5000; - public final static long SCAN_INTERVAL = 90000; // 90 seconds, it takes 60 sec for xenserver to fail login - public final static int ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION = 5; // 5 seconds - public final static long LOAD_SIZE = 100; - - @Inject protected ClusterManager _clusterMgr = null; - + final static Logger s_logger = Logger.getLogger(ClusteredAgentManagerImpl.class); + + public final static long STARTUP_DELAY = 5000; + public final static long SCAN_INTERVAL = 90000; // 90 seconds, it takes 60 sec for xenserver to fail login + public final static int ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION = 5; // 5 seconds + public final static long LOAD_SIZE = 100; + + @Inject + protected ClusterManager _clusterMgr = null; + protected HashMap _peers; private final Timer _timer = new Timer("ClusteredAgentManager Timer"); - - @Inject protected ManagementServerHostDao _mshostDao; + + @Inject + protected ManagementServerHostDao _mshostDao; protected ClusteredAgentManagerImpl() { - super(); + super(); } - @Override - public boolean configure(String name, Map params) throws ConfigurationException { - _peers = new HashMap(7); + @Override + public boolean configure(String name, Map params) throws ConfigurationException { + _peers = new HashMap(7); _nodeId = _clusterMgr.getManagementNodeId(); - + ClusteredAgentAttache.initialize(this); - + _clusterMgr.registerListener(this); - + return super.configure(name, params); - } - - @Override - public boolean start() { - if (!super.start()) { - return false; - } + } + + @Override + public boolean start() { + if (!super.start()) { + return false; + } _timer.schedule(new DirectAgentScanTimerTask(), STARTUP_DELAY, SCAN_INTERVAL); return true; - } - - private void runDirectAgentScanTimerTask() { - GlobalLock scanLock = GlobalLock.getInternLock("clustermgr.scan"); - try { - if(scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) 
{ - try { - scanDirectAgentToLoad(); - } finally { - scanLock.unlock(); - } - } - } finally { - scanLock.releaseRef(); - } - } - - private void scanDirectAgentToLoad() { - if(s_logger.isTraceEnabled()) { + } + + private void runDirectAgentScanTimerTask() { + GlobalLock scanLock = GlobalLock.getInternLock("clustermgr.scan"); + try { + if (scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) { + try { + scanDirectAgentToLoad(); + } finally { + scanLock.unlock(); + } + } + } finally { + scanLock.releaseRef(); + } + } + + private void scanDirectAgentToLoad() { + if (s_logger.isTraceEnabled()) { s_logger.trace("Begin scanning directly connected hosts"); } - // for agents that are self-managed, threshold to be considered as disconnected is 3 ping intervals - long cutSeconds = (System.currentTimeMillis() >> 10) - (_pingInterval*3); - List hosts = _hostDao.findDirectAgentToLoad(_clusterMgr.getManagementNodeId(), cutSeconds, LOAD_SIZE); - if ( hosts != null && hosts.size() == LOAD_SIZE ) { - Long clusterId = hosts.get((int)(LOAD_SIZE-1)).getClusterId(); - if ( clusterId != null) { - for ( int i = (int)(LOAD_SIZE-1); i > 0; i-- ) { - if ( hosts.get(i).getClusterId() == clusterId ) { + // for agents that are self-managed, threshold to be considered as disconnected is 3 ping intervals + long cutSeconds = (System.currentTimeMillis() >> 10) - (_pingInterval * 3); + List hosts = _hostDao.findDirectAgentToLoad(_clusterMgr.getManagementNodeId(), cutSeconds, LOAD_SIZE); + if (hosts != null && hosts.size() == LOAD_SIZE) { + Long clusterId = hosts.get((int) (LOAD_SIZE - 1)).getClusterId(); + if (clusterId != null) { + for (int i = (int) (LOAD_SIZE - 1); i > 0; i--) { + if (hosts.get(i).getClusterId() == clusterId) { hosts.remove(i); } else { break; } } - } - } - if(hosts != null && hosts.size() > 0) { - for(HostVO host: hosts) { - AgentAttache agentattache = findAttache(host.getId()); - if(agentattache != null) { - // already loaded, skip - if(agentattache.forForward()) { - 
if(s_logger.isInfoEnabled()) { + } + } + if (hosts != null && hosts.size() > 0) { + for (HostVO host : hosts) { + AgentAttache agentattache = findAttache(host.getId()); + if (agentattache != null) { + // already loaded, skip + if (agentattache.forForward()) { + if (s_logger.isInfoEnabled()) { s_logger.info("Host " + host.getName() + " is detected down, but we have a forward attache running, disconnect this one before launching the host"); } - removeAgent(agentattache, Status.Disconnected); - } else { - continue; - } - } - - if(s_logger.isDebugEnabled()) { + removeAgent(agentattache, Status.Disconnected); + } else { + continue; + } + } + + if (s_logger.isDebugEnabled()) { s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ")"); } - loadDirectlyConnectedHost(host); - } - } - - if(s_logger.isTraceEnabled()) { + loadDirectlyConnectedHost(host); + } + } + + if (s_logger.isTraceEnabled()) { s_logger.trace("End scanning directly connected hosts"); } - } - - private class DirectAgentScanTimerTask extends TimerTask { + } + + private class DirectAgentScanTimerTask extends TimerTask { @Override public void run() { - try { - runDirectAgentScanTimerTask(); - } catch(Throwable e) { - s_logger.error("Unexpected exception " + e.getMessage(), e); - } + try { + runDirectAgentScanTimerTask(); + } catch (Throwable e) { + s_logger.error("Unexpected exception " + e.getMessage(), e); + } } - } - + } + @Override public Task create(Task.Type type, Link link, byte[] data) { - return new ClusteredAgentHandler(type, link, data); - } - + return new ClusteredAgentHandler(type, link, data); + } + @Override public boolean cancelMaintenance(final long hostId) { - try { - Boolean result = _clusterMgr.propagateAgentEvent(hostId, Event.ResetRequested); - - if (result != null) { - return result; - } - } catch (AgentUnavailableException e) { - return false; - } - - return super.cancelMaintenance(hostId); + try { + Boolean result = 
_clusterMgr.propagateAgentEvent(hostId, Event.ResetRequested); + + if (result != null) { + return result; + } + } catch (AgentUnavailableException e) { + return false; + } + + return super.cancelMaintenance(hostId); } - + protected AgentAttache createAttache(long id) { s_logger.debug("create forwarding ClusteredAgentAttache for " + id); final AgentAttache attache = new ClusteredAgentAttache(this, id); AgentAttache old = null; - synchronized(_agents) { - old = _agents.get(id); + synchronized (_agents) { + old = _agents.get(id); _agents.put(id, attache); } - if( old != null ) { + if (old != null) { old.disconnect(Status.Removed); } return attache; } - + @Override - protected AgentAttache createAttache(long id, HostVO server, Link link) { + protected AgentAttache createAttache(long id, HostVO server, Link link) { s_logger.debug("create ClusteredAgentAttache for " + id); - final AgentAttache attache = new ClusteredAgentAttache(this, id, link, server.getStatus() == Status.Maintenance || server.getStatus() == Status.ErrorInMaintenance || server.getStatus() == Status.PrepareForMaintenance); + final AgentAttache attache = new ClusteredAgentAttache(this, id, link, server.getStatus() == Status.Maintenance || server.getStatus() == Status.ErrorInMaintenance + || server.getStatus() == Status.PrepareForMaintenance); link.attach(attache); AgentAttache old = null; - synchronized(_agents) { - old = _agents.get(id); + synchronized (_agents) { + old = _agents.get(id); _agents.put(id, attache); } - if( old != null ) { + if (old != null) { old.disconnect(Status.Removed); } return attache; } - + @Override protected AgentAttache createAttache(long id, HostVO server, ServerResource resource) { if (resource instanceof DummySecondaryStorageResource) { return new DummyAttache(this, id, false); } s_logger.debug("create ClusteredDirectAgentAttache for " + id); - final DirectAgentAttache attache = new ClusteredDirectAgentAttache(this, id, _nodeId, resource, server.getStatus() == 
Status.Maintenance - || server.getStatus() == Status.ErrorInMaintenance || server.getStatus() == Status.PrepareForMaintenance, this); + final DirectAgentAttache attache = new ClusteredDirectAgentAttache(this, id, _nodeId, resource, server.getStatus() == Status.Maintenance || server.getStatus() == Status.ErrorInMaintenance + || server.getStatus() == Status.PrepareForMaintenance, this); AgentAttache old = null; synchronized (_agents) { old = _agents.get(id); _agents.put(id, attache); } - if( old != null ) { + if (old != null) { old.disconnect(Status.Removed); } return attache; } - + @Override protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate) { return handleDisconnect(attache, event, investigate, true); } - - protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast) { - if( agent == null ) { + + protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast) { + if (agent == null) { return true; } - - if (super.handleDisconnect(agent, event, investigate)) { - if (broadcast) { - notifyNodesInCluster(agent); - } - return true; - } else { - return false; - } + + if (super.handleDisconnect(agent, event, investigate)) { + if (broadcast) { + notifyNodesInCluster(agent); + } + return true; + } else { + return false; + } } - + @Override public boolean executeUserRequest(long hostId, Event event) throws AgentUnavailableException { if (event == Event.AgentDisconnected) { if (s_logger.isDebugEnabled()) { s_logger.debug("Received agent disconnect event for host " + hostId); } - AgentAttache attache = findAttache(hostId); + AgentAttache attache = findAttache(hostId); if (attache != null) { handleDisconnect(attache, Event.AgentDisconnected, false, false); } - + return true; } else { return super.executeUserRequest(hostId, event); } } - + @Override public boolean maintain(final long hostId) throws AgentUnavailableException { - 
Boolean result = _clusterMgr.propagateAgentEvent(hostId, Event.MaintenanceRequested); - if (result != null) { - return result; - } - - return super.maintain(hostId); + Boolean result = _clusterMgr.propagateAgentEvent(hostId, Event.MaintenanceRequested); + if (result != null) { + return result; + } + + return super.maintain(hostId); } - + @Override public boolean reconnect(final long hostId) throws AgentUnavailableException { Boolean result = _clusterMgr.propagateAgentEvent(hostId, Event.ShutdownRequested); if (result != null) { return result; } - + return super.reconnect(hostId); } - - @Override @DB - public boolean deleteHost(long hostId) { - try { - Boolean result = _clusterMgr.propagateAgentEvent(hostId, Event.Remove); - if (result != null) { - return result; - } - } catch (AgentUnavailableException e) { - return false; - } - - return super.deleteHost(hostId); + + @Override + @DB + public boolean deleteHost(long hostId, boolean isForced, User caller) { + try { + Boolean result = _clusterMgr.propagateAgentEvent(hostId, Event.Remove); + if (result != null) { + return result; + } + } catch (AgentUnavailableException e) { + return false; + } + + return super.deleteHost(hostId, isForced, caller); } - + public void notifyNodesInCluster(AgentAttache attache) { s_logger.debug("Notifying other nodes of to disconnect"); - Command[] cmds = new Command[] { new ChangeAgentCommand(attache.getId(), Event.AgentDisconnected) }; - _clusterMgr.broadcast(attache.getId(), cmds); + Command[] cmds = new Command[] { new ChangeAgentCommand(attache.getId(), Event.AgentDisconnected) }; + _clusterMgr.broadcast(attache.getId(), cmds); } - + protected static void logT(byte[] bytes, final String msg) { - s_logger.trace("Seq " + Request.getAgentId(bytes) + "-" + Request.getSequence(bytes) + ": MgmtId " + Request.getManagementServerId(bytes) + ": " + (Request.isRequest(bytes) ? 
"Req: " : "Resp: ") + msg); + s_logger.trace("Seq " + Request.getAgentId(bytes) + "-" + Request.getSequence(bytes) + ": MgmtId " + Request.getManagementServerId(bytes) + ": " + + (Request.isRequest(bytes) ? "Req: " : "Resp: ") + msg); } - + protected static void logD(byte[] bytes, final String msg) { - s_logger.debug("Seq " + Request.getAgentId(bytes) + "-" + Request.getSequence(bytes) + ": MgmtId " + Request.getManagementServerId(bytes) + ": " + (Request.isRequest(bytes) ? "Req: " : "Resp: ") + msg); + s_logger.debug("Seq " + Request.getAgentId(bytes) + "-" + Request.getSequence(bytes) + ": MgmtId " + Request.getManagementServerId(bytes) + ": " + + (Request.isRequest(bytes) ? "Req: " : "Resp: ") + msg); } - + protected static void logI(byte[] bytes, final String msg) { - s_logger.info("Seq " + Request.getAgentId(bytes) + "-" + Request.getSequence(bytes) + ": MgmtId " + Request.getManagementServerId(bytes) + ": " + (Request.isRequest(bytes) ? "Req: " : "Resp: ") + msg); + s_logger.info("Seq " + Request.getAgentId(bytes) + "-" + Request.getSequence(bytes) + ": MgmtId " + Request.getManagementServerId(bytes) + ": " + + (Request.isRequest(bytes) ? 
"Req: " : "Resp: ") + msg); } - + public boolean routeToPeer(String peer, byte[] bytes) { - int i = 0; - SocketChannel ch = null; - while (i++ < 5) { - ch = connectToPeer(peer, ch); - if (ch == null) { - try { - logD(bytes, "Unable to route to peer: " + Request.parse(bytes).toString()); - } catch (Exception e) { - } - return false; - } - try { - if (s_logger.isDebugEnabled()) { - logD(bytes, "Routing to peer"); - } - Link.write(ch, new ByteBuffer[] { ByteBuffer.wrap(bytes) }); - return true; - } catch (IOException e) { - try { - logI(bytes, "Unable to route to peer: " + Request.parse(bytes).toString() + " due to " + e.getMessage()); - } catch (Exception ex) { - } - } - } - return false; + int i = 0; + SocketChannel ch = null; + while (i++ < 5) { + ch = connectToPeer(peer, ch); + if (ch == null) { + try { + logD(bytes, "Unable to route to peer: " + Request.parse(bytes).toString()); + } catch (Exception e) { + } + return false; + } + try { + if (s_logger.isDebugEnabled()) { + logD(bytes, "Routing to peer"); + } + Link.write(ch, new ByteBuffer[] { ByteBuffer.wrap(bytes) }); + return true; + } catch (IOException e) { + try { + logI(bytes, "Unable to route to peer: " + Request.parse(bytes).toString() + " due to " + e.getMessage()); + } catch (Exception ex) { + } + } + } + return false; } - + public String findPeer(long hostId) { return _clusterMgr.getPeerName(hostId); } - + public void cancel(String peerName, long hostId, long sequence, String reason) { CancelCommand cancel = new CancelCommand(sequence, reason); Request req = new Request(-1, hostId, _nodeId, cancel, true); req.setControl(true); routeToPeer(peerName, req.getBytes()); } - + public void closePeer(String peerName) { - synchronized(_peers) { - SocketChannel ch = _peers.get(peerName); - if(ch != null) { - try { - ch.close(); - } catch(IOException e) { - s_logger.warn("Unable to close peer socket connection to " + peerName); - } - } - _peers.remove(peerName); - } + synchronized (_peers) { + SocketChannel ch = 
_peers.get(peerName); + if (ch != null) { + try { + ch.close(); + } catch (IOException e) { + s_logger.warn("Unable to close peer socket connection to " + peerName); + } + } + _peers.remove(peerName); + } } - + public SocketChannel connectToPeer(String peerName, SocketChannel prevCh) { - synchronized(_peers) { - SocketChannel ch = _peers.get(peerName); - if (prevCh != null) { - try { - prevCh.close(); - } catch (Exception e) { - } - } - if (ch == null || ch == prevCh) { - ManagementServerHostVO ms = _clusterMgr.getPeer(peerName); - if (ms == null) { - s_logger.info("Unable to find peer: " + peerName); - return null; - } - String ip = ms.getServiceIP(); - InetAddress addr; - try { - addr = InetAddress.getByName(ip); - } catch (UnknownHostException e) { - throw new CloudRuntimeException("Unable to resolve " + ip); - } - try { - ch = SocketChannel.open(new InetSocketAddress(addr, _port)); - ch.configureBlocking(true); // make sure we are working at blocking mode - ch.socket().setKeepAlive(true); - ch.socket().setSoTimeout(60 * 1000); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Connection to peer opened: " + peerName + ", ip: " + ip); - } - _peers.put(peerName, ch); - } catch (IOException e) { - s_logger.warn("Unable to connect to peer management server: " + peerName + ", ip: " + ip + " due to " + e.getMessage(), e); - return null; - } - } - - if (s_logger.isTraceEnabled()) { - s_logger.trace("Found open channel for peer: " + peerName); - } - return ch; - } + synchronized (_peers) { + SocketChannel ch = _peers.get(peerName); + if (prevCh != null) { + try { + prevCh.close(); + } catch (Exception e) { + } + } + if (ch == null || ch == prevCh) { + ManagementServerHostVO ms = _clusterMgr.getPeer(peerName); + if (ms == null) { + s_logger.info("Unable to find peer: " + peerName); + return null; + } + String ip = ms.getServiceIP(); + InetAddress addr; + try { + addr = InetAddress.getByName(ip); + } catch (UnknownHostException e) { + throw new 
CloudRuntimeException("Unable to resolve " + ip); + } + try { + ch = SocketChannel.open(new InetSocketAddress(addr, _port)); + ch.configureBlocking(true); // make sure we are working at blocking mode + ch.socket().setKeepAlive(true); + ch.socket().setSoTimeout(60 * 1000); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Connection to peer opened: " + peerName + ", ip: " + ip); + } + _peers.put(peerName, ch); + } catch (IOException e) { + s_logger.warn("Unable to connect to peer management server: " + peerName + ", ip: " + ip + " due to " + e.getMessage(), e); + return null; + } + } + + if (s_logger.isTraceEnabled()) { + s_logger.trace("Found open channel for peer: " + peerName); + } + return ch; + } } - + public SocketChannel connectToPeer(long hostId, SocketChannel prevCh) { - String peerName = _clusterMgr.getPeerName(hostId); - if (peerName == null) { - return null; - } - - return connectToPeer(peerName, prevCh); + String peerName = _clusterMgr.getPeerName(hostId); + if (peerName == null) { + return null; + } + + return connectToPeer(peerName, prevCh); } - + @Override - protected AgentAttache getAttache(final Long hostId) throws AgentUnavailableException { + protected AgentAttache getAttache(final Long hostId) throws AgentUnavailableException { assert (hostId != null) : "Who didn't check their id value?"; HostVO host = _hostDao.findById(hostId); - if( host == null) { + if (host == null) { throw new AgentUnavailableException("Can't find the host ", hostId); } - - AgentAttache agent = findAttache(hostId); - if (agent == null) { - if (host.getStatus() == Status.Up && (host.getManagementServerId() != null && host.getManagementServerId() != _nodeId)) { - agent = createAttache(hostId); - } - } + + AgentAttache agent = findAttache(hostId); + if (agent == null) { + if (host.getStatus() == Status.Up && (host.getManagementServerId() != null && host.getManagementServerId() != _nodeId)) { + agent = createAttache(hostId); + } + } if (agent == null) { throw new 
AgentUnavailableException("Host is not in the right state", hostId); } return agent; } - + @Override public boolean stop() { - if(_peers != null) { - for (SocketChannel ch : _peers.values()) { - try { - s_logger.info("Closing: " + ch.toString()); - ch.close(); - } catch (IOException e) { - } - } - } - _timer.cancel(); - return super.stop(); + if (_peers != null) { + for (SocketChannel ch : _peers.values()) { + try { + s_logger.info("Closing: " + ch.toString()); + ch.close(); + } catch (IOException e) { + } + } + } + _timer.cancel(); + return super.stop(); } - + @Override public void startDirectlyConnectedHosts() { - // override and let it be dummy for purpose, we will scan and load direct agents periodically. - // We may also pickup agents that have been left over from other crashed management server + // override and let it be dummy for purpose, we will scan and load direct agents periodically. + // We may also pickup agents that have been left over from other crashed management server } - + public class ClusteredAgentHandler extends AgentHandler { - + public ClusteredAgentHandler(Task.Type type, Link link, byte[] data) { super(type, link, data); } - + @Override protected void doTask(final Task task) throws Exception { - Transaction txn = Transaction.open(Transaction.CLOUD_DB); - try { - if (task.getType() != Task.Type.DATA) { - super.doTask(task); - return; - } - - final byte[] data = task.getData(); - Version ver = Request.getVersion(data); - if (ver.ordinal() < Version.v3.ordinal()) { - super.doTask(task); - return; - } - - long hostId = Request.getAgentId(data); - Link link = task.getLink(); - - if (Request.fromServer(data)) { - - AgentAttache agent = findAgent(hostId); - - if (Request.isControl(data)) { - if (agent == null) { - logD(data, "No attache to process cancellation"); - return; - } - Request req = Request.parse(data); - Command[] cmds = req.getCommands(); - CancelCommand cancel = (CancelCommand)cmds[0]; - if (s_logger.isDebugEnabled()) { - logD(data, 
"Cancel request received"); - } - agent.cancel(cancel.getSequence()); - return; - } - - try { - if (agent == null || agent.isClosed()) { - throw new AgentUnavailableException("Unable to route to agent ", hostId); - } - - if (Request.isRequest(data) && Request.requiresSequentialExecution(data)) { - // route it to the agent. - // But we have the serialize the control commands here so we have - // to deserialize this and send it through the agent attache. - Request req = Request.parse(data); - agent.send(req, null); - return; - } else { - if (agent instanceof Routable) { - Routable cluster = (Routable)agent; - cluster.routeToAgent(data); - } else { - agent.send(Request.parse(data)); - } - return; - } - } catch (AgentUnavailableException e) { - logD(data, e.getMessage()); - cancel(Long.toString(Request.getManagementServerId(data)), hostId, Request.getSequence(data), e.getMessage()); - } - } else { - long mgmtId = Request.getManagementServerId(data); - if (mgmtId != -1 && mgmtId != _nodeId) { - routeToPeer(Long.toString(mgmtId), data); - if (Request.requiresSequentialExecution(data)) { - AgentAttache attache = (AgentAttache)link.attachment(); - if (attache != null) { - attache.sendNext(Request.getSequence(data)); - } else if (s_logger.isDebugEnabled()){ - logD(data, "No attache to process " + Request.parse(data).toString()); - } - } - return; - } else { - if (Request.isRequest(data)) { - super.doTask(task); - } else { - // received an answer. 
- final Response response = Response.parse(data); - AgentAttache attache = findAttache(response.getAgentId()); - if (attache == null) { - s_logger.info("SeqA " + response.getAgentId() + "-" + response.getSequence() + "Unable to find attache to forward " + response.toString()); - return; - } - if (!attache.processAnswers(response.getSequence(), response)) { - s_logger.info("SeqA " + attache.getId() + "-" + response.getSequence() + ": Response is not processed: " + response.toString()); - } - } - return; - } - } + Transaction txn = Transaction.open(Transaction.CLOUD_DB); + try { + if (task.getType() != Task.Type.DATA) { + super.doTask(task); + return; + } + + final byte[] data = task.getData(); + Version ver = Request.getVersion(data); + if (ver.ordinal() < Version.v3.ordinal()) { + super.doTask(task); + return; + } + + long hostId = Request.getAgentId(data); + Link link = task.getLink(); + + if (Request.fromServer(data)) { + + AgentAttache agent = findAgent(hostId); + + if (Request.isControl(data)) { + if (agent == null) { + logD(data, "No attache to process cancellation"); + return; + } + Request req = Request.parse(data); + Command[] cmds = req.getCommands(); + CancelCommand cancel = (CancelCommand) cmds[0]; + if (s_logger.isDebugEnabled()) { + logD(data, "Cancel request received"); + } + agent.cancel(cancel.getSequence()); + return; + } + + try { + if (agent == null || agent.isClosed()) { + throw new AgentUnavailableException("Unable to route to agent ", hostId); + } + + if (Request.isRequest(data) && Request.requiresSequentialExecution(data)) { + // route it to the agent. + // But we have the serialize the control commands here so we have + // to deserialize this and send it through the agent attache. 
+ Request req = Request.parse(data); + agent.send(req, null); + return; + } else { + if (agent instanceof Routable) { + Routable cluster = (Routable) agent; + cluster.routeToAgent(data); + } else { + agent.send(Request.parse(data)); + } + return; + } + } catch (AgentUnavailableException e) { + logD(data, e.getMessage()); + cancel(Long.toString(Request.getManagementServerId(data)), hostId, Request.getSequence(data), e.getMessage()); + } + } else { + long mgmtId = Request.getManagementServerId(data); + if (mgmtId != -1 && mgmtId != _nodeId) { + routeToPeer(Long.toString(mgmtId), data); + if (Request.requiresSequentialExecution(data)) { + AgentAttache attache = (AgentAttache) link.attachment(); + if (attache != null) { + attache.sendNext(Request.getSequence(data)); + } else if (s_logger.isDebugEnabled()) { + logD(data, "No attache to process " + Request.parse(data).toString()); + } + } + return; + } else { + if (Request.isRequest(data)) { + super.doTask(task); + } else { + // received an answer. 
+ final Response response = Response.parse(data); + AgentAttache attache = findAttache(response.getAgentId()); + if (attache == null) { + s_logger.info("SeqA " + response.getAgentId() + "-" + response.getSequence() + "Unable to find attache to forward " + response.toString()); + return; + } + if (!attache.processAnswers(response.getSequence(), response)) { + s_logger.info("SeqA " + attache.getId() + "-" + response.getSequence() + ": Response is not processed: " + response.toString()); + } + } + return; + } + } } finally { - txn.close(); + txn.close(); } } } @Override public void onManagementNodeJoined(List nodeList, long selfNodeId) { - + } @Override @@ -590,6 +597,6 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } @Override - public void onManagementNodeIsolated() { - } -} + public void onManagementNodeIsolated() { + } +} diff --git a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java index ec8f66772de..b8567b7466c 100644 --- a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -76,67 +76,66 @@ import com.cloud.vm.VirtualMachineProfile; import com.cloud.vm.dao.VMInstanceDao; /** - * HighAvailabilityManagerImpl coordinates the HA process. VMs are registered with - * the HA Manager for HA. The request is stored within a database backed - * work queue. HAManager has a number of workers that pick up - * these work items to perform HA on the VMs. - * - * The HA process goes as follows: - * 1. Check with the list of Investigators to determine that the VM is - * no longer running. If a Investigator finds the VM is still alive, - * the HA process is stopped and the state of the VM reverts back to - * its previous state. If a Investigator finds the VM is dead, then - * HA process is started on the VM, skipping step 2. - * 2. If the list of Investigators can not determine if the VM is dead or - * alive. 
The list of FenceBuilders is invoked to fence off the VM - * so that it won't do any damage to the storage and network. - * 3. The VM is marked as stopped. - * 4. The VM is started again via the normal process of starting VMs. Note - * that once the VM is marked as stopped, the user may have started the - * VM himself. - * 5. VMs that have re-started more than the configured number of times are - * marked as in Error state and the user is not allowed to restart - * the VM. - * - * @config - * {@table - * || Param Name | Description | Values | Default || - * || workers | number of worker threads to spin off to do the processing | int | 1 || - * || time.to.sleep | Time to sleep if no work items are found | seconds | 60 || - * || max.retries | number of times to retry start | int | 5 || - * || time.between.failure | Time elapsed between failures before we consider it as another retry | seconds | 3600 || - * || time.between.cleanup | Time to wait before the cleanup thread runs | seconds | 86400 || - * || force.ha | Force HA to happen even if the VM says no | boolean | false || - * || ha.retry.wait | time to wait before retrying the work item | seconds | 120 || - * || stop.retry.wait | time to wait before retrying the stop | seconds | 120 || - * } + * HighAvailabilityManagerImpl coordinates the HA process. VMs are registered with the HA Manager for HA. The request is stored + * within a database backed work queue. HAManager has a number of workers that pick up these work items to perform HA on the + * VMs. + * + * The HA process goes as follows: 1. Check with the list of Investigators to determine that the VM is no longer running. If a + * Investigator finds the VM is still alive, the HA process is stopped and the state of the VM reverts back to its previous + * state. If a Investigator finds the VM is dead, then HA process is started on the VM, skipping step 2. 2. If the list of + * Investigators can not determine if the VM is dead or alive. 
The list of FenceBuilders is invoked to fence off the VM so that + * it won't do any damage to the storage and network. 3. The VM is marked as stopped. 4. The VM is started again via the normal + * process of starting VMs. Note that once the VM is marked as stopped, the user may have started the VM himself. 5. VMs that + * have re-started more than the configured number of times are marked as in Error state and the user is not allowed to restart + * the VM. + * + * @config {@table || Param Name | Description | Values | Default || || workers | number of worker threads to spin off to do the + * processing | int | 1 || || time.to.sleep | Time to sleep if no work items are found | seconds | 60 || || max.retries + * | number of times to retry start | int | 5 || || time.between.failure | Time elapsed between failures before we + * consider it as another retry | seconds | 3600 || || time.between.cleanup | Time to wait before the cleanup thread + * runs | seconds | 86400 || || force.ha | Force HA to happen even if the VM says no | boolean | false || || + * ha.retry.wait | time to wait before retrying the work item | seconds | 120 || || stop.retry.wait | time to wait + * before retrying the stop | seconds | 120 || * } **/ -@Local(value={HighAvailabilityManager.class}) +@Local(value = { HighAvailabilityManager.class }) public class HighAvailabilityManagerImpl implements HighAvailabilityManager, ClusterManagerListener { protected static final Logger s_logger = Logger.getLogger(HighAvailabilityManagerImpl.class); String _name; WorkerThread[] _workers; boolean _stopped; long _timeToSleep; - @Inject HighAvailabilityDao _haDao; - @Inject VMInstanceDao _instanceDao; - @Inject HostDao _hostDao; - @Inject DataCenterDao _dcDao; - @Inject HostPodDao _podDao; - @Inject ClusterDetailsDao _clusterDetailsDao; + @Inject + HighAvailabilityDao _haDao; + @Inject + VMInstanceDao _instanceDao; + @Inject + HostDao _hostDao; + @Inject + DataCenterDao _dcDao; + @Inject + HostPodDao _podDao; + 
@Inject + ClusterDetailsDao _clusterDetailsDao; long _serverId; - @Inject(adapter=Investigator.class) + @Inject(adapter = Investigator.class) Adapters _investigators; - @Inject(adapter=FenceBuilder.class) + @Inject(adapter = FenceBuilder.class) Adapters _fenceBuilders; - @Inject AgentManager _agentMgr; - @Inject AlertManager _alertMgr; - @Inject StorageManager _storageMgr; - @Inject GuestOSDao _guestOSDao; - @Inject GuestOSCategoryDao _guestOSCategoryDao; - @Inject VirtualMachineManager _itMgr; - @Inject AccountManager _accountMgr; - + @Inject + AgentManager _agentMgr; + @Inject + AlertManager _alertMgr; + @Inject + StorageManager _storageMgr; + @Inject + GuestOSDao _guestOSDao; + @Inject + GuestOSCategoryDao _guestOSCategoryDao; + @Inject + VirtualMachineManager _itMgr; + @Inject + AccountManager _accountMgr; + String _instance; ScheduledExecutorService _executor; int _stopRetryInterval; @@ -167,7 +166,7 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu hostState = investigator.isAgentAlive(host); if (hostState != null) { if (s_logger.isDebugEnabled()) { - s_logger.debug(investigator.getName()+ " was able to determine host " + hostId + " is in " + hostState.toString()); + s_logger.debug(investigator.getName() + " was able to determine host " + hostId + " is in " + hostState.toString()); } return hostState; } @@ -179,11 +178,10 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu return null; } - @Override public void scheduleRestartForVmsOnHost(final HostVO host) { - - if( host.getType() != Host.Type.Routing) { + + if (host.getType() != Host.Type.Routing) { return; } s_logger.warn("Scheduling restart for VMs on host " + host.getId()); @@ -213,7 +211,8 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu HostPodVO podVO = _podDao.findById(host.getPodId()); String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + 
", pod: " + podVO.getName(); - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host is down, " + hostDesc, "Host [" + hostDesc + "] is down." + ((sb != null) ? sb.toString() : "")); + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host is down, " + hostDesc, "Host [" + hostDesc + "] is down." + + ((sb != null) ? sb.toString() : "")); for (final VMInstanceVO vm : vms) { if (s_logger.isDebugEnabled()) { @@ -225,25 +224,25 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu @Override public void scheduleStop(VMInstanceVO vm, long hostId, WorkType type) { - assert (type == WorkType.CheckStop || type == WorkType.ForceStop || type == WorkType.Stop); - - if (_haDao.hasBeenScheduled(vm.getId(), type)) { - s_logger.info("There's already a job scheduled to stop " + vm); - return; - } - - HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), type, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); + assert (type == WorkType.CheckStop || type == WorkType.ForceStop || type == WorkType.Stop); + + if (_haDao.hasBeenScheduled(vm.getId(), type)) { + s_logger.info("There's already a job scheduled to stop " + vm); + return; + } + + HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), type, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); if (s_logger.isDebugEnabled()) { - s_logger.debug("Scheduled " + work); + s_logger.debug("Scheduled " + work); } wakeupWorkers(); } protected void wakeupWorkers() { - for (WorkerThread worker : _workers) { - worker.wakup(); - } + for (WorkerThread worker : _workers) { + worker.wakup(); + } } @Override @@ -256,15 +255,15 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu @Override public void scheduleRestart(VMInstanceVO vm, final boolean investigate) { - Long hostId = vm.getHostId(); - if (hostId == null) { - _itMgr.stateTransitTo(vm, 
Event.OperationFailed, null); - return; - } + Long hostId = vm.getHostId(); + if (hostId == null) { + _itMgr.stateTransitTo(vm, Event.OperationFailed, null); + return; + } if (!investigate) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("VM does not require investigation so I'm marking it as Stopped: " + vm.toString()); - } + if (s_logger.isDebugEnabled()) { + s_logger.debug("VM does not require investigation so I'm marking it as Stopped: " + vm.toString()); + } short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { @@ -275,12 +274,9 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu if (!(_forceHA || vm.isHaEnabled())) { String hostDesc = "id:" + vm.getHostId() + ", availability zone id:" + vm.getDataCenterId() + ", pod id:" + vm.getPodId(); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: " - + vm.getName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " - + hostDesc, "Virtual Machine " + vm.getName() + " (id: " - + vm.getId() + ") running on host [" + vm.getHostId() - + "] stopped unexpectedly."); - + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: " + vm.getName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " + hostDesc, + "Virtual Machine " + vm.getName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped unexpectedly."); + if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } @@ -313,11 +309,10 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu } } if (NeedToAddNew) { - final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, hostId, vm.getState(), - maxRetries + 1, vm.getUpdated()); + final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ? 
Step.Investigating : Step.Scheduled, hostId, vm.getState(), maxRetries + 1, vm.getUpdated()); _haDao.persist(work); } - + if (s_logger.isInfoEnabled()) { s_logger.info("Schedule vm for HA: " + vm.toString()); } @@ -337,16 +332,11 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu s_logger.info("HA on " + vm); if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) { - s_logger.info("VM " + vm + " has been changed. Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + " previous updated = " + work.getUpdateTime()); - return null; + s_logger.info("VM " + vm + " has been changed. Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + + " previous updated = " + work.getUpdateTime()); + return null; } - HostVO host = _hostDao.findById(work.getHostId()); - - DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); - HostPodVO podVO = _podDao.findById(host.getPodId()); - String hostDesc = "name: " + host.getName() + "(id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); - short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER; @@ -354,82 +344,101 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY; } + HostVO host = _hostDao.findById(work.getHostId()); + boolean isHostRemoved = false; + if (host == null) { + host = _hostDao.findByIdIncludingRemoved(work.getHostId()); + if (host != null) { + s_logger.debug("VM " + vm.toString() + " is now no longer on host " + work.getHostId() + " as the host is removed"); + isHostRemoved = true; + } + } + + DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); + HostPodVO podVO = 
_podDao.findById(host.getPodId()); + String hostDesc = "name: " + host.getName() + "(id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); + Boolean alive = null; if (work.getStep() == Step.Investigating) { - if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) { - s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId()); - return null; - } - - Enumeration en = _investigators.enumeration(); - Investigator investigator = null; - while (en.hasMoreElements()) { - investigator = en.nextElement(); - alive = investigator.isVmAlive(vm, host); - s_logger.info(investigator.getName() + " found " + vm + "to be alive? " + alive); - if (alive != null) { - break; + if (!isHostRemoved) { + if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) { + s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId()); + return null; } - } - - boolean fenced = false; - if (alive == null) { - s_logger.debug("Fencing off VM that we don't know the state of"); - Enumeration enfb = _fenceBuilders.enumeration(); - while (enfb.hasMoreElements()) { - FenceBuilder fb = enfb.nextElement(); - Boolean result = fb.fenceOff(vm, host); - s_logger.info("Fencer " + fb.getName() + " returned " + result); - if (result != null && result) { - fenced = true; - break; + + Enumeration en = _investigators.enumeration(); + Investigator investigator = null; + while (en.hasMoreElements()) { + investigator = en.nextElement(); + alive = investigator.isVmAlive(vm, host); + s_logger.info(investigator.getName() + " found " + vm + "to be alive? " + alive); + if (alive != null) { + break; } } - } else if (!alive) { - fenced = true; - } else { - s_logger.debug("VM " + vm.getName() + " is found to be alive by " + investigator.getName()); - if (host.getStatus() == Status.Up) { - s_logger.info(vm + " is alive and host is up. 
No need to restart it."); - return null; - } else { - s_logger.debug("Rescheduling because the host is not up but the vm is alive"); - return (System.currentTimeMillis() >> 10) + _investigateRetryInterval; - } - } - - if (!fenced) { - s_logger.debug("We were unable to fence off the VM " + vm); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); - return (System.currentTimeMillis() >> 10) + _restartRetryInterval; - } - try { - _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); - } catch (ResourceUnavailableException e) { - assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); - } catch (OperationTimedoutException e) { - assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); - } catch (ConcurrentOperationException e) { - assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + boolean fenced = false; + if (alive == null) { + s_logger.debug("Fencing off VM that we don't know the state of"); + Enumeration enfb = _fenceBuilders.enumeration(); + while (enfb.hasMoreElements()) { + FenceBuilder fb = enfb.nextElement(); + Boolean result = fb.fenceOff(vm, host); + s_logger.info("Fencer " + fb.getName() + " returned " + result); + if (result != null && result) { + fenced = true; + break; + } + } + } else if (!alive) { + fenced = true; + } else { + s_logger.debug("VM " + vm.getName() + " is found to be alive by " + investigator.getName()); + if (host.getStatus() == Status.Up) { + s_logger.info(vm + " is alive and host is up. 
No need to restart it."); + return null; + } else { + s_logger.debug("Rescheduling because the host is not up but the vm is alive"); + return (System.currentTimeMillis() >> 10) + _investigateRetryInterval; + } + } + + if (!fenced) { + s_logger.debug("We were unable to fence off the VM " + vm); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, + "Insufficient capacity to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); + return (System.currentTimeMillis() >> 10) + _restartRetryInterval; + } + + try { + _itMgr.advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); + } catch (ResourceUnavailableException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } catch (OperationTimedoutException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } catch (ConcurrentOperationException e) { + assert false : "How do we hit this when force is true?"; + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + } + + work.setStep(Step.Scheduled); + _haDao.update(work.getId(), work); + } else { + assert false : "How come that HA step is Investigating and the host is removed?"; } - - work.setStep(Step.Scheduled); - _haDao.update(work.getId(), work); } - + vm = _itMgr.findById(vm.getType(), vm.getId()); if (!_forceHA && !vm.isHaEnabled()) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } - return null; // VM doesn't require HA + return null; // VM doesn't require HA } - + if (!_storageMgr.canVmRestartOnAnotherServer(vm.getId())) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM can not restart on another server."); @@ 
-450,20 +459,24 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu } if (s_logger.isDebugEnabled()) { - s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval); + s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval); } } catch (final InsufficientCapacityException e) { - s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); + s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, + "Insufficient capacity to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (final ResourceUnavailableException e) { - s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); + s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, + "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (ConcurrentOperationException e) { - s_logger.warn("Unable to restart " + 
vm.toString() + " due to " + e.getMessage()); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); + s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, + "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (OperationTimedoutException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "Unable to restart " + vm.getName() + " which was running on host " + hostDesc, + "The Storage is unavailable for trying to restart VM, name: " + vm.getName() + ", id: " + vmId + " which was running on host " + hostDesc); } vm = _itMgr.findById(vm.getType(), vm.getId()); work.setUpdateTime(vm.getUpdated()); @@ -471,15 +484,14 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu return (System.currentTimeMillis() >> 10) + _restartRetryInterval; } - public Long migrate(final HaWorkVO work) { long vmId = work.getInstanceId(); - + long srcHostId = work.getHostId(); try { work.setStep(Step.Migrating); _haDao.update(work.getId(), work); - + if (!_itMgr.migrateAway(work.getType(), vmId, srcHostId)) { s_logger.warn("Unable to migrate vm from " + srcHostId); 
_agentMgr.maintenanceFailed(srcHostId); @@ -488,58 +500,58 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu } catch (InsufficientServerCapacityException e) { s_logger.warn("Insufficient capacity for migrating a VM."); _agentMgr.maintenanceFailed(srcHostId); - return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; + return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; } catch (VirtualMachineMigrationException e) { s_logger.warn("Looks like VM is still starting, we need to retry migrating the VM later."); _agentMgr.maintenanceFailed(srcHostId); - return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; + return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; } } - + @Override public void scheduleDestroy(VMInstanceVO vm, long hostId) { final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Destroy, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); if (s_logger.isDebugEnabled()) { - s_logger.debug("Scheduled " + work.toString()); + s_logger.debug("Scheduled " + work.toString()); } wakeupWorkers(); } - + @Override public void cancelDestroy(VMInstanceVO vm, Long hostId) { - _haDao.delete(vm.getId(), WorkType.Destroy); + _haDao.delete(vm.getId(), WorkType.Destroy); } - + protected Long destroyVM(HaWorkVO work) { final VMInstanceVO vm = _itMgr.findById(work.getType(), work.getInstanceId()); s_logger.info("Destroying " + vm.toString()); try { - if (vm.getState() != State.Destroyed) { - s_logger.info("VM is no longer in Destroyed state " + vm.toString()); - return null; - } - + if (vm.getState() != State.Destroyed) { + s_logger.info("VM is no longer in Destroyed state " + vm.toString()); + return null; + } + if (vm.getHostId() != null) { - if (_itMgr.destroy(vm, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { + if (_itMgr.destroy(vm, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { s_logger.info("Successfully destroy 
" + vm); - return null; - } - s_logger.debug("Stop for " + vm + " was unsuccessful."); + return null; + } + s_logger.debug("Stop for " + vm + " was unsuccessful."); } else { - if (s_logger.isDebugEnabled()) { - s_logger.debug(vm + " has already been stopped"); - } + if (s_logger.isDebugEnabled()) { + s_logger.debug(vm + " has already been stopped"); + } return null; } } catch (final AgentUnavailableException e) { s_logger.debug("Agnet is not available" + e.getMessage()); } catch (OperationTimedoutException e) { - s_logger.debug("operation timed out: " + e.getMessage()); - } catch (ConcurrentOperationException e) { + s_logger.debug("operation timed out: " + e.getMessage()); + } catch (ConcurrentOperationException e) { s_logger.debug("concurrent operation: " + e.getMessage()); } - + work.setTimesTried(work.getTimesTried() + 1); return (System.currentTimeMillis() >> 10) + _stopRetryInterval; } @@ -553,27 +565,27 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu } s_logger.info("Stopping " + vm); try { - if (work.getWorkType() == WorkType.Stop) { - if (vm.getHostId() == null) { + if (work.getWorkType() == WorkType.Stop) { + if (vm.getHostId() == null) { if (s_logger.isDebugEnabled()) { s_logger.debug(vm.toString() + " has already been stopped"); } return null; - } + } if (_itMgr.advanceStop(vm, false, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { - s_logger.info("Successfully stopped " + vm); + s_logger.info("Successfully stopped " + vm); return null; } - } else if (work.getWorkType() == WorkType.CheckStop) { - if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { - s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? 
vm.getHostId() : "none") + " State: " + vm.getState()); - return null; - } - if (_itMgr.advanceStop(vm, false, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { - s_logger.info("Stop for " + vm + " was successful"); - return null; - } - } else if (work.getWorkType() == WorkType.ForceStop){ + } else if (work.getWorkType() == WorkType.CheckStop) { + if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { + s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState()); + return null; + } + if (_itMgr.advanceStop(vm, false, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount())) { + s_logger.info("Stop for " + vm + " was successful"); + return null; + } + } else if (work.getWorkType() == WorkType.ForceStop) { if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? 
vm.getHostId() : "none") + " State: " + vm.getState()); return null; @@ -582,15 +594,15 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu s_logger.info("Stop for " + vm + " was successful"); return null; } - } else { - assert false : "Who decided there's other steps but didn't modify the guy who does the work?"; - } + } else { + assert false : "Who decided there's other steps but didn't modify the guy who does the work?"; + } } catch (final ResourceUnavailableException e) { s_logger.debug("Agnet is not available" + e.getMessage()); } catch (OperationTimedoutException e) { - s_logger.debug("operation timed out: " + e.getMessage()); - } - + s_logger.debug("operation timed out: " + e.getMessage()); + } + work.setTimesTried(work.getTimesTried() + 1); if (s_logger.isDebugEnabled()) { s_logger.debug("Stop was unsuccessful. Rescheduling"); @@ -604,15 +616,15 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu _haDao.deleteMigrationWorkItems(host.getId(), type, _serverId); } - + @Override public List findTakenMigrationWork() { - List works = _haDao.findTakenWorkItems(WorkType.Migration); - List vms = new ArrayList(works.size()); - for (HaWorkVO work : works) { - vms.add(_instanceDao.findById(work.getInstanceId())); - } - return vms; + List works = _haDao.findTakenWorkItems(WorkType.Migration); + List vms = new ArrayList(works.size()); + for (HaWorkVO work : works) { + vms.add(_instanceDao.findById(work.getInstanceId())); + } + return vms; } @Override @@ -620,7 +632,7 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu _name = name; ComponentLocator locator = ComponentLocator.getLocator(ManagementServer.Name); - _serverId = ((ManagementServer)ComponentLocator.getComponent(ManagementServer.Name)).getId(); + _serverId = ((ManagementServer) ComponentLocator.getComponent(ManagementServer.Name)).getId(); _investigators = locator.getAdapters(Investigator.class); _fenceBuilders = 
locator.getAdapters(FenceBuilder.class); @@ -652,16 +664,16 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu value = params.get("time.between.cleanup"); _timeBetweenCleanups = NumbersUtil.parseLong(value, 3600 * 24); - + value = params.get("stop.retry.interval"); _stopRetryInterval = NumbersUtil.parseInt(value, 10 * 60); - + value = params.get("restart.retry.interval"); _restartRetryInterval = NumbersUtil.parseInt(value, 10 * 60); - + value = params.get("investigate.retry.interval"); _investigateRetryInterval = NumbersUtil.parseInt(value, 1 * 60); - + value = params.get("migrate.retry.interval"); _migrateRetryInterval = NumbersUtil.parseInt(value, 2 * 60); @@ -669,16 +681,16 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu if (_instance == null) { _instance = "VMOPS"; } - + _haDao.releaseWorkItems(_serverId); _stopped = true; - + _executor = Executors.newScheduledThreadPool(count, new NamedThreadFactory("HA")); return true; } - + @Override public String getName() { return _name; @@ -696,7 +708,7 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu return true; } - + @Override public boolean stop() { _stopped = true; @@ -714,23 +726,23 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu s_logger.info("HA Cleanup Thread Running"); try { - _haDao.cleanup(System.currentTimeMillis() - _timeBetweenFailures); + _haDao.cleanup(System.currentTimeMillis() - _timeBetweenFailures); } catch (Exception e) { s_logger.warn("Error while cleaning up", e); } finally { - StackMaid.current().exitCleanup(); + StackMaid.current().exitCleanup(); } } } - + protected class WorkerThread extends Thread { - public WorkerThread(String name) { - super(name); - } - + public WorkerThread(String name) { + super(name); + } + @Override public void run() { - s_logger.info("Starting work"); + s_logger.info("Starting work"); while (!_stopped) { HaWorkVO work = null; try { 
@@ -738,9 +750,9 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu work = _haDao.take(_serverId); if (work == null) { try { - synchronized(this) { - wait(_timeToSleep); - } + synchronized (this) { + wait(_timeToSleep); + } continue; } catch (final InterruptedException e) { s_logger.info("Interrupted"); @@ -761,17 +773,17 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu } else if (wt == WorkType.Stop || wt == WorkType.CheckStop || wt == WorkType.ForceStop) { nextTime = stopVM(work); } else if (wt == WorkType.Destroy) { - nextTime = destroyVM(work); + nextTime = destroyVM(work); } else { - assert false : "How did we get here with " + wt.toString(); + assert false : "How did we get here with " + wt.toString(); continue; } - + if (nextTime == null) { - s_logger.info("Completed " + work); + s_logger.info("Completed " + work); work.setStep(Step.Done); } else { - s_logger.info("Rescheduling " + work + " to try again at " + new Date(nextTime << 10)); + s_logger.info("Rescheduling " + work + " to try again at " + new Date(nextTime << 10)); work.setTimeToTry(nextTime); work.setServerId(null); work.setDateTaken(null); @@ -781,20 +793,20 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu work.setStep(Step.Error); } _haDao.update(work.getId(), work); - } catch(final Throwable th) { + } catch (final Throwable th) { s_logger.error("Caught this throwable, ", th); } finally { - StackMaid.current().exitCleanup(); - if (work != null) { - NDC.pop(); - } + StackMaid.current().exitCleanup(); + if (work != null) { + NDC.pop(); + } } } s_logger.info("Time to go home!"); } - + public synchronized void wakup() { - notifyAll(); + notifyAll(); } } @@ -808,8 +820,8 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager, Clu _haDao.releaseWorkItems(node.getMsid()); } } - + @Override - public void onManagementNodeIsolated() { - } + public void onManagementNodeIsolated() { 
+ } } diff --git a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java index 9803b359726..3a0b07aacb4 100755 --- a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -142,52 +142,83 @@ import com.cloud.vm.dao.SecondaryStorageVmDao; import com.cloud.vm.dao.UserVmDao; import com.cloud.vm.dao.VMInstanceDao; -@Local(value=VirtualMachineManager.class) +@Local(value = VirtualMachineManager.class) public class VirtualMachineManagerImpl implements VirtualMachineManager, Listener { private static final Logger s_logger = Logger.getLogger(VirtualMachineManagerImpl.class); - + String _name; - @Inject protected StorageManager _storageMgr; - @Inject protected NetworkManager _networkMgr; - @Inject protected AgentManager _agentMgr; - @Inject protected VMInstanceDao _vmDao; - @Inject protected ServiceOfferingDao _offeringDao; - @Inject protected VMTemplateDao _templateDao; - @Inject protected UserDao _userDao; - @Inject protected AccountDao _accountDao; - @Inject protected DomainDao _domainDao; - @Inject protected ClusterManager _clusterMgr; - @Inject protected ItWorkDao _workDao; - @Inject protected UserVmDao _userVmDao; - @Inject protected DomainRouterDao _routerDao; - @Inject protected ConsoleProxyDao _consoleDao; - @Inject protected SecondaryStorageVmDao _secondaryDao; - @Inject protected UsageEventDao _usageEventDao; - @Inject protected NicDao _nicsDao; - @Inject protected AccountManager _accountMgr; - @Inject protected HostDao _hostDao; - @Inject protected AlertManager _alertMgr; - @Inject protected GuestOSCategoryDao _guestOsCategoryDao; - @Inject protected GuestOSDao _guestOsDao; - @Inject protected VolumeDao _volsDao; - @Inject protected ConsoleProxyManager _consoleProxyMgr; - @Inject protected ConfigurationManager _configMgr; - @Inject protected CapacityManager _capacityMgr; - @Inject protected HighAvailabilityManager _haMgr; - @Inject protected 
HostPodDao _podDao; - @Inject protected DataCenterDao _dcDao; - @Inject protected StoragePoolDao _storagePoolDao; - @Inject protected HypervisorGuruManager _hvGuruMgr; - - @Inject(adapter=DeploymentPlanner.class) + @Inject + protected StorageManager _storageMgr; + @Inject + protected NetworkManager _networkMgr; + @Inject + protected AgentManager _agentMgr; + @Inject + protected VMInstanceDao _vmDao; + @Inject + protected ServiceOfferingDao _offeringDao; + @Inject + protected VMTemplateDao _templateDao; + @Inject + protected UserDao _userDao; + @Inject + protected AccountDao _accountDao; + @Inject + protected DomainDao _domainDao; + @Inject + protected ClusterManager _clusterMgr; + @Inject + protected ItWorkDao _workDao; + @Inject + protected UserVmDao _userVmDao; + @Inject + protected DomainRouterDao _routerDao; + @Inject + protected ConsoleProxyDao _consoleDao; + @Inject + protected SecondaryStorageVmDao _secondaryDao; + @Inject + protected UsageEventDao _usageEventDao; + @Inject + protected NicDao _nicsDao; + @Inject + protected AccountManager _accountMgr; + @Inject + protected HostDao _hostDao; + @Inject + protected AlertManager _alertMgr; + @Inject + protected GuestOSCategoryDao _guestOsCategoryDao; + @Inject + protected GuestOSDao _guestOsDao; + @Inject + protected VolumeDao _volsDao; + @Inject + protected ConsoleProxyManager _consoleProxyMgr; + @Inject + protected ConfigurationManager _configMgr; + @Inject + protected CapacityManager _capacityMgr; + @Inject + protected HighAvailabilityManager _haMgr; + @Inject + protected HostPodDao _podDao; + @Inject + protected DataCenterDao _dcDao; + @Inject + protected StoragePoolDao _storagePoolDao; + @Inject + protected HypervisorGuruManager _hvGuruMgr; + + @Inject(adapter = DeploymentPlanner.class) protected Adapters _planners; - + Map> _vmGurus = new HashMap>(); protected StateMachine2 _stateMachine; - + ScheduledExecutorService _executor = null; protected int _operationTimeout; - + protected int _retry; protected 
long _nodeId; protected long _cleanupWait; @@ -198,28 +229,22 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene @Override public void registerGuru(VirtualMachine.Type type, VirtualMachineGuru guru) { - synchronized(_vmGurus) { + synchronized (_vmGurus) { _vmGurus.put(type, guru); } } - - @Override @DB - public T allocate(T vm, - VMTemplateVO template, - ServiceOfferingVO serviceOffering, - Pair rootDiskOffering, - List> dataDiskOfferings, - List> networks, - Map params, - DeploymentPlan plan, - HypervisorType hyperType, - Account owner) throws InsufficientCapacityException { + + @Override + @DB + public T allocate(T vm, VMTemplateVO template, ServiceOfferingVO serviceOffering, Pair rootDiskOffering, + List> dataDiskOfferings, List> networks, Map params, DeploymentPlan plan, + HypervisorType hyperType, Account owner) throws InsufficientCapacityException { if (s_logger.isDebugEnabled()) { s_logger.debug("Allocating entries for VM: " + vm); } - + VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, serviceOffering, owner, params); - + vm.setDataCenterId(plan.getDataCenterId()); if (plan.getPodId() != null) { vm.setPodId(plan.getPodId()); @@ -228,10 +253,10 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene vm.setHostId(plan.getHostId()); } assert (plan.getPoolId() == null) : "We currently don't support pool preset yet"; - + @SuppressWarnings("unchecked") - VirtualMachineGuru guru = (VirtualMachineGuru)_vmGurus.get(vm.getType()); - + VirtualMachineGuru guru = (VirtualMachineGuru) _vmGurus.get(vm.getType()); + Transaction txn = Transaction.currentTxn(); txn.start(); vm = guru.persist(vm); @@ -248,19 +273,19 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene if (dataDiskOfferings == null) { dataDiskOfferings = new ArrayList>(0); } - + if (s_logger.isDebugEnabled()) { s_logger.debug("Allocaing disks for " + vm); } - + if (template.getFormat() == 
ImageFormat.ISO) { _storageMgr.allocateRawVolume(Type.ROOT, "ROOT-" + vm.getId(), rootDiskOffering.first(), rootDiskOffering.second(), vm, owner); } else if (template.getFormat() == ImageFormat.BAREMETAL) { - // Do nothing - }else { + // Do nothing + } else { _storageMgr.allocateTemplatedVolume(Type.ROOT, "ROOT-" + vm.getId(), rootDiskOffering.first(), template, vm, owner); } - + for (Pair offering : dataDiskOfferings) { _storageMgr.allocateRawVolume(Type.DATADISK, "DATA-" + vm.getId(), offering.first(), offering.second(), vm, owner); } @@ -269,53 +294,41 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene if (s_logger.isDebugEnabled()) { s_logger.debug("Allocation completed for VM: " + vm); } - + return vm; } - + @Override - public T allocate(T vm, - VMTemplateVO template, - ServiceOfferingVO serviceOffering, - Long rootSize, - Pair dataDiskOffering, - List> networks, - DeploymentPlan plan, - HypervisorType hyperType, - Account owner) throws InsufficientCapacityException { + public T allocate(T vm, VMTemplateVO template, ServiceOfferingVO serviceOffering, Long rootSize, Pair dataDiskOffering, + List> networks, DeploymentPlan plan, HypervisorType hyperType, Account owner) throws InsufficientCapacityException { List> diskOfferings = new ArrayList>(1); if (dataDiskOffering != null) { diskOfferings.add(dataDiskOffering); } return allocate(vm, template, serviceOffering, new Pair(serviceOffering, rootSize), diskOfferings, networks, null, plan, hyperType, owner); } - + @Override - public T allocate(T vm, - VMTemplateVO template, - ServiceOfferingVO serviceOffering, - List> networks, - DeploymentPlan plan, - HypervisorType hyperType, - Account owner) throws InsufficientCapacityException { + public T allocate(T vm, VMTemplateVO template, ServiceOfferingVO serviceOffering, List> networks, DeploymentPlan plan, + HypervisorType hyperType, Account owner) throws InsufficientCapacityException { return allocate(vm, template, serviceOffering, new 
Pair(serviceOffering, null), null, networks, null, plan, hyperType, owner); } - + @SuppressWarnings("unchecked") private VirtualMachineGuru getVmGuru(T vm) { - return (VirtualMachineGuru)_vmGurus.get(vm.getType()); + return (VirtualMachineGuru) _vmGurus.get(vm.getType()); } - + @SuppressWarnings("unchecked") private VirtualMachineGuru getBareMetalVmGuru(T vm) { - return (VirtualMachineGuru)_vmGurus.get(VirtualMachine.Type.UserBareMetal); + return (VirtualMachineGuru) _vmGurus.get(VirtualMachine.Type.UserBareMetal); } - + @Override public boolean expunge(T vm, User caller, Account account) throws ResourceUnavailableException { try { if (advanceExpunge(vm, caller, account)) { - //Mark vms as removed + // Mark vms as removed remove(vm, caller, account); return true; } else { @@ -328,7 +341,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene throw new CloudRuntimeException("Concurrent operation ", e); } } - + @Override public boolean advanceExpunge(T vm, User caller, Account account) throws ResourceUnavailableException, OperationTimedoutException, ConcurrentOperationException { if (vm == null || vm.getRemoved() != null) { @@ -337,7 +350,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } return true; } - + if (!this.advanceStop(vm, false, caller, account)) { if (s_logger.isDebugEnabled()) { s_logger.debug("Unable to stop the VM so we can't expunge it."); @@ -348,20 +361,20 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene s_logger.debug("Unable to destroy the vm because it is not in the correct state: " + vm.toString()); return false; } - + if (s_logger.isDebugEnabled()) { s_logger.debug("Destroying vm " + vm); } - + VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); _networkMgr.cleanupNics(profile); - //Clean up volumes based on the vm's instance id - _storageMgr.cleanupVolumes(vm.getId()); - + // Clean up volumes based on the vm's instance id + 
_storageMgr.cleanupVolumes(vm.getId()); + VirtualMachineGuru guru = getVmGuru(vm); guru.finalizeExpunge(vm); - + if (s_logger.isDebugEnabled()) { s_logger.debug("Expunged " + vm); } @@ -380,17 +393,17 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene public boolean stop() { return true; } - + @Override public boolean configure(String name, Map xmlParams) throws ConfigurationException { _name = name; - + ComponentLocator locator = ComponentLocator.getCurrentLocator(); ConfigurationDao configDao = locator.getDao(ConfigurationDao.class); Map params = configDao.getConfiguration(xmlParams); - + _retry = NumbersUtil.parseInt(params.get(Config.StartRetry.key()), 10); - + ReservationContextImpl.setComponents(_userDao, _domainDao, _accountDao); VirtualMachineProfileImpl.setComponents(_offeringDao, _templateDao, _accountDao); @@ -400,24 +413,24 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene _opWaitInterval = NumbersUtil.parseLong(params.get(Config.VmOpWaitInterval.key()), 120) * 1000; _lockStateRetry = NumbersUtil.parseInt(params.get(Config.VmOpLockStateRetry.key()), 5); _operationTimeout = NumbersUtil.parseInt(params.get(Config.Wait.key()), 1800) * 2; - + _executor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("Vm-Operations-Cleanup")); _nodeId = _clusterMgr.getManagementNodeId(); - + _agentMgr.registerForHostEvents(this, true, true, true); - + return true; } - + @Override public String getName() { return _name; } - + protected VirtualMachineManagerImpl() { setStateMachine(); } - + @Override public T start(T vm, Map params, User caller, Account account) throws InsufficientCapacityException, ResourceUnavailableException { try { @@ -436,19 +449,19 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } return true; } - + if (vo.getStep() == Step.Done) { if (s_logger.isDebugEnabled()) { s_logger.debug("Work for " + vm + " is " + vo.getStep()); } return true; } - 
+ if (vo.getSecondsTaskIsInactive() > _cancelWait) { s_logger.warn("The task item for vm " + vm + " has been inactive for " + vo.getSecondsTaskIsInactive()); return false; } - + try { Thread.sleep(_opWaitInterval); } catch (InterruptedException e) { @@ -457,14 +470,14 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } s_logger.debug("Waiting some more to make sure there's no activity on " + vm); } - - + } - + @DB - protected Ternary changeToStartState(VirtualMachineGuru vmGuru, T vm, User caller, Account account) throws ConcurrentOperationException { + protected Ternary changeToStartState(VirtualMachineGuru vmGuru, T vm, User caller, Account account) + throws ConcurrentOperationException { long vmId = vm.getId(); - + ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Starting, vm.getType(), vm.getId()); int retry = _lockStateRetry; while (retry-- != 0) { @@ -472,26 +485,26 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene txn.start(); try { if (stateTransitTo(vm, Event.StartRequested, null, work.getId())) { - + Journal journal = new Journal.LogJournal("Creating " + vm, s_logger); work = _workDao.persist(work); ReservationContextImpl context = new ReservationContextImpl(work.getId(), journal, caller, account); - + if (s_logger.isDebugEnabled()) { s_logger.debug("Successfully transitioned to start state for " + vm + " reservation id = " + work.getId()); } return new Ternary(vmGuru.findById(vmId), context, work); } - + if (s_logger.isDebugEnabled()) { s_logger.debug("Determining why we're unable to update the state to Starting for " + vm); - } - + } + VMInstanceVO instance = _vmDao.findById(vmId); if (instance == null) { throw new ConcurrentOperationException("Unable to acquire lock on " + vm); } - + State state = instance.getState(); if (state == State.Running) { if (s_logger.isDebugEnabled()) { @@ -499,7 +512,7 @@ public class VirtualMachineManagerImpl implements 
VirtualMachineManager, Listene } return null; } - + if (state.isTransitional()) { if (!checkWorkItems(vm, state)) { throw new ConcurrentOperationException("There are concurrent operations on the VM " + vm); @@ -507,7 +520,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene continue; } } - + if (state != State.Stopped) { s_logger.debug("VM " + vm + " is not in a state to be started: " + state); return null; @@ -516,10 +529,10 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene txn.commit(); } } - + throw new ConcurrentOperationException("Unable to change the state of " + vm); } - + @DB protected boolean changeState(T vm, Event event, Long hostId, ItWorkVO work, Step step) { Transaction txn = Transaction.currentTxn(); @@ -531,66 +544,68 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene txn.commit(); return true; } - + @Override - public T advanceStart(T vm, Map params, User caller, Account account) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException { + public T advanceStart(T vm, Map params, User caller, Account account) throws InsufficientCapacityException, + ConcurrentOperationException, ResourceUnavailableException { long vmId = vm.getId(); - Long hostIdSpecified = vm.getHostId(); + Long hostIdSpecified = vm.getHostId(); VirtualMachineGuru vmGuru; if (vm.getHypervisorType() == HypervisorType.BareMetal) { - vmGuru = getBareMetalVmGuru(vm); + vmGuru = getBareMetalVmGuru(vm); } else { - vmGuru = getVmGuru(vm); + vmGuru = getVmGuru(vm); } - - vm = vmGuru.findById(vm.getId()); + + vm = vmGuru.findById(vm.getId()); Ternary start = changeToStartState(vmGuru, vm, caller, account); if (start == null) { return vmGuru.findById(vmId); } - + vm = start.first(); ReservationContext ctx = start.second(); ItWorkVO work = start.third(); - + T startedVm = null; ServiceOfferingVO offering = _offeringDao.findById(vm.getServiceOfferingId()); 
VMTemplateVO template = _templateDao.findById(vm.getTemplateId()); - + Long clusterSpecified = null; - if(hostIdSpecified != null){ - Host destinationHost = _hostDao.findById(hostIdSpecified); - clusterSpecified = destinationHost.getClusterId(); + if (hostIdSpecified != null) { + Host destinationHost = _hostDao.findById(hostIdSpecified); + clusterSpecified = destinationHost.getClusterId(); } DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), vm.getPodId(), clusterSpecified, hostIdSpecified, null); HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType()); - + try { Journal journal = start.second().getJournal(); - //edit plan if this vm's ROOT volume is in READY state already + // edit plan if this vm's ROOT volume is in READY state already VolumeVO readyRootVolume = null; List vols = _volsDao.findReadyRootVolumesByInstance(vm.getId()); - + for (VolumeVO vol : vols) { Volume.State state = vol.getState(); if (state == Volume.State.Ready) { - //make sure if this is a System VM, templateId is unchanged. If it is changed, let planner - //reassign pool for the volume - if(VirtualMachine.Type.isSystemVM(vm.getType())){ - Long volTemplateId = vol.getTemplateId(); - if(volTemplateId != null && template != null){ - if(volTemplateId.longValue() != template.getId()){ + // make sure if this is a System VM, templateId is unchanged. 
If it is changed, let planner + // reassign pool for the volume + if (VirtualMachine.Type.isSystemVM(vm.getType())) { + Long volTemplateId = vol.getTemplateId(); + if (volTemplateId != null && template != null) { + if (volTemplateId.longValue() != template.getId()) { if (s_logger.isDebugEnabled()) { - s_logger.debug("Root Volume " + vol + " of "+vm.getType().toString() +" System VM is ready, but volume's templateId does not match the System VM Template, updating templateId and reassigning a new pool"); + s_logger.debug("Root Volume " + vol + " of " + vm.getType().toString() + + " System VM is ready, but volume's templateId does not match the System VM Template, updating templateId and reassigning a new pool"); } - vol.setTemplateId(template.getId()); - _volsDao.update(vol.getId(), vol); - continue; - } - } + vol.setTemplateId(template.getId()); + _volsDao.update(vol.getId(), vol); + continue; + } + } - } + } StoragePoolVO pool = _storagePoolDao.findById(vol.getPoolId()); if (!pool.isInMaintenance()) { long rootVolDcId = pool.getDataCenterId(); @@ -599,12 +614,13 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene plan = new DataCenterDeployment(rootVolDcId, rootVolPodId, rootVolClusterId, null, vol.getPoolId()); readyRootVolume = vol; if (s_logger.isDebugEnabled()) { - s_logger.debug("Root Volume " + vol + " is ready, changing deployment plan to use this pool's datacenterId: "+rootVolDcId +" , podId: "+rootVolPodId +" , and clusterId: "+rootVolClusterId); + s_logger.debug("Root Volume " + vol + " is ready, changing deployment plan to use this pool's datacenterId: " + rootVolDcId + " , podId: " + rootVolPodId + + " , and clusterId: " + rootVolClusterId); } } - } + } } - + ExcludeList avoids = new ExcludeList(); int retry = _retry; while (retry-- != 0) { // It's != so that it can match -1. 
@@ -612,59 +628,59 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, offering, account, params); DeployDestination dest = null; for (DeploymentPlanner planner : _planners) { - if (planner.canHandle(vmProfile, plan, avoids)) { - dest = planner.plan(vmProfile, plan, avoids); - } else { - continue; - } + if (planner.canHandle(vmProfile, plan, avoids)) { + dest = planner.plan(vmProfile, plan, avoids); + } else { + continue; + } if (dest != null) { avoids.addHost(dest.getHost().getId()); journal.record("Deployment found ", vmProfile, dest); break; } } - + if (dest == null) { - throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId()); + throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId()); } - + long destHostId = dest.getHost().getId(); - + if (!changeState(vm, Event.OperationRetry, destHostId, work, Step.Prepare)) { throw new ConcurrentOperationException("Unable to update the state of the Virtual Machine"); } - + try { - if (vm.getHypervisorType() != HypervisorType.BareMetal) { - if(readyRootVolume != null){ - //remove the vol<->pool from destination, since we don't have to prepare this volume. - if(dest.getStorageForDisks() != null){ + if (vm.getHypervisorType() != HypervisorType.BareMetal) { + if (readyRootVolume != null) { + // remove the vol<->pool from destination, since we don't have to prepare this volume. 
+ if (dest.getStorageForDisks() != null) { if (s_logger.isDebugEnabled()) { s_logger.debug("No need to prepare the READY Root Volume " + readyRootVolume + ", removing it from deploydestination"); } - dest.getStorageForDisks().remove(readyRootVolume); - } - } - _storageMgr.prepare(vmProfile, dest); - } + dest.getStorageForDisks().remove(readyRootVolume); + } + } + _storageMgr.prepare(vmProfile, dest); + } _networkMgr.prepare(vmProfile, dest, ctx); - + vmGuru.finalizeVirtualMachineProfile(vmProfile, dest, ctx); - + VirtualMachineTO vmTO = hvGuru.implement(vmProfile); - + Commands cmds = new Commands(OnError.Revert); cmds.addCommand(new StartCommand(vmTO)); - + vmGuru.finalizeDeployment(cmds, vmProfile, dest, ctx); vm.setPodId(dest.getPod().getId()); - + work = _workDao.findById(work.getId()); if (work == null || work.getStep() != Step.Prepare) { throw new ConcurrentOperationException("Work steps have been changed: " + work); } _workDao.updateStep(work, Step.Starting); - + _agentMgr.send(destHostId, cmds); _workDao.updateStep(work, Step.Started); Answer startAnswer = cmds.getAnswer(StartAnswer.class); @@ -684,7 +700,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } catch (OperationTimedoutException e) { s_logger.debug("Unable to send the start command to host " + dest.getHost()); if (e.isActive()) { - //TODO: This one is different as we're not sure if the VM is actually started. + // TODO: This one is different as we're not sure if the VM is actually started. 
} avoids.addHost(destHostId); continue; @@ -718,19 +734,19 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene cleanup(vmGuru, vmProfile, work, Event.OperationFailed, false, caller, account); } } - } + } } finally { - if (startedVm == null) { - if(vm.getType().equals(VirtualMachine.Type.User)) { + if (startedVm == null) { + if (vm.getType().equals(VirtualMachine.Type.User)) { _accountMgr.decrementResourceCount(vm.getAccountId(), ResourceType.user_vm); } changeState(vm, Event.OperationFailed, null, work, Step.Done); } } - + return startedVm; } - + @Override public boolean stop(T vm, User user, Account account) throws ResourceUnavailableException { try { @@ -741,17 +757,17 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene throw new CloudRuntimeException("Unable to stop vm because of a concurrent operation", e); } } - + protected boolean sendStop(VirtualMachineGuru guru, VirtualMachineProfile profile, boolean force) { VMInstanceVO vm = profile.getVirtualMachine(); StopCommand stop = new StopCommand(vm, vm.getInstanceName(), null); try { - StopAnswer answer = (StopAnswer)_agentMgr.send(vm.getHostId(), stop); + StopAnswer answer = (StopAnswer) _agentMgr.send(vm.getHostId(), stop); if (!answer.getResult()) { s_logger.debug("Unable to stop VM due to " + answer.getDetails()); return false; } - + guru.finalizeStop(profile, answer); } catch (AgentUnavailableException e) { if (!force) { @@ -762,10 +778,10 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene return false; } } - + return true; } - + protected boolean cleanup(VirtualMachineGuru guru, VirtualMachineProfile profile, ItWorkVO work, Event event, boolean force, User user, Account account) { T vm = profile.getVirtualMachine(); State state = vm.getState(); @@ -776,7 +792,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene s_logger.warn("Unable to cleanup vm " + vm + "; work state is 
incorrect: " + step); return false; } - + if (step == Step.Started || step == Step.Starting) { if (vm.getHostId() != null) { if (!sendStop(guru, profile, force)) { @@ -785,7 +801,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } } } - + if (step != Step.Release && step != Step.Prepare && step != Step.Started && step != Step.Starting) { s_logger.debug("Cleanup is not needed for vm " + vm + "; work state is incorrect: " + step); return true; @@ -803,7 +819,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene s_logger.warn("Failed to stop vm " + vm + " in " + State.Migrating + " state as a part of cleanup process"); return false; } - } + } if (vm.getLastHostId() != null) { if (!sendStop(guru, profile, force)) { s_logger.warn("Failed to stop vm " + vm + " in " + State.Migrating + " state as a part of cleanup process"); @@ -816,7 +832,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene return false; } } - + _networkMgr.release(profile, force); _storageMgr.release(profile); s_logger.debug("Successfully cleanued up resources for the vm " + vm + " in " + state + " state"); @@ -833,21 +849,21 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } return true; } - + if (state == State.Destroyed || state == State.Expunging || state == State.Error) { if (s_logger.isDebugEnabled()) { s_logger.debug("Stopped called on " + vm + " but the state is " + state); } return true; } - + VirtualMachineGuru vmGuru = getVmGuru(vm); - + if (!stateTransitTo(vm, Event.StopRequested, vm.getHostId())) { if (!forced) { throw new ConcurrentOperationException("VM is being operated on by someone else."); } - + vm = vmGuru.findById(vmId); if (vm == null) { if (s_logger.isDebugEnabled()) { @@ -856,7 +872,8 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene return true; } } - + + VirtualMachineProfile profile = new 
VirtualMachineProfileImpl(vm); if ((vm.getState() == State.Starting || vm.getState() == State.Stopping || vm.getState() == State.Migrating) && forced) { ItWorkVO work = _workDao.findByOutstandingWork(vm.getId(), vm.getState()); if (work != null) { @@ -865,24 +882,23 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } } } - - VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); + if (vm.getHostId() != null) { String routerPrivateIp = null; - if(vm.getType() == VirtualMachine.Type.DomainRouter){ + if (vm.getType() == VirtualMachine.Type.DomainRouter) { routerPrivateIp = vm.getPrivateIpAddress(); } StopCommand stop = new StopCommand(vm, vm.getInstanceName(), null, routerPrivateIp); boolean stopped = false; StopAnswer answer = null; try { - answer = (StopAnswer)_agentMgr.send(vm.getHostId(), stop); + answer = (StopAnswer) _agentMgr.send(vm.getHostId(), stop); stopped = answer.getResult(); if (!stopped) { throw new CloudRuntimeException("Unable to stop the virtual machine due to " + answer.getDetails()); } vmGuru.finalizeStop(profile, answer); - + } catch (AgentUnavailableException e) { } catch (OperationTimedoutException e) { } finally { @@ -893,69 +909,70 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene return false; } else { s_logger.warn("Unable to actually stop " + vm + " but continue with release because it's a force stop"); + vmGuru.finalizeStop(profile, answer); } } } } - + if (s_logger.isDebugEnabled()) { s_logger.debug(vm + " is stopped on the host. 
Proceeding to release resource held."); } - + try { _networkMgr.release(profile, forced); s_logger.debug("Successfully released network resources for the vm " + vm); } catch (Exception e) { s_logger.warn("Unable to release some network resources.", e); } - + try { - if (vm.getHypervisorType() != HypervisorType.BareMetal) { - _storageMgr.release(profile); - s_logger.debug("Successfully released storage resources for the vm " + vm); - } + if (vm.getHypervisorType() != HypervisorType.BareMetal) { + _storageMgr.release(profile); + s_logger.debug("Successfully released storage resources for the vm " + vm); + } } catch (Exception e) { s_logger.warn("Unable to release storage resources.", e); } - + vm.setReservationId(null); - + return stateTransitTo(vm, Event.OperationSucceeded, null); } - + private void setStateMachine() { - _stateMachine = VirtualMachine.State.getStateMachine(); + _stateMachine = VirtualMachine.State.getStateMachine(); } - + protected boolean stateTransitTo(VMInstanceVO vm, VirtualMachine.Event e, Long hostId, String reservationId) { vm.setReservationId(reservationId); return _stateMachine.transitTo(vm, e, hostId, _vmDao); } - + @Override public boolean stateTransitTo(VMInstanceVO vm, VirtualMachine.Event e, Long hostId) { State oldState = vm.getState(); - if (oldState == State.Starting ) { + if (oldState == State.Starting) { if (e == Event.OperationSucceeded) { vm.setLastHostId(hostId); } - }else if (oldState == State.Stopping ) { + } else if (oldState == State.Stopping) { if (e == Event.OperationSucceeded) { vm.setLastHostId(vm.getHostId()); } } return _stateMachine.transitTo(vm, e, hostId, _vmDao); } - + @Override public boolean remove(T vm, User user, Account caller) { - //expunge the corresponding nics + // expunge the corresponding nics VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); _networkMgr.expungeNics(profile); s_logger.trace("Nics of the vm " + vm + " are expunged successfully"); return _vmDao.remove(vm.getId()); } - + 
@Override public boolean destroy(T vm, User user, Account caller) throws AgentUnavailableException, OperationTimedoutException, ConcurrentOperationException { if (s_logger.isDebugEnabled()) { @@ -967,12 +984,12 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } return true; } - + if (!advanceStop(vm, false, user, caller)) { s_logger.debug("Unable to stop " + vm); return false; } - + if (!stateTransitTo(vm, VirtualMachine.Event.DestroyRequested, vm.getHostId())) { s_logger.debug("Unable to destroy the vm because it is not in the correct state: " + vm.toString()); return false; @@ -980,34 +997,35 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene return true; } - + protected boolean checkVmOnHost(VirtualMachine vm, long hostId) throws AgentUnavailableException, OperationTimedoutException { - CheckVirtualMachineAnswer answer = (CheckVirtualMachineAnswer)_agentMgr.send(hostId, new CheckVirtualMachineCommand(vm.getInstanceName())); + CheckVirtualMachineAnswer answer = (CheckVirtualMachineAnswer) _agentMgr.send(hostId, new CheckVirtualMachineCommand(vm.getInstanceName())); if (!answer.getResult() || answer.getState() == State.Stopped) { return false; } - + return true; } - + @Override - public T migrate(T vm, long srcHostId, DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException, ManagementServerException, VirtualMachineMigrationException { + public T migrate(T vm, long srcHostId, DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException, ManagementServerException, + VirtualMachineMigrationException { s_logger.info("Migrating " + vm + " to " + dest); - + long dstHostId = dest.getHost().getId(); Host fromHost = _hostDao.findById(srcHostId); if (fromHost == null) { s_logger.info("Unable to find the host to migrate from: " + srcHostId); throw new CloudRuntimeException("Unable to find the host to migrate from: " + srcHostId); - } - - 
if(fromHost.getClusterId().longValue() != dest.getCluster().getId()){ + } + + if (fromHost.getClusterId().longValue() != dest.getCluster().getId()) { s_logger.info("Source and destination host are not in same cluster, unable to migrate to host: " + dest.getHost().getId()); throw new CloudRuntimeException("Source and destination host are not in same cluster, unable to migrate to host: " + dest.getHost().getId()); } - + VirtualMachineGuru vmGuru = getVmGuru(vm); - + long vmId = vm.getId(); vm = vmGuru.findById(vmId); if (vm == null) { @@ -1016,38 +1034,38 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } throw new ManagementServerException("Unable to find a virtual machine with id " + vmId); } - - if(vm.getState() != State.Running){ + + if (vm.getState() != State.Running) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not Running, unable to migrate the vm " + vm); } throw new VirtualMachineMigrationException("VM is not Running, unable to migrate the vm currently " + vm); } - + short alertType = AlertManager.ALERT_TYPE_USERVM_MIGRATE; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_CONSOLE_PROXY_MIGRATE; } - + VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); _networkMgr.prepareNicForMigration(profile, dest); _storageMgr.prepareForMigration(profile, dest); HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType()); - + VirtualMachineTO to = hvGuru.implement(profile); PrepareForMigrationCommand pfmc = new PrepareForMigrationCommand(to); - + ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Migrating, vm.getType(), vm.getId()); work.setStep(Step.Prepare); work.setResourceType(ItWorkVO.ResourceType.Host); work.setResourceId(dstHostId); work = _workDao.persist(work); - + PrepareForMigrationAnswer pfma = 
null; try { - pfma = (PrepareForMigrationAnswer)_agentMgr.send(dstHostId, pfmc); + pfma = (PrepareForMigrationAnswer) _agentMgr.send(dstHostId, pfmc); if (!pfma.getResult()) { String msg = "Unable to prepare for migration due to " + pfma.getDetails(); pfma = null; @@ -1061,19 +1079,19 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene _workDao.update(work.getId(), work); } } - + vm.setLastHostId(srcHostId); if (vm == null || vm.getHostId() == null || vm.getHostId() != srcHostId || !changeState(vm, Event.MigrationRequested, dstHostId, work, Step.Migrating)) { s_logger.info("Migration cancelled because state has changed: " + vm); throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm); - } - + } + boolean migrated = false; try { boolean isWindows = _guestOsCategoryDao.findById(_guestOsDao.findById(vm.getGuestOSId()).getCategoryId()).getName().equalsIgnoreCase("Windows"); MigrateCommand mc = new MigrateCommand(vm.getInstanceName(), dest.getHost().getPrivateIpAddress(), isWindows); try { - MigrateAnswer ma = (MigrateAnswer)_agentMgr.send(vm.getLastHostId(), mc); + MigrateAnswer ma = (MigrateAnswer) _agentMgr.send(vm.getLastHostId(), mc); if (!ma.getResult()) { s_logger.error("Unable to migrate due to " + ma.getDetails()); return null; @@ -1085,45 +1103,45 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } throw new AgentUnavailableException("Operation timed out on migrating " + vm, dstHostId); } - + changeState(vm, VirtualMachine.Event.OperationSucceeded, dstHostId, work, Step.Started); - + try { if (!checkVmOnHost(vm, dstHostId)) { s_logger.error("Unable to complete migration for " + vm); - try{ - _agentMgr.send(srcHostId, new Commands(cleanup(vm.getInstanceName())), null); - }catch (AgentUnavailableException e) { + try { + _agentMgr.send(srcHostId, new Commands(cleanup(vm.getInstanceName())), null); + } catch (AgentUnavailableException e) { 
s_logger.error("AgentUnavailableException while cleanup on source host: " + srcHostId); } cleanup(vmGuru, new VirtualMachineProfileImpl(vm), work, Event.AgentReportStopped, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); return null; } } catch (OperationTimedoutException e) { - } - + } + migrated = true; return vm; } finally { if (!migrated) { s_logger.info("Migration was unsuccessful. Cleaning up: " + vm); - _alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHost.getName() + " in zone " + dest.getDataCenter().getName() + " and pod " + dest.getPod().getName(), "Migrate Command failed. Please check logs."); - try{ - _agentMgr.send(dstHostId, new Commands(cleanup(vm.getInstanceName())), null); - }catch(AgentUnavailableException ae){ - s_logger.info("Looks like the destination Host is unavailable for cleanup"); + _alertMgr.sendAlert(alertType, fromHost.getDataCenterId(), fromHost.getPodId(), "Unable to migrate vm " + vm.getName() + " from host " + fromHost.getName() + " in zone " + + dest.getDataCenter().getName() + " and pod " + dest.getPod().getName(), "Migrate Command failed. 
Please check logs."); + try { + _agentMgr.send(dstHostId, new Commands(cleanup(vm.getInstanceName())), null); + } catch (AgentUnavailableException ae) { + s_logger.info("Looks like the destination Host is unavailable for cleanup"); } - + stateTransitTo(vm, Event.OperationFailed, srcHostId); } - + work.setStep(Step.Done); _workDao.update(work.getId(), work); } } - protected void cancelWorkItems(long nodeId) { GlobalLock scanLock = GlobalLock.getInternLock("vmmgr.cancel.workitem"); @@ -1143,7 +1161,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } else if (work.getType() == State.Migrating) { _haMgr.scheduleMigration(vm); } - } + } work.setStep(Step.Done); _workDao.update(work.getId(), work); } catch (Exception e) { @@ -1158,7 +1176,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene scanLock.releaseRef(); } } - + @Override public boolean migrateAway(VirtualMachine.Type vmType, long vmId, long srcHostId) throws InsufficientServerCapacityException, VirtualMachineMigrationException { VirtualMachineGuru vmGuru = _vmGurus.get(vmType); @@ -1167,21 +1185,21 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene s_logger.debug("Unable to find a VM for " + vmId); return true; } - - VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); - + + VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); + Long hostId = vm.getHostId(); if (hostId == null) { s_logger.debug("Unable to migrate because the VM doesn't have a host id: " + vm); return true; } - + Host host = _hostDao.findById(hostId); - + DataCenterDeployment plan = new DataCenterDeployment(host.getDataCenterId(), host.getPodId(), host.getClusterId(), null, null); ExcludeList excludes = new ExcludeList(); excludes.addHost(hostId); - + DeployDestination dest = null; while (true) { for (DeploymentPlanner planner : _planners) { @@ -1196,11 +1214,11 @@ public class VirtualMachineManagerImpl implements 
VirtualMachineManager, Listene s_logger.debug("Planner " + planner + " was unable to find anything."); } } - + if (dest == null) { throw new InsufficientServerCapacityException("Unable to find a server to migrate to.", host.getClusterId()); } - + excludes.addHost(dest.getHost().getId()); VMInstanceVO vmInstance = null; try { @@ -1208,24 +1226,24 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } catch (ResourceUnavailableException e) { s_logger.debug("Unable to migrate to unavailable " + dest); } catch (ConcurrentOperationException e) { - s_logger.debug("Unable to migrate VM due to: " + e.getMessage()); - } catch (ManagementServerException e) { - s_logger.debug("Unable to migrate VM: " + e.getMessage()); - } catch (VirtualMachineMigrationException e) { - s_logger.debug("Got VirtualMachineMigrationException, Unable to migrate: " + e.getMessage()); - if(vm.getState() == State.Starting){ - s_logger.debug("VM seems to be still Starting, we should retry migration later"); - throw e; - }else{ - s_logger.debug("Unable to migrate VM, VM is not in Running or even Starting state, current state: "+vm.getState().toString()); - } - } + s_logger.debug("Unable to migrate VM due to: " + e.getMessage()); + } catch (ManagementServerException e) { + s_logger.debug("Unable to migrate VM: " + e.getMessage()); + } catch (VirtualMachineMigrationException e) { + s_logger.debug("Got VirtualMachineMigrationException, Unable to migrate: " + e.getMessage()); + if (vm.getState() == State.Starting) { + s_logger.debug("VM seems to be still Starting, we should retry migration later"); + throw e; + } else { + s_logger.debug("Unable to migrate VM, VM is not in Running or even Starting state, current state: " + vm.getState().toString()); + } + } if (vmInstance != null) { return true; } - try { - boolean result = advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); - return result; + try { + boolean result = advanceStop(vm, true, 
_accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); + return result; } catch (ResourceUnavailableException e) { s_logger.debug("Unable to stop VM due to " + e.getMessage()); } catch (ConcurrentOperationException e) { @@ -1233,10 +1251,10 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } catch (OperationTimedoutException e) { s_logger.debug("Unable to stop VM due to " + e.getMessage()); } - return false; - } + return false; + } } - + protected class CleanupTask implements Runnable { @Override public void run() { @@ -1248,7 +1266,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } } } - + @Override public T reboot(T vm, Map params, User caller, Account account) throws InsufficientCapacityException, ResourceUnavailableException { try { @@ -1257,11 +1275,12 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene throw new CloudRuntimeException("Unable to reboot a VM due to concurrent operation", e); } } - + @Override - public T advanceReboot(T vm, Map params, User caller, Account account) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException { + public T advanceReboot(T vm, Map params, User caller, Account account) throws InsufficientCapacityException, + ConcurrentOperationException, ResourceUnavailableException { T rebootedVm = null; - + DataCenter dc = _configMgr.getZone(vm.getDataCenterId()); HostPodVO pod = _configMgr.getPod(vm.getPodId()); Host host = _hostDao.findById(vm.getHostId()); @@ -1270,13 +1289,13 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene cluster = _configMgr.getCluster(host.getClusterId()); } DeployDestination dest = new DeployDestination(dc, pod, cluster, host); - + try { - + Commands cmds = new Commands(OnError.Revert); cmds.addCommand(new RebootCommand(vm.getName())); _agentMgr.send(host.getId(), cmds); - + Answer rebootAnswer = 
cmds.getAnswer(RebootAnswer.class); if (rebootAnswer != null && rebootAnswer.getResult()) { rebootedVm = vm; @@ -1287,26 +1306,26 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene s_logger.warn("Unable to send the reboot command to host " + dest.getHost() + " for the vm " + vm + " due to operation timeout", e); throw new CloudRuntimeException("Failed to reboot the vm on host " + dest.getHost()); } - + return rebootedVm; } - + @Override public VMInstanceVO findById(VirtualMachine.Type type, long vmId) { VirtualMachineGuru guru = _vmGurus.get(type); return guru.findById(vmId); } - + public Command cleanup(String vmName) { return new StopCommand(vmName); } - + public Commands deltaSync(long hostId, Map newStates) { Map states = convertToInfos(newStates); Commands commands = new Commands(OnError.Continue); - + boolean nativeHA = _agentMgr.isHostNativeHAEnabled(hostId); - + for (Map.Entry entry : states.entrySet()) { AgentVmInfo info = entry.getValue(); @@ -1315,7 +1334,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene Command command = null; if (vm != null) { command = compareState(vm, info, false, nativeHA); - } else { + } else { if (s_logger.isDebugEnabled()) { s_logger.debug("Cleaning up a VM that is no longer found: " + info.name); } @@ -1329,7 +1348,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene return commands; } - + protected Map convertToInfos(final Map states) { final HashMap map = new HashMap(); @@ -1349,7 +1368,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene map.put(vm.getId(), new AgentVmInfo(entry.getKey(), vmGuru, vm, entry.getValue())); break; } - + Long id = vmGuru.convertToId(name); if (id != null) { map.put(id, new AgentVmInfo(entry.getKey(), vmGuru, null, entry.getValue())); @@ -1361,28 +1380,27 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } /** - * compareState does 
as its name suggests and compares the states between - * management server and agent. It returns whether something should be - * cleaned up - * + * compareState does as its name suggests and compares the states between management server and agent. It returns whether + * something should be cleaned up + * */ protected Command compareState(VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync, boolean nativeHA) { State agentState = info.state; final String agentName = info.name; final State serverState = vm.getState(); final String serverName = vm.getName(); - + VirtualMachineGuru vmGuru = getVmGuru(vm); - + Command command = null; if (s_logger.isDebugEnabled()) { s_logger.debug("VM " + serverName + ": server state = " + serverState.toString() + " and agent state = " + agentState.toString()); } - + if (agentState == State.Error) { agentState = State.Stopped; - + short alertType = AlertManager.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.ALERT_TYPE_DOMAIN_ROUTER; @@ -1393,15 +1411,16 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene HostPodVO podVO = _podDao.findById(vm.getPodId()); DataCenterVO dcVO = _dcDao.findById(vm.getDataCenterId()); HostVO hostVO = _hostDao.findById(vm.getHostId()); - + String hostDesc = "name: " + hostVO.getName() + " (id:" + hostVO.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: " + vm.getName() + ", id: " + vm.getId() + ") stopped on host " + hostDesc + " due to storage failure", "Virtual Machine " + vm.getName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped due to storage failure."); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodId(), "VM (name: " + vm.getName() + ", id: " + vm.getId() + ") stopped on host " + hostDesc + " due to storage failure", + "Virtual Machine " + 
vm.getName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped due to storage failure."); } - -// if (serverState == State.Migrating) { -// s_logger.debug("Skipping vm in migrating state: " + vm.toString()); -// return null; -// } + + // if (serverState == State.Migrating) { + // s_logger.debug("Skipping vm in migrating state: " + vm.toString()); + // return null; + // } if (agentState == serverState) { if (s_logger.isDebugEnabled()) { @@ -1410,34 +1429,34 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene assert (agentState == State.Stopped || agentState == State.Running) : "If the states we send up is changed, this must be changed."; if (agentState == State.Running) { stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, vm.getHostId()); - // FIXME: What if someone comes in and sets it to stopping? Then what? + // FIXME: What if someone comes in and sets it to stopping? Then what? return null; } s_logger.debug("State matches but the agent said stopped so let's send a cleanup command anyways."); return cleanup(agentName); - } - + } + if (agentState == State.Shutdowned) { if (serverState == State.Running || serverState == State.Starting || serverState == State.Stopping) { try { advanceStop(vm, true, _accountMgr.getSystemUser(), _accountMgr.getSystemAccount()); } catch (AgentUnavailableException e) { - assert(false) : "How do we hit this with forced on?"; + assert (false) : "How do we hit this with forced on?"; return null; } catch (OperationTimedoutException e) { - assert(false) : "How do we hit this with forced on?"; + assert (false) : "How do we hit this with forced on?"; return null; } catch (ConcurrentOperationException e) { - assert(false) : "How do we hit this with forced on?"; + assert (false) : "How do we hit this with forced on?"; return null; } } else { - s_logger.debug("Sending cleanup to a shutdowned vm: " + agentName); + s_logger.debug("Sending cleanup to a shutdowned vm: " + 
agentName); command = cleanup(agentName); } } else if (agentState == State.Stopped) { // This state means the VM on the agent was detected previously - // and now is gone. This is slightly different than if the VM + // and now is gone. This is slightly different than if the VM // was never completed but we still send down a Stop Command // to ensure there's cleanup. if (serverState == State.Running) { @@ -1449,7 +1468,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } else if (serverState == State.Starting) { s_logger.debug("Ignoring VM in starting mode: " + vm.getName()); _haMgr.scheduleRestart(vm, false); - } + } command = cleanup(agentName); } else if (agentState == State.Running) { if (serverState == State.Starting) { @@ -1459,38 +1478,36 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene stateTransitTo(vm, Event.AgentReportRunning, vm.getHostId()); s_logger.debug("VM's " + vm + " state is starting on full sync so updating it to Running"); vm = vmGuru.findById(vm.getId()); - + VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); - + Commands cmds = new Commands(OnError.Revert); s_logger.debug("Finalizing commands that need to be send to complete Start process for the vm " + vm); - - + if (vmGuru.finalizeCommandsOnStart(cmds, profile)) { if (cmds.size() != 0) { try { _agentMgr.send(vm.getHostId(), cmds); - } catch (OperationTimedoutException e){ - s_logger.error("Exception during update for running vm: " + vm, e); + } catch (OperationTimedoutException e) { + s_logger.error("Exception during update for running vm: " + vm, e); return null; } catch (ResourceUnavailableException e) { - s_logger.error("Exception during update for running vm: " + vm, e); + s_logger.error("Exception during update for running vm: " + vm, e); return null; } } - + if (vmGuru.finalizeStart(profile, vm.getHostId(), cmds, null)) { stateTransitTo(vm, Event.AgentReportRunning, vm.getHostId()); } else { - 
s_logger.error("Exception during update for running vm: " + vm); + s_logger.error("Exception during update for running vm: " + vm); return null; } } else { s_logger.error("Unable to finalize commands on start for vm: " + vm); return null; } - - + } } else if (serverState == State.Stopping) { s_logger.debug("Scheduling a stop command for " + vm); @@ -1498,7 +1515,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } else { s_logger.debug("VM state is in stopped so stopping it on the agent"); command = cleanup(agentName); - } + } } return command; } @@ -1519,10 +1536,10 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene if (info == null) { info = new AgentVmInfo(vm.getInstanceName(), getVmGuru(vm), vm, State.Stopped); castedVm = info.guru.findById(vm.getId()); - } else { + } else { castedVm = info.vm; } - + Command command = compareState(castedVm, info, true, nativeHA); if (command != null) { commands.addCommand(command); @@ -1552,7 +1569,6 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene return commands; } - @Override public boolean isRecurring() { return false; @@ -1571,12 +1587,12 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } return true; } - + @Override public boolean processTimeout(long agentId, long seq) { return true; } - + @Override public int getTimeout() { return -1; @@ -1587,7 +1603,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene boolean processed = false; for (Command cmd : cmds) { if (cmd instanceof PingRoutingCommand) { - PingRoutingCommand ping = (PingRoutingCommand)cmd; + PingRoutingCommand ping = (PingRoutingCommand) cmd; if (ping.getNewStates().size() > 0) { Commands commands = deltaSync(agentId, ping.getNewStates()); if (commands.size() > 0) { @@ -1608,24 +1624,24 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene public AgentControlAnswer 
processControlCommand(long agentId, AgentControlCommand cmd) { return null; } - + @Override public boolean processDisconnect(long agentId, Status state) { return true; } - + @Override public void processConnect(HostVO agent, StartupCommand cmd) throws ConnectionException { if (!(cmd instanceof StartupRoutingCommand)) { return; } - + long agentId = agent.getId(); - - StartupRoutingCommand startup = (StartupRoutingCommand)cmd; - + + StartupRoutingCommand startup = (StartupRoutingCommand) cmd; + Commands commands = fullSync(agentId, startup.getVmStates()); - + if (commands.size() > 0) { s_logger.debug("Sending clean commands to the agent"); @@ -1650,7 +1666,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } } } - + protected class TransitionTask implements Runnable { @Override public void run() { @@ -1659,7 +1675,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene s_logger.debug("Couldn't get the global lock"); return; } - + if (!lock.lock(30)) { s_logger.debug("Couldn't lock the db"); return; @@ -1683,7 +1699,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene } } } - + protected class AgentVmInfo { public String name; public State state; @@ -1695,7 +1711,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene this.name = name; this.state = state; this.vm = vm; - this.guru = (VirtualMachineGuru)guru; + this.guru = (VirtualMachineGuru) guru; } } }