diff --git a/api/src/com/cloud/dc/DataCenter.java b/api/src/com/cloud/dc/DataCenter.java index 5b3d3c01f30..7d434c5f231 100644 --- a/api/src/com/cloud/dc/DataCenter.java +++ b/api/src/com/cloud/dc/DataCenter.java @@ -18,15 +18,14 @@ package com.cloud.dc; import com.cloud.org.Grouping; import org.apache.cloudstack.acl.InfrastructureEntity; -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.api.InternalIdentity; +import org.apache.cloudstack.kernel.Partition; import java.util.Map; /** * */ -public interface DataCenter extends InfrastructureEntity, Grouping, Identity, InternalIdentity { +public interface DataCenter extends InfrastructureEntity, Grouping, Partition { public enum NetworkType { Basic, Advanced, diff --git a/api/src/com/cloud/event/EventTypes.java b/api/src/com/cloud/event/EventTypes.java index 5784329afc9..f641c09aa76 100755 --- a/api/src/com/cloud/event/EventTypes.java +++ b/api/src/com/cloud/event/EventTypes.java @@ -74,6 +74,7 @@ import com.cloud.template.VirtualMachineTemplate; import com.cloud.user.Account; import com.cloud.user.User; import com.cloud.vm.VirtualMachine; +import org.apache.cloudstack.ha.HAConfig; public class EventTypes { @@ -318,6 +319,12 @@ public class EventTypes { public static final String EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD = "HOST.OOBM.CHANGEPASSWORD"; public static final String EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION = "HOST.OOBM.POWERSTATE.TRANSITION"; + // HA + public static final String EVENT_HA_RESOURCE_ENABLE = "HA.RESOURCE.ENABLE"; + public static final String EVENT_HA_RESOURCE_DISABLE = "HA.RESOURCE.DISABLE"; + public static final String EVENT_HA_RESOURCE_CONFIGURE = "HA.RESOURCE.CONFIGURE"; + public static final String EVENT_HA_STATE_TRANSITION = "HA.STATE.TRANSITION"; + // Maintenance public static final String EVENT_MAINTENANCE_CANCEL = "MAINT.CANCEL"; public static final String EVENT_MAINTENANCE_CANCEL_PRIMARY_STORAGE = "MAINT.CANCEL.PS"; @@ -754,6 +761,12 @@ public 
class EventTypes { entityEventDetails.put(EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD, Host.class); entityEventDetails.put(EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION, Host.class); + // HA + entityEventDetails.put(EVENT_HA_RESOURCE_ENABLE, HAConfig.class); + entityEventDetails.put(EVENT_HA_RESOURCE_DISABLE, HAConfig.class); + entityEventDetails.put(EVENT_HA_RESOURCE_CONFIGURE, HAConfig.class); + entityEventDetails.put(EVENT_HA_STATE_TRANSITION, HAConfig.class); + // Maintenance entityEventDetails.put(EVENT_MAINTENANCE_CANCEL, Host.class); entityEventDetails.put(EVENT_MAINTENANCE_CANCEL_PRIMARY_STORAGE, Host.class); diff --git a/api/src/com/cloud/host/Host.java b/api/src/com/cloud/host/Host.java index 689ed12b64e..d3e3856df18 100755 --- a/api/src/com/cloud/host/Host.java +++ b/api/src/com/cloud/host/Host.java @@ -16,19 +16,19 @@ // under the License. package com.cloud.host; -import java.util.Date; - -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.api.InternalIdentity; - import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.resource.ResourceState; import com.cloud.utils.fsm.StateObject; +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.kernel.Partition; + +import java.util.Date; /** * Host represents one particular host server. 
*/ -public interface Host extends StateObject, Identity, InternalIdentity { +public interface Host extends StateObject, Identity, Partition, HAResource { public enum Type { Storage(false), Routing(false), SecondaryStorage(false), SecondaryStorageCmdExecutor(false), ConsoleProxy(true), ExternalFirewall(false), ExternalLoadBalancer( false), ExternalVirtualSwitchSupervisor(false), PxeServer(false), BaremetalPxe(false), BaremetalDhcp(false), TrafficMonitor(false), @@ -202,5 +202,7 @@ public interface Host extends StateObject, Identity, InternalIdentity { boolean isInMaintenanceStates(); + boolean isDisabled(); + ResourceState getResourceState(); } diff --git a/api/src/com/cloud/host/Status.java b/api/src/com/cloud/host/Status.java index 73e6cc9185a..e381115db41 100755 --- a/api/src/com/cloud/host/Status.java +++ b/api/src/com/cloud/host/Status.java @@ -150,12 +150,14 @@ public enum Status { s_fsm.addTransition(Status.Down, Event.ManagementServerDown, Status.Down); s_fsm.addTransition(Status.Down, Event.AgentDisconnected, Status.Down); s_fsm.addTransition(Status.Down, Event.PingTimeout, Status.Down); + s_fsm.addTransition(Status.Down, Event.HostDown, Status.Down); s_fsm.addTransition(Status.Alert, Event.AgentConnected, Status.Connecting); s_fsm.addTransition(Status.Alert, Event.Ping, Status.Up); s_fsm.addTransition(Status.Alert, Event.Remove, Status.Removed); s_fsm.addTransition(Status.Alert, Event.ManagementServerDown, Status.Alert); s_fsm.addTransition(Status.Alert, Event.AgentDisconnected, Status.Alert); s_fsm.addTransition(Status.Alert, Event.ShutdownRequested, Status.Disconnected); + s_fsm.addTransition(Status.Alert, Event.HostDown, Status.Down); s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected); s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting); s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected); diff --git a/api/src/com/cloud/org/Cluster.java 
b/api/src/com/cloud/org/Cluster.java index 90fcb5729e4..4079c88dfde 100644 --- a/api/src/com/cloud/org/Cluster.java +++ b/api/src/com/cloud/org/Cluster.java @@ -16,13 +16,11 @@ // under the License. package com.cloud.org; -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.api.InternalIdentity; - import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.org.Managed.ManagedState; +import org.apache.cloudstack.kernel.Partition; -public interface Cluster extends Grouping, InternalIdentity, Identity { +public interface Cluster extends Grouping, Partition { public static enum ClusterType { CloudManaged, ExternalManaged; }; diff --git a/api/src/com/cloud/resource/ResourceState.java b/api/src/com/cloud/resource/ResourceState.java index 5d2c962f989..d952afa0b7d 100755 --- a/api/src/com/cloud/resource/ResourceState.java +++ b/api/src/com/cloud/resource/ResourceState.java @@ -93,6 +93,7 @@ public enum ResourceState { s_fsm.addTransition(ResourceState.Enabled, Event.InternalCreated, ResourceState.Enabled); s_fsm.addTransition(ResourceState.Enabled, Event.Disable, ResourceState.Disabled); s_fsm.addTransition(ResourceState.Enabled, Event.AdminAskMaintenace, ResourceState.PrepareForMaintenance); + s_fsm.addTransition(ResourceState.Enabled, Event.InternalEnterMaintenance, ResourceState.Maintenance); s_fsm.addTransition(ResourceState.Disabled, Event.Enable, ResourceState.Enabled); s_fsm.addTransition(ResourceState.Disabled, Event.Disable, ResourceState.Disabled); s_fsm.addTransition(ResourceState.Disabled, Event.InternalCreated, ResourceState.Disabled); @@ -109,5 +110,7 @@ public enum ResourceState { s_fsm.addTransition(ResourceState.ErrorInMaintenance, Event.InternalEnterMaintenance, ResourceState.Maintenance); s_fsm.addTransition(ResourceState.ErrorInMaintenance, Event.AdminCancelMaintenance, ResourceState.Enabled); s_fsm.addTransition(ResourceState.Error, Event.InternalCreated, ResourceState.Error); + 
s_fsm.addTransition(ResourceState.Disabled, Event.DeleteHost, ResourceState.Disabled); + } } diff --git a/api/src/com/cloud/vm/VirtualMachine.java b/api/src/com/cloud/vm/VirtualMachine.java index b45ac7c9be9..8b4a60541aa 100755 --- a/api/src/com/cloud/vm/VirtualMachine.java +++ b/api/src/com/cloud/vm/VirtualMachine.java @@ -16,26 +16,24 @@ // under the License. package com.cloud.vm; -import java.util.Arrays; -import java.util.Date; -import java.util.Map; - -import org.apache.cloudstack.acl.ControlledEntity; -import org.apache.cloudstack.api.Displayable; -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.api.InternalIdentity; - import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.utils.fsm.StateMachine2; import com.cloud.utils.fsm.StateMachine2.Transition; import com.cloud.utils.fsm.StateMachine2.Transition.Impact; import com.cloud.utils.fsm.StateObject; +import org.apache.cloudstack.acl.ControlledEntity; +import org.apache.cloudstack.api.Displayable; +import org.apache.cloudstack.kernel.Partition; + +import java.util.Arrays; +import java.util.Date; +import java.util.Map; /** * VirtualMachine describes the properties held by a virtual machine * */ -public interface VirtualMachine extends RunningOn, ControlledEntity, Identity, InternalIdentity, Displayable, StateObject { +public interface VirtualMachine extends RunningOn, ControlledEntity, Partition, Displayable, StateObject { public enum PowerState { PowerUnknown, diff --git a/api/src/org/apache/cloudstack/alert/AlertService.java b/api/src/org/apache/cloudstack/alert/AlertService.java index fba80698071..1c33125d093 100644 --- a/api/src/org/apache/cloudstack/alert/AlertService.java +++ b/api/src/org/apache/cloudstack/alert/AlertService.java @@ -16,12 +16,12 @@ // under the License. 
package org.apache.cloudstack.alert; -import java.util.HashSet; -import java.util.Set; - import com.cloud.capacity.Capacity; import com.cloud.exception.InvalidParameterValueException; +import java.util.HashSet; +import java.util.Set; + public interface AlertService { public static class AlertType { private static Set defaultAlertTypes = new HashSet(); @@ -66,6 +66,7 @@ public interface AlertService { public static final AlertType ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED = new AlertType((short)26, "ALERT.RESOURCE.EXCEED", true); public static final AlertType ALERT_TYPE_SYNC = new AlertType((short)27, "ALERT.TYPE.SYNC", true); public static final AlertType ALERT_TYPE_OOBM_AUTH_ERROR = new AlertType((short)29, "ALERT.OOBM.AUTHERROR", true); + public static final AlertType ALERT_TYPE_HA_ACTION = new AlertType((short)30, "ALERT.HA.ACTION", true); public short getType() { return type; diff --git a/api/src/org/apache/cloudstack/api/ApiConstants.java b/api/src/org/apache/cloudstack/api/ApiConstants.java index 647d44b5464..0915067ba7a 100755 --- a/api/src/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/org/apache/cloudstack/api/ApiConstants.java @@ -21,6 +21,7 @@ public class ApiConstants { public static final String ACCOUNTS = "accounts"; public static final String ACCOUNT_TYPE = "accounttype"; public static final String ACCOUNT_ID = "accountid"; + public static final String ACTIVITY = "activity"; public static final String ADDRESS = "address"; public static final String ALGORITHM = "algorithm"; public static final String ALLOCATED_ONLY = "allocatedonly"; @@ -93,6 +94,7 @@ public class ApiConstants { public static final String DOMAIN_ID = "domainid"; public static final String DOMAIN__ID = "domainId"; public static final String DURATION = "duration"; + public static final String ELIGIBLE = "eligible"; public static final String EMAIL = "email"; public static final String END_DATE = "enddate"; public static final String END_IP = "endip"; @@ -100,6 +102,7 @@ public class 
ApiConstants { public static final String END_PORT = "endport"; public static final String ENTRY_TIME = "entrytime"; public static final String EXPIRES = "expires"; + public static final String FENCE = "fence"; public static final String FETCH_LATEST = "fetchlatest"; public static final String FIRSTNAME = "firstname"; public static final String FORCED = "forced"; @@ -118,6 +121,9 @@ public class ApiConstants { public static final String GUEST_CIDR_ADDRESS = "guestcidraddress"; public static final String GUEST_VLAN_RANGE = "guestvlanrange"; public static final String HA_ENABLE = "haenable"; + public static final String HA_PROVIDER = "haprovider"; + public static final String HA_STATE = "hastate"; + public static final String HEALTH = "health"; public static final String HOST_ID = "hostid"; public static final String HOST_NAME = "hostname"; public static final String HYPERVISOR = "hypervisor"; @@ -212,6 +218,7 @@ public class ApiConstants { public static final String PUBLIC_END_PORT = "publicendport"; public static final String PUBLIC_ZONE = "publiczone"; public static final String RECEIVED_BYTES = "receivedbytes"; + public static final String RECOVER = "recover"; public static final String REQUIRES_HVM = "requireshvm"; public static final String RESOURCE_TYPE = "resourcetype"; public static final String RESPONSE = "response"; diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/ConfigureHAForHostCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/ConfigureHAForHostCmd.java new file mode 100644 index 00000000000..f85dbb23504 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/ConfigureHAForHostCmd.java @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.host.Host; +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.HostHAResponse; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; +import org.apache.cloudstack.ha.HAResource; + +import javax.inject.Inject; + +@APICommand(name = ConfigureHAForHostCmd.APINAME, description = "Configures HA for a host", + responseObject = HostHAResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = 
{RoleType.Admin}) +public final class ConfigureHAForHostCmd extends BaseAsyncCmd { + public static final String APINAME = "configureHAForHost"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class, + description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long hostId; + + @Parameter(name = ApiConstants.PROVIDER, type = CommandType.STRING, + description = "HA provider", required = true, validations = {ApiArgValidator.NotNullOrEmpty}) + private String haProvider; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getHostId() { + return hostId; + } + + public String getHaProvider() { + return haProvider; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + private void setupResponse(final boolean result, final String resourceUuid) { + final HostHAResponse response = new HostHAResponse(); + response.setId(resourceUuid); + response.setProvider(getHaProvider().toLowerCase()); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Host host = 
_resourceService.getHost(getHostId()); + if (host == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId()); + } + + final boolean result = haConfigManager.configureHA(host.getId(), HAResource.ResourceType.Host, getHaProvider()); + if (!result) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to configure HA provider for the host"); + } + CallContext.current().setEventDetails("Host Id:" + host.getId() + " HA configured with provider: " + getHaProvider()); + CallContext.current().putContextParameter(Host.class, host.getUuid()); + + setupResponse(result, host.getUuid()); + } + + @Override + public String getEventType() { + return EventTypes.EVENT_HA_RESOURCE_CONFIGURE; /* provider configuration is audited as HA.RESOURCE.CONFIGURE, not as a disable event */ + } + + @Override + public String getEventDescription() { + return "configure HA for host: " + getHostId(); + } +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForClusterCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForClusterCmd.java new file mode 100644 index 00000000000..053c978b831 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForClusterCmd.java @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.org.Cluster; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.ClusterResponse; +import org.apache.cloudstack.api.response.SuccessResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; + +import javax.inject.Inject; + +@APICommand(name = DisableHAForClusterCmd.APINAME, description = "Disables HA cluster-wide", + responseObject = SuccessResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class DisableHAForClusterCmd extends BaseAsyncCmd { + public static final String APINAME = "disableHAForCluster"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.CLUSTER_ID, type = BaseCmd.CommandType.UUID, entityType = ClusterResponse.class, + description = "ID of the cluster", required = true, validations = 
{ApiArgValidator.PositiveNumber}) + private Long clusterId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getClusterId() { + return clusterId; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } + + private void setupResponse(final boolean result) { + final SuccessResponse response = new SuccessResponse(); + response.setSuccess(result); + response.setResponseName(getCommandName()); + response.setObjectName("ha"); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Cluster cluster = _resourceService.getCluster(getClusterId()); + if (cluster == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find cluster by ID: " + getClusterId()); + } + final boolean result = haConfigManager.disableHA(cluster); + CallContext.current().setEventDetails("Cluster Id:" + cluster.getId() + " HA enabled: false"); + CallContext.current().putContextParameter(Cluster.class, cluster.getUuid()); + + setupResponse(result); + } + + @Override + public String getEventType() { + return EventTypes.EVENT_HA_RESOURCE_DISABLE; + } + + @Override + public String getEventDescription() { + return "disable HA for cluster: " + getClusterId(); + } + +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForHostCmd.java 
b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForHostCmd.java new file mode 100644 index 00000000000..87ebe878ceb --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForHostCmd.java @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.host.Host; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.HostHAResponse; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; +import org.apache.cloudstack.ha.HAResource; + +import javax.inject.Inject; + +@APICommand(name = DisableHAForHostCmd.APINAME, description = "Disables HA for a host", + responseObject = HostHAResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class DisableHAForHostCmd extends BaseAsyncCmd { + public static final String APINAME = "disableHAForHost"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class, + description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long hostId; + + 
///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getHostId() { + return hostId; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } + + private void setupResponse(final boolean result, final String resourceUuid) { + final HostHAResponse response = new HostHAResponse(); + response.setId(resourceUuid); + response.setEnabled(false); + response.setStatus(result); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Host host = _resourceService.getHost(getHostId()); + if (host == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId()); + } + + final boolean result = haConfigManager.disableHA(host.getId(), HAResource.ResourceType.Host); + CallContext.current().setEventDetails("Host Id:" + host.getId() + " HA enabled: false"); + CallContext.current().putContextParameter(Host.class, host.getUuid()); + + setupResponse(result, host.getUuid()); + } + + @Override + public String getEventType() { + return EventTypes.EVENT_HA_RESOURCE_DISABLE; + } + + @Override + public String getEventDescription() { + return "disable HA for host: " + getHostId(); + } +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForZoneCmd.java 
b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForZoneCmd.java new file mode 100644 index 00000000000..845c4a663b5 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForZoneCmd.java @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.cloudstack.api.command.admin.ha;
+
+import com.cloud.dc.DataCenter;
+import com.cloud.event.EventTypes;
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiArgValidator;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.ApiErrorCode;
+import org.apache.cloudstack.api.BaseAsyncCmd;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.SuccessResponse;
+import org.apache.cloudstack.api.response.ZoneResponse;
+import org.apache.cloudstack.context.CallContext;
+import org.apache.cloudstack.ha.HAConfigManager;
+
+import javax.inject.Inject;
+
+/**
+ * Admin-only asynchronous API command that turns HA off for a whole zone.
+ *
+ * The zone is looked up through the resource service and passed to the
+ * {@link HAConfigManager}; the boolean that call returns is reported back in a
+ * {@link SuccessResponse}.
+ */
+@APICommand(name = DisableHAForZoneCmd.APINAME, description = "Disables HA for a zone",
+        responseObject = SuccessResponse.class,
+        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class DisableHAForZoneCmd extends BaseAsyncCmd {
+    public static final String APINAME = "disableHAForZone";
+
+    @Inject
+    private HAConfigManager haConfigManager;
+
+    /** Request parameter: the zone whose HA is to be disabled (mandatory). */
+    @Parameter(name = ApiConstants.ZONE_ID, type = BaseCmd.CommandType.UUID, entityType = ZoneResponse.class,
+            description = "ID of the zone", required = true, validations = {ApiArgValidator.PositiveNumber})
+    private Long zoneId;
+
+    /** @return the zone ID bound from the request */
+    public Long getZoneId() {
+        return zoneId;
+    }
+
+    /** @return the lower-cased API name followed by the common response suffix */
+    @Override
+    public String getCommandName() {
+        final String lowerCasedName = APINAME.toLowerCase();
+        return lowerCasedName + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    /** @return the event type recorded for this command */
+    @Override
+    public String getEventType() {
+        return EventTypes.EVENT_HA_RESOURCE_DISABLE;
+    }
+
+    /** @return a short description of the event, including the requested zone ID */
+    @Override
+    public String getEventDescription() {
+        return "disable HA for zone: " + getZoneId();
+    }
+
+    /** @return the ID of the account that issued the call */
+    @Override
+    public long getEntityOwnerId() {
+        return CallContext.current().getCallingAccountId();
+    }
+
+    /**
+     * Resolves the zone, disables HA for it, records the event details on the
+     * call context and publishes the outcome as the response object.
+     *
+     * @throws ServerApiException with PARAM_ERROR when no zone matches the given ID
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final Long requestedZoneId = getZoneId();
+        final DataCenter zone = _resourceService.getZone(requestedZoneId);
+        if (zone == null) {
+            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find zone by ID: " + requestedZoneId);
+        }
+
+        final boolean disabled = haConfigManager.disableHA(zone);
+
+        // The context is populated only after the manager call, as in the original ordering.
+        final CallContext callContext = CallContext.current();
+        callContext.setEventDetails("Zone Id:" + zone.getId() + " HA enabled: false");
+        callContext.putContextParameter(DataCenter.class, zone.getUuid());
+
+        setupResponse(disabled);
+    }
+
+    /**
+     * Wraps the operation outcome in a SuccessResponse and installs it as this
+     * command's response object.
+     *
+     * @param result value returned by the HA config manager
+     */
+    private void setupResponse(final boolean result) {
+        final SuccessResponse successResponse = new SuccessResponse();
+        successResponse.setObjectName("ha");
+        successResponse.setResponseName(getCommandName());
+        successResponse.setSuccess(result);
+        setResponseObject(successResponse);
+    }
+}
diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForClusterCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForClusterCmd.java
new file mode 100644
index 00000000000..e06d0d2c1b0
--- /dev/null
+++
b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForClusterCmd.java @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.org.Cluster; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.ClusterResponse; +import org.apache.cloudstack.api.response.SuccessResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; + +import 
javax.inject.Inject;
+
+/**
+ * Admin-only asynchronous API command that turns HA on for every resource in a
+ * cluster.
+ *
+ * The cluster is looked up through the resource service and passed to the
+ * {@link HAConfigManager}; the boolean that call returns is reported back in a
+ * {@link SuccessResponse}.
+ */
+@APICommand(name = EnableHAForClusterCmd.APINAME, description = "Enables HA cluster-wide",
+        responseObject = SuccessResponse.class,
+        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class EnableHAForClusterCmd extends BaseAsyncCmd {
+    public static final String APINAME = "enableHAForCluster";
+
+    @Inject
+    private HAConfigManager haConfigManager;
+
+    /** Request parameter: the cluster whose HA is to be enabled (mandatory). */
+    @Parameter(name = ApiConstants.CLUSTER_ID, type = BaseCmd.CommandType.UUID, entityType = ClusterResponse.class,
+            description = "ID of the cluster", required = true, validations = {ApiArgValidator.PositiveNumber})
+    private Long clusterId;
+
+    /** @return the cluster ID bound from the request */
+    public Long getClusterId() {
+        return clusterId;
+    }
+
+    /** @return the lower-cased API name followed by the common response suffix */
+    @Override
+    public String getCommandName() {
+        final String lowerCasedName = APINAME.toLowerCase();
+        return lowerCasedName + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    /** @return the event type recorded for this command */
+    @Override
+    public String getEventType() {
+        return EventTypes.EVENT_HA_RESOURCE_ENABLE;
+    }
+
+    /** @return a short description of the event, including the requested cluster ID */
+    @Override
+    public String getEventDescription() {
+        return "enable HA for cluster: " + getClusterId();
+    }
+
+    /** @return the ID of the account that issued the call */
+    @Override
+    public long getEntityOwnerId() {
+        return CallContext.current().getCallingAccountId();
+    }
+
+    /**
+     * Resolves the cluster, enables HA for it, records the event details on the
+     * call context and publishes the outcome as the response object.
+     *
+     * @throws ServerApiException with PARAM_ERROR when no cluster matches the given ID
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final Long requestedClusterId = getClusterId();
+        final Cluster target = _resourceService.getCluster(requestedClusterId);
+        if (target == null) {
+            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find cluster by ID: " + requestedClusterId);
+        }
+
+        final boolean enabled = haConfigManager.enableHA(target);
+
+        // The context is populated only after the manager call, as in the original ordering.
+        final CallContext callContext = CallContext.current();
+        callContext.setEventDetails("Cluster Id:" + target.getId() + " HA enabled: true");
+        callContext.putContextParameter(Cluster.class, target.getUuid());
+
+        setupResponse(enabled);
+    }
+
+    /**
+     * Wraps the operation outcome in a SuccessResponse and installs it as this
+     * command's response object.
+     *
+     * @param result value returned by the HA config manager
+     */
+    private void setupResponse(final boolean result) {
+        final SuccessResponse successResponse = new SuccessResponse();
+        successResponse.setObjectName("ha");
+        successResponse.setResponseName(getCommandName());
+        successResponse.setSuccess(result);
+        setResponseObject(successResponse);
+    }
+}
diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForHostCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForHostCmd.java
new file mode 100644
index 00000000000..b23841ad56f
--- /dev/null
+++ b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForHostCmd.java
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.api.command.admin.ha;
+
+import com.cloud.event.EventTypes;
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.host.Host;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiArgValidator;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.ApiErrorCode;
+import org.apache.cloudstack.api.BaseAsyncCmd;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.HostHAResponse;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.context.CallContext;
+import org.apache.cloudstack.ha.HAConfigManager;
+import org.apache.cloudstack.ha.HAResource;
+
+import javax.inject.Inject;
+
+/**
+ * Admin-only asynchronous API command that turns HA on for a single host.
+ *
+ * The host is looked up through the resource service and its internal ID is
+ * passed to the {@link HAConfigManager}; the outcome is reported back in a
+ * {@link HostHAResponse}.
+ */
+@APICommand(name = EnableHAForHostCmd.APINAME, description = "Enables HA for a host",
+        responseObject = HostHAResponse.class,
+        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class EnableHAForHostCmd extends BaseAsyncCmd {
+    public static final String APINAME = "enableHAForHost";
+
+    @Inject
+    private HAConfigManager haConfigManager;
+
+    /** Request parameter: the host whose HA is to be enabled (mandatory). */
+    @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class,
+            description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber})
+    private Long hostId;
+
+    /** @return the host ID bound from the request */
+    public Long getHostId() {
+        return hostId;
+    }
+
+    /** @return the lower-cased API name followed by the common response suffix */
+    @Override
+    public String getCommandName() {
+        final String lowerCasedName = APINAME.toLowerCase();
+        return lowerCasedName + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    /** @return the event type recorded for this command */
+    @Override
+    public String getEventType() {
+        return EventTypes.EVENT_HA_RESOURCE_ENABLE;
+    }
+
+    /** @return a short description of the event, including the requested host ID */
+    @Override
+    public String getEventDescription() {
+        return "enable HA for host: " + getHostId();
+    }
+
+    /** @return the ID of the account that issued the call */
+    @Override
+    public long getEntityOwnerId() {
+        return CallContext.current().getCallingAccountId();
+    }
+
+    /**
+     * Resolves the host, enables HA for it, records the event details on the
+     * call context and publishes the outcome as the response object.
+     *
+     * @throws ServerApiException with PARAM_ERROR when no host matches the given ID
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final Long requestedHostId = getHostId();
+        final Host target = _resourceService.getHost(requestedHostId);
+        if (target == null) {
+            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + requestedHostId);
+        }
+
+        final boolean enabled = haConfigManager.enableHA(target.getId(), HAResource.ResourceType.Host);
+
+        // The context is populated only after the manager call, as in the original ordering.
+        final CallContext callContext = CallContext.current();
+        callContext.setEventDetails("Host Id:" + target.getId() + " HA enabled: true");
+        callContext.putContextParameter(Host.class, target.getUuid());
+
+        setupResponse(enabled, target.getUuid());
+    }
+
+    /**
+     * Builds the HostHAResponse for this call and installs it as the response
+     * object. The "enabled" flag is always reported as true; the manager's
+     * return value is carried separately in the status field.
+     *
+     * @param result       value returned by the HA config manager
+     * @param resourceUuid UUID of the host the call targeted
+     */
+    private void setupResponse(final boolean result, final String resourceUuid) {
+        final HostHAResponse hostHAResponse = new HostHAResponse();
+        hostHAResponse.setResponseName(getCommandName());
+        hostHAResponse.setId(resourceUuid);
+        hostHAResponse.setStatus(result);
+        hostHAResponse.setEnabled(true);
+        setResponseObject(hostHAResponse);
+    }
+}
diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForZoneCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForZoneCmd.java
new
file mode 100644 index 00000000000..443d303c296 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForZoneCmd.java @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.dc.DataCenter; +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.SuccessResponse; +import org.apache.cloudstack.api.response.ZoneResponse; +import org.apache.cloudstack.context.CallContext; +import 
org.apache.cloudstack.ha.HAConfigManager;
+
+import javax.inject.Inject;
+
+/**
+ * Admin-only asynchronous API command that turns HA on for a whole zone.
+ *
+ * The zone is looked up through the resource service and passed to the
+ * {@link HAConfigManager}; the boolean that call returns is reported back in a
+ * {@link SuccessResponse}.
+ */
+@APICommand(name = EnableHAForZoneCmd.APINAME, description = "Enables HA for a zone",
+        responseObject = SuccessResponse.class,
+        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class EnableHAForZoneCmd extends BaseAsyncCmd {
+    public static final String APINAME = "enableHAForZone";
+
+    @Inject
+    private HAConfigManager haConfigManager;
+
+    /** Request parameter: the zone whose HA is to be enabled (mandatory). */
+    @Parameter(name = ApiConstants.ZONE_ID, type = BaseCmd.CommandType.UUID, entityType = ZoneResponse.class,
+            description = "ID of the zone", required = true, validations = {ApiArgValidator.PositiveNumber})
+    private Long zoneId;
+
+    /** @return the zone ID bound from the request */
+    public Long getZoneId() {
+        return zoneId;
+    }
+
+    /** @return the lower-cased API name followed by the common response suffix */
+    @Override
+    public String getCommandName() {
+        final String lowerCasedName = APINAME.toLowerCase();
+        return lowerCasedName + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    /** @return the event type recorded for this command */
+    @Override
+    public String getEventType() {
+        return EventTypes.EVENT_HA_RESOURCE_ENABLE;
+    }
+
+    /** @return a short description of the event, including the requested zone ID */
+    @Override
+    public String getEventDescription() {
+        return "enable HA for zone: " + getZoneId();
+    }
+
+    /** @return the ID of the account that issued the call */
+    @Override
+    public long getEntityOwnerId() {
+        return CallContext.current().getCallingAccountId();
+    }
+
+    /**
+     * Resolves the zone, enables HA for it, records the event details on the
+     * call context and publishes the outcome as the response object.
+     *
+     * @throws ServerApiException with PARAM_ERROR when no zone matches the given ID
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final Long requestedZoneId = getZoneId();
+        final DataCenter zone = _resourceService.getZone(requestedZoneId);
+        if (zone == null) {
+            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find zone by ID: " + requestedZoneId);
+        }
+
+        final boolean enabled = haConfigManager.enableHA(zone);
+
+        // The context is populated only after the manager call, as in the original ordering.
+        final CallContext callContext = CallContext.current();
+        callContext.setEventDetails("Zone Id:" + zone.getId() + " HA enabled: true");
+        callContext.putContextParameter(DataCenter.class, zone.getUuid());
+
+        setupResponse(enabled);
+    }
+
+    /**
+     * Wraps the operation outcome in a SuccessResponse and installs it as this
+     * command's response object.
+     *
+     * @param result value returned by the HA config manager
+     */
+    private void setupResponse(final boolean result) {
+        final SuccessResponse successResponse = new SuccessResponse();
+        successResponse.setObjectName("ha");
+        successResponse.setResponseName(getCommandName());
+        successResponse.setSuccess(result);
+        setResponseObject(successResponse);
+    }
+}
diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAProvidersCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAProvidersCmd.java
new file mode 100644
index 00000000000..64b9a6a8e5c
--- /dev/null
+++ b/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAProvidersCmd.java
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.api.command.admin.ha;
+
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.user.Account;
+import com.google.common.base.Enums;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiArgValidator;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.ApiErrorCode;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.HAProviderResponse;
+import org.apache.cloudstack.api.response.HostHAResponse;
+import org.apache.cloudstack.api.response.ListResponse;
+import org.apache.cloudstack.ha.HAConfigManager;
+import org.apache.cloudstack.ha.HAResource;
+
+import javax.inject.Inject;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Admin-only API command that lists the HA providers the HA config manager
+ * reports for hosts of a given hypervisor type.
+ *
+ * Unlike the host-level HA commands, which answer with a {@link HostHAResponse},
+ * every entry returned here is an {@link HAProviderResponse}; the declared
+ * {@code responseObject} therefore names {@code HAProviderResponse}.
+ */
+@APICommand(name = ListHostHAProvidersCmd.APINAME, description = "Lists HA providers", responseObject = HAProviderResponse.class,
+        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class ListHostHAProvidersCmd extends BaseCmd {
+    public static final String APINAME = "listHostHAProviders";
+
+    @Inject
+    private HAConfigManager haConfigManager;
+
+    /////////////////////////////////////////////////////
+    //////////////// API parameters /////////////////////
+    /////////////////////////////////////////////////////
+
+    /** Request parameter: name of a {@link HAResource.ResourceSubType} constant (mandatory). */
+    @Parameter(name = ApiConstants.HYPERVISOR, type = CommandType.STRING, required = true,
+            description = "Hypervisor type of the resource", validations = {ApiArgValidator.NotNullOrEmpty})
+    private String hypervisorType;
+
+    /////////////////////////////////////////////////////
+    /////////////////// Accessors ///////////////////////
+    /////////////////////////////////////////////////////
+
+    /**
+     * Converts the raw request value to its enum constant.
+     *
+     * @return the matching resource sub-type
+     * @throws IllegalArgumentException when the value names no constant;
+     *         {@link #execute()} validates the value before calling this
+     */
+    public HAResource.ResourceSubType getHypervisorType() {
+        return HAResource.ResourceSubType.valueOf(hypervisorType);
+    }
+
+    /////////////////////////////////////////////////////
+    /////////////// API Implementation///////////////////
+    /////////////////////////////////////////////////////
+
+    /** @return the lower-cased API name followed by the common response suffix */
+    @Override
+    public String getCommandName() {
+        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    /** @return the system account ID */
+    @Override
+    public long getEntityOwnerId() {
+        return Account.ACCOUNT_ID_SYSTEM;
+    }
+
+    /**
+     * Turns each provider name into an HAProviderResponse and installs the
+     * resulting list as this command's response object.
+     *
+     * @param hostHAProviderList provider names returned by the HA config manager
+     */
+    private void setupResponse(final List<String> hostHAProviderList) {
+        final ListResponse<HAProviderResponse> response = new ListResponse<>();
+        final List<HAProviderResponse> hostHAResponses = new ArrayList<>();
+        for (final String provider : hostHAProviderList) {
+            final HAProviderResponse haProviderResponse = new HAProviderResponse();
+            haProviderResponse.setProvider(provider);
+            hostHAResponses.add(haProviderResponse);
+        }
+        response.setResponses(hostHAResponses);
+        response.setResponseName(getCommandName());
+        setResponseObject(response);
+    }
+
+    /**
+     * Validates the hypervisor type, asks the HA config manager for the host HA
+     * providers of that type and publishes them as the response.
+     *
+     * @throws ServerApiException with PARAM_ERROR when the hypervisor type is not
+     *         a known resource sub-type
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        // Reject unknown values here so getHypervisorType() cannot fail in valueOf().
+        if (!Enums.getIfPresent(HAResource.ResourceSubType.class, hypervisorType).isPresent()) {
+            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Invalid or unsupported host hypervisor type provided. Supported types are: " + Arrays.toString(HAResource.ResourceSubType.values()));
+        }
+        final List<String> hostHAProviders = haConfigManager.listHAProviders(HAResource.ResourceType.Host, getHypervisorType());
+        setupResponse(hostHAProviders);
+    }
+}
diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAResourcesCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAResourcesCmd.java
new file mode 100644
index 00000000000..75a900c2198
--- /dev/null
+++ b/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAResourcesCmd.java
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.api.command.admin.ha;
+
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.host.Host;
+import com.cloud.user.Account;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiArgValidator;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.HostHAResponse;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.ListResponse;
+import org.apache.cloudstack.ha.HAConfig;
+import org.apache.cloudstack.ha.HAConfigManager;
+import org.apache.cloudstack.ha.HAResource;
+
+import javax.inject.Inject;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Admin-only API command that lists the HA configuration of hosts, optionally
+ * narrowed down by a host ID.
+ *
+ * Each {@link HAConfig} returned by the {@link HAConfigManager} whose host can
+ * still be resolved is rendered as one {@link HostHAResponse}.
+ */
+@APICommand(name = ListHostHAResourcesCmd.APINAME, description = "Lists host HA resources", responseObject = HostHAResponse.class,
+        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class ListHostHAResourcesCmd extends BaseCmd {
+    public static final String APINAME = "listHostHAResources";
+
+    @Inject
+    private HAConfigManager haConfigManager;
+
+    /////////////////////////////////////////////////////
+    //////////////// API parameters /////////////////////
+    /////////////////////////////////////////////////////
+
+    /** Request parameter: optional host ID passed through to the HA config manager. */
+    @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class,
+            description = "List by host ID", validations = {ApiArgValidator.PositiveNumber})
+    private Long hostId;
+
+    /////////////////////////////////////////////////////
+    /////////////////// Accessors ///////////////////////
+    /////////////////////////////////////////////////////
+
+    /** @return the host ID bound from the request, or null when none was given */
+    public Long getHostId() {
+        return hostId;
+    }
+
+    /////////////////////////////////////////////////////
+    /////////////// API Implementation///////////////////
+    /////////////////////////////////////////////////////
+
+    /** @return the lower-cased API name followed by the common response suffix */
+    @Override
+    public String getCommandName() {
+        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    /** @return the system account ID */
+    @Override
+    public long getEntityOwnerId() {
+        return Account.ACCOUNT_ID_SYSTEM;
+    }
+
+    /**
+     * Renders the HA configs as HostHAResponse entries and installs the list as
+     * this command's response object.
+     *
+     * @param hostHAConfigList HA configs returned by the HA config manager
+     */
+    private void setupResponse(final List<HAConfig> hostHAConfigList) {
+        final ListResponse<HostHAResponse> response = new ListResponse<>();
+        final List<HostHAResponse> hostHAResponses = new ArrayList<>();
+        for (final HAConfig config : hostHAConfigList) {
+            final Host host = _resourceService.getHost(config.getResourceId());
+            // Configs whose host can no longer be resolved are silently left out.
+            if (host == null) {
+                continue;
+            }
+            final HostHAResponse hostHAResponse = new HostHAResponse();
+            hostHAResponse.setId(host.getUuid());
+            hostHAResponse.setEnabled(config.isEnabled());
+            hostHAResponse.setHaState(config.getState());
+            hostHAResponse.setProvider(config.getHaProvider());
+            hostHAResponses.add(hostHAResponse);
+        }
+        response.setResponses(hostHAResponses);
+        response.setResponseName(getCommandName());
+        setResponseObject(response);
+    }
+
+    /**
+     * Fetches the host HA configs (for the requested host ID, if any) from the
+     * HA config manager and publishes them as the response.
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final List<HAConfig> hostHAConfig = haConfigManager.listHAResources(getHostId(), HAResource.ResourceType.Host);
+        setupResponse(hostHAConfig);
+    }
+}
diff --git a/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java b/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java
index e49aabc49d4..aa7cfed1e8f 100644
--- a/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java
+++
b/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java @@ -108,4 +108,8 @@ public class PrepareForMaintenanceCmd extends BaseAsyncCmd { throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to prepare host for maintenance"); } } + + /** Stores the given host ID in this command's id field. NOTE(review): presumably added so internal callers can build the command without API parameter binding - confirm against callers. */ public void setHostId(final Long hostId) { + id = hostId; + } } diff --git a/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/ChangeOutOfBandManagementPasswordCmd.java b/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/ChangeOutOfBandManagementPasswordCmd.java index ea2e3cdb76b..0499a6e4577 100644 --- a/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/ChangeOutOfBandManagementPasswordCmd.java +++ b/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/ChangeOutOfBandManagementPasswordCmd.java @@ -74,7 +74,7 @@ public class ChangeOutOfBandManagementPasswordCmd extends BaseAsyncCmd { CallContext.current().setEventDetails("Host Id: " + host.getId() + " Password: " + getPassword().charAt(0) + "****"); CallContext.current().putContextParameter(Host.class, host.getUuid()); - final OutOfBandManagementResponse response = outOfBandManagementService.changeOutOfBandManagementPassword(host, getPassword()); + final OutOfBandManagementResponse response = outOfBandManagementService.changePassword(host, getPassword()); response.setResponseName(getCommandName()); setResponseObject(response); } diff --git a/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/ConfigureOutOfBandManagementCmd.java b/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/ConfigureOutOfBandManagementCmd.java index db224108df0..d9c6c9f439e 100644 --- a/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/ConfigureOutOfBandManagementCmd.java +++ b/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/ConfigureOutOfBandManagementCmd.java @@ -83,7 +83,7 @@ public class ConfigureOutOfBandManagementCmd extends 
BaseCmd { throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId()); } CallContext.current().putContextParameter(Host.class, host.getUuid()); - final OutOfBandManagementResponse response = outOfBandManagementService.configureOutOfBandManagement(host, getHostPMOptions()); + final OutOfBandManagementResponse response = outOfBandManagementService.configure(host, getHostPMOptions()); response.setId(host.getUuid()); response.setResponseName(getCommandName()); setResponseObject(response); diff --git a/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/IssueOutOfBandManagementPowerActionCmd.java b/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/IssueOutOfBandManagementPowerActionCmd.java index 8d6bdd350d0..7593a3cbc52 100644 --- a/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/IssueOutOfBandManagementPowerActionCmd.java +++ b/api/src/org/apache/cloudstack/api/command/admin/outofbandmanagement/IssueOutOfBandManagementPowerActionCmd.java @@ -80,7 +80,7 @@ public class IssueOutOfBandManagementPowerActionCmd extends BaseAsyncCmd { CallContext.current().setEventDetails("Host Id: " + host.getId() + " Action: " + powerOperation.toString()); CallContext.current().putContextParameter(Host.class, host.getUuid()); - final OutOfBandManagementResponse response = outOfBandManagementService.executeOutOfBandManagementPowerOperation(host, powerOperation, getActionTimeout()); + final OutOfBandManagementResponse response = outOfBandManagementService.executePowerOperation(host, powerOperation, getActionTimeout()); response.setResponseName(getCommandName()); setResponseObject(response); } diff --git a/api/src/org/apache/cloudstack/api/response/HAProviderResponse.java b/api/src/org/apache/cloudstack/api/response/HAProviderResponse.java new file mode 100644 index 00000000000..d75cbc3e120 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/response/HAProviderResponse.java @@ -0,0 +1,58 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseResponse;
+import org.apache.cloudstack.api.EntityReference;
+import org.apache.cloudstack.ha.HAConfig;
+
+import java.util.List;
+
+/**
+ * API response describing a single HA provider: its name and the resource
+ * types it supports. The object name is fixed to "haprovider".
+ */
+@EntityReference(value = HAConfig.class)
+public final class HAProviderResponse extends BaseResponse {
+    @SerializedName(ApiConstants.HA_PROVIDER)
+    @Param(description = "the HA provider")
+    private String provider;
+
+    // NOTE(review): the element type of this list is not visible here (the
+    // generic argument looks lost in extraction) - confirm before relying on it.
+    @SerializedName(ApiConstants.TYPE)
+    @Param(description = "the HA provider resource type detail")
+    private List supportedResourceTypes;
+
+    /** Creates an empty response whose object name is "haprovider". */
+    public HAProviderResponse() {
+        super("haprovider");
+    }
+
+    /** @param provider the HA provider name to report */
+    public void setProvider(final String provider) {
+        this.provider = provider;
+    }
+
+    /** @return the HA provider name, or null when not set */
+    public String getProvider() {
+        return this.provider;
+    }
+
+    /** @param supportedResourceTypes the resource types to report for this provider */
+    public void setSupportedResourceTypes(final List supportedResourceTypes) {
+        this.supportedResourceTypes = supportedResourceTypes;
+    }
+
+    /** @return the supported resource types, or null when not set */
+    public List getSupportedResourceTypes() {
+        return this.supportedResourceTypes;
+    }
+}
diff --git
a/api/src/org/apache/cloudstack/api/response/HostHAResponse.java b/api/src/org/apache/cloudstack/api/response/HostHAResponse.java new file mode 100644 index 00000000000..942250cbc21 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/response/HostHAResponse.java @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.response; + +import com.cloud.serializer.Param; +import com.google.gson.annotations.SerializedName; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseResponse; +import org.apache.cloudstack.api.EntityReference; +import org.apache.cloudstack.ha.HAConfig; + +@EntityReference(value = HAConfig.class) +public final class HostHAResponse extends BaseResponse { + @SerializedName(ApiConstants.HOST_ID) + @Param(description = "the ID of the host") + private String id; + + @SerializedName(ApiConstants.HA_ENABLE) + @Param(description = "if host HA is enabled for the host") + private Boolean enabled; + + @SerializedName(ApiConstants.HA_STATE) + @Param(description = "the HA state of the host") + private HAConfig.HAState haState; + + @SerializedName(ApiConstants.HA_PROVIDER) + @Param(description = "the host HA provider") + private String provider; + + @SerializedName(ApiConstants.STATUS) + @Param(description = "operation status") + private Boolean status; + + public HostHAResponse() { + super("hostha"); + } + + public HostHAResponse(final HAConfig config) { + this(); + if (config == null) { + this.enabled = false; + this.haState = HAConfig.HAState.Disabled; + return; + } + setProvider(config.getHaProvider()); + setEnabled(config.isEnabled()); + setHaState(config.getState()); + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Boolean getEnabled() { + return enabled; + } + + public void setEnabled(Boolean enabled) { + this.enabled = enabled; + } + + public HAConfig.HAState getHaState() { + return haState; + } + + public void setHaState(HAConfig.HAState haState) { + this.haState = haState; + } + + public String getProvider() { + return provider; + } + + public void setProvider(String provider) { + this.provider = provider; + } + + public Boolean getStatus() { + return status; + } + + public void setStatus(Boolean status) { + this.status = status; + 
} +} diff --git a/api/src/org/apache/cloudstack/api/response/HostResponse.java b/api/src/org/apache/cloudstack/api/response/HostResponse.java index ab9c8c37674..5923ca4144f 100644 --- a/api/src/org/apache/cloudstack/api/response/HostResponse.java +++ b/api/src/org/apache/cloudstack/api/response/HostResponse.java @@ -24,6 +24,7 @@ import com.google.gson.annotations.SerializedName; import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseResponse; import org.apache.cloudstack.api.EntityReference; +import org.apache.cloudstack.ha.HAConfig; import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement; import java.util.Date; @@ -201,6 +202,10 @@ public class HostResponse extends BaseResponse { @Param(description = "true if this host is suitable(has enough capacity and satisfies all conditions like hosttags, max guests vm limit etc) to migrate a VM to it , false otherwise") private Boolean suitableForMigration; + @SerializedName("hostha") + @Param(description = "the host HA information") + private HostHAResponse hostHAResponse; + @SerializedName("outofbandmanagement") @Param(description = "the host out-of-band management information") private OutOfBandManagementResponse outOfBandManagementResponse; @@ -408,6 +413,14 @@ public class HostResponse extends BaseResponse { this.suitableForMigration = suitableForMigration; } + public HostHAResponse getHostHAResponse() { + return hostHAResponse; + } + + public void setHostHAResponse(final HAConfig config) { + this.hostHAResponse = new HostHAResponse(config); + } + public OutOfBandManagementResponse getOutOfBandManagementResponse() { return outOfBandManagementResponse; } diff --git a/api/src/org/apache/cloudstack/ha/HAConfig.java b/api/src/org/apache/cloudstack/ha/HAConfig.java new file mode 100644 index 00000000000..36fe11c410d --- /dev/null +++ b/api/src/org/apache/cloudstack/ha/HAConfig.java @@ -0,0 +1,142 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// 
or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package org.apache.cloudstack.ha; + +import com.cloud.utils.fsm.StateMachine2; +import com.cloud.utils.fsm.StateObject; +import org.apache.cloudstack.api.InternalIdentity; +import org.apache.cloudstack.utils.identity.ManagementServerNode; + +public interface HAConfig extends StateObject, InternalIdentity { + + long getResourceId(); + HAResource.ResourceType getResourceType(); + boolean isEnabled(); + HAState getState(); + String getHaProvider(); + Long getManagementServerId(); + + enum Event { + Eligible, + Ineligible, + Disabled, + Enabled, + HealthCheckPassed, + HealthCheckFailed, + PerformActivityCheck, + TooFewActivityCheckSamples, + PeriodicRecheckResourceActivity, + ActivityCheckFailureOverThresholdRatio, + ActivityCheckFailureUnderThresholdRatio, + PowerCycle, + Recovered, + RecoveryWaitPeriodTimeout, + RecoveryOperationThresholdExceeded, + Fenced; + + public Long getServerId() { + // TODO: change in future if we've better claim & ownership + // Right now the first one to update the db wins + // and mgmt server id would eventually become consistent + return ManagementServerNode.getManagementServerId(); + } + } + + enum HAState { + Disabled("HA Operations disabled"), + Available("The resource is healthy"), + 
Ineligible("The current state does not support HA/recovery"), + Suspect("Most recent health check failed"), + Degraded("The resource cannot be managed, but services end user requests"), + Checking("The activity checks are currently being performed"), + Recovering("The resource is undergoing recovery operation"), + Recovered("The resource is recovered"), + Fencing("The resource is undergoing fence operation"), + Fenced("The resource is fenced"); + + String description; + + HAState(String description) { + this.description = description; + } + + public static StateMachine2 getStateMachine() { + return FSM; + } + + public String getDescription() { + return description; + } + + private static final StateMachine2 FSM = new StateMachine2<>(); + + static { + FSM.addInitialTransition(Event.Disabled, Disabled); + FSM.addInitialTransition(Event.Enabled, Available); + FSM.addInitialTransition(Event.Ineligible, Ineligible); + + FSM.addTransition(Disabled, Event.Enabled, Available); + + FSM.addTransition(Ineligible, Event.Disabled, Disabled); + FSM.addTransition(Ineligible, Event.Ineligible, Ineligible); + FSM.addTransition(Ineligible, Event.Eligible, Available); + + FSM.addTransition(Available, Event.Disabled, Disabled); + FSM.addTransition(Available, Event.Ineligible, Ineligible); + FSM.addTransition(Available, Event.HealthCheckPassed, Available); + FSM.addTransition(Available, Event.HealthCheckFailed, Suspect); + + FSM.addTransition(Suspect, Event.Disabled, Disabled); + FSM.addTransition(Suspect, Event.Ineligible, Ineligible); + FSM.addTransition(Suspect, Event.HealthCheckFailed, Suspect); + FSM.addTransition(Suspect, Event.PerformActivityCheck, Checking); + FSM.addTransition(Suspect, Event.HealthCheckPassed, Available); + + FSM.addTransition(Checking, Event.Disabled, Disabled); + FSM.addTransition(Checking, Event.Ineligible, Ineligible); + FSM.addTransition(Checking, Event.TooFewActivityCheckSamples, Suspect); + FSM.addTransition(Checking, 
Event.ActivityCheckFailureUnderThresholdRatio, Degraded); + FSM.addTransition(Checking, Event.ActivityCheckFailureOverThresholdRatio, Recovering); + + FSM.addTransition(Degraded, Event.Disabled, Disabled); + FSM.addTransition(Degraded, Event.Ineligible, Ineligible); + FSM.addTransition(Degraded, Event.HealthCheckFailed, Degraded); + FSM.addTransition(Degraded, Event.HealthCheckPassed, Available); + FSM.addTransition(Degraded, Event.PeriodicRecheckResourceActivity, Suspect); + + FSM.addTransition(Recovering, Event.Disabled, Disabled); + FSM.addTransition(Recovering, Event.Ineligible, Ineligible); + FSM.addTransition(Recovering, Event.Recovered, Recovered); + FSM.addTransition(Recovering, Event.RecoveryOperationThresholdExceeded, Fencing); + + FSM.addTransition(Recovered, Event.Disabled, Disabled); + FSM.addTransition(Recovered, Event.Ineligible, Ineligible); + FSM.addTransition(Recovered, Event.RecoveryWaitPeriodTimeout, Available); + + FSM.addTransition(Fencing, Event.Disabled, Disabled); + FSM.addTransition(Fencing, Event.Ineligible, Ineligible); + FSM.addTransition(Fencing, Event.Fenced, Fenced); + + FSM.addTransition(Fenced, Event.Disabled, Disabled); + FSM.addTransition(Fenced, Event.HealthCheckPassed, Ineligible); + FSM.addTransition(Fenced, Event.HealthCheckFailed, Fenced); + } + } +} diff --git a/api/src/org/apache/cloudstack/ha/HAConfigManager.java b/api/src/org/apache/cloudstack/ha/HAConfigManager.java new file mode 100644 index 00000000000..c9a20358c36 --- /dev/null +++ b/api/src/org/apache/cloudstack/ha/HAConfigManager.java @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha; + +import com.cloud.dc.DataCenter; +import com.cloud.org.Cluster; + +import java.util.List; + +/** + * @since 4.11 + */ +public interface HAConfigManager { + /** + * Configures HA for a resource by accepting the resource type and HA provider + * @param resourceId the ID of the resource + * @param resourceType the type of the resource + * @param haProvider the name of the HA provider + */ + boolean configureHA(Long resourceId, HAResource.ResourceType resourceType, String haProvider); + + /** + * Enables HA for resource Id of a specific resource type + * @param resourceId the ID of the resource + * @param resourceType the type of the resource + * @return returns true on successful enable + */ + boolean enableHA(Long resourceId, HAResource.ResourceType resourceType); + + /** + * Disables HA for resource Id of a specific resource type + * @param resourceId the ID of the resource + * @param resourceType the type of the resource + * @return returns true on successful disable + */ + boolean disableHA(Long resourceId, HAResource.ResourceType resourceType); + + /** + * Enables HA across a cluster + * @param cluster the cluster + * @return returns operation success + */ + boolean enableHA(final Cluster cluster); + + /** + * Disables HA across a cluster + * @param cluster the cluster + * @return returns operation success + */ + boolean disableHA(final Cluster cluster); + + /** + * Enables HA across a zone + * @param zone the zone + * @return returns operation success + */ + boolean enableHA(final DataCenter zone); + + /** 
+ * Disables HA across a zone + * @param zone the zone + * @return returns operation success + */ + boolean disableHA(final DataCenter zone); + + /** + * Returns list of HA config for resources, by resource ID and/or type if provided + * @param resourceId (optional) ID of the resource + * @param resourceType (optional) type of the resource + * @return returns list of ha config for the resource + */ + List listHAResources(final Long resourceId, final HAResource.ResourceType resourceType); + + /** + * Returns list of HA providers for resources + * @param resourceType type of the resource + * @param entityType sub-type of the resource + * @return returns list of ha provider names + */ + List listHAProviders(final HAResource.ResourceType resourceType, final HAResource.ResourceSubType entityType); +} diff --git a/api/src/org/apache/cloudstack/ha/HAResource.java b/api/src/org/apache/cloudstack/ha/HAResource.java new file mode 100644 index 00000000000..650a58b3297 --- /dev/null +++ b/api/src/org/apache/cloudstack/ha/HAResource.java @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.ha; + +import org.apache.cloudstack.kernel.Partition; + +public interface HAResource extends Partition { + enum ResourceType { + Host, + VirtualMachine + } + + enum ResourceSubType { + KVM, + Simulator, + Unknown + } + + long getDataCenterId(); + Long getClusterId(); + ResourceType resourceType(); +} diff --git a/api/src/org/apache/cloudstack/kernel/Partition.java b/api/src/org/apache/cloudstack/kernel/Partition.java new file mode 100644 index 00000000000..6d93e3e23ce --- /dev/null +++ b/api/src/org/apache/cloudstack/kernel/Partition.java @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.kernel; + +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.api.InternalIdentity; + +public interface Partition extends InternalIdentity, Identity { + enum PartitionType { + Zone, Pod, Cluster, Host, VM + } + PartitionType partitionType(); +} diff --git a/api/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementService.java b/api/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementService.java index 699b2c6a99b..a8b73b73889 100644 --- a/api/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementService.java +++ b/api/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementService.java @@ -30,9 +30,6 @@ public interface OutOfBandManagementService { ConfigKey ActionTimeout = new ConfigKey("Advanced", Long.class, "outofbandmanagement.action.timeout", "60", "The out of band management action timeout in seconds, configurable by cluster", true, ConfigKey.Scope.Cluster); - ConfigKey SyncThreadInterval = new ConfigKey("Advanced", Long.class, "outofbandmanagement.sync.interval", "300000", - "The interval (in milliseconds) when the out-of-band management background sync are retrieved", true, ConfigKey.Scope.Global); - ConfigKey SyncThreadPoolSize = new ConfigKey("Advanced", Integer.class, "outofbandmanagement.sync.poolsize", "50", "The out of band management background sync thread pool size", true, ConfigKey.Scope.Global); @@ -49,7 +46,7 @@ public interface OutOfBandManagementService { OutOfBandManagementResponse disableOutOfBandManagement(Cluster cluster); OutOfBandManagementResponse disableOutOfBandManagement(Host host); - OutOfBandManagementResponse configureOutOfBandManagement(Host host, ImmutableMap options); - OutOfBandManagementResponse executeOutOfBandManagementPowerOperation(Host host, OutOfBandManagement.PowerOperation operation, Long timeout); - OutOfBandManagementResponse changeOutOfBandManagementPassword(Host host, String password); + OutOfBandManagementResponse 
configure(Host host, ImmutableMap options); + OutOfBandManagementResponse executePowerOperation(Host host, OutOfBandManagement.PowerOperation operation, Long timeout); + OutOfBandManagementResponse changePassword(Host host, String password); } diff --git a/api/src/org/apache/cloudstack/poll/BackgroundPollManager.java b/api/src/org/apache/cloudstack/poll/BackgroundPollManager.java new file mode 100644 index 00000000000..b9d55000294 --- /dev/null +++ b/api/src/org/apache/cloudstack/poll/BackgroundPollManager.java @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.poll; + +public interface BackgroundPollManager { + /** + * Submits a background poll task that needs to run continuously in the background + * to poll external resources, update states, trigger actions etc. + * Tasks must be submitted by a manager in configure-phase; the list of submitted tasks + * is then submitted to the internal executor service during start-phase. 
+ * @param task periodic background task + * @since 4.11 + */ + void submitTask(final BackgroundPollTask task); +} diff --git a/api/src/org/apache/cloudstack/poll/BackgroundPollTask.java b/api/src/org/apache/cloudstack/poll/BackgroundPollTask.java new file mode 100644 index 00000000000..8eea147955b --- /dev/null +++ b/api/src/org/apache/cloudstack/poll/BackgroundPollTask.java @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.poll; + +public interface BackgroundPollTask extends Runnable { +} diff --git a/client/WEB-INF/classes/resources/messages.properties b/client/WEB-INF/classes/resources/messages.properties index a554c68c220..b1b810488db 100644 --- a/client/WEB-INF/classes/resources/messages.properties +++ b/client/WEB-INF/classes/resources/messages.properties @@ -639,6 +639,12 @@ label.guest.traffic=Guest Traffic label.guest.type=Guest Type label.guest=Guest label.ha.enabled=HA Enabled +label.ha.configure=Configure HA +label.ha.disable=Disable HA +label.ha.enable=Enable HA +label.ha.provider=HA Provider +label.ha.state=HA State +label.ha=HA label.help=Help label.hide.ingress.rule=Hide Ingress Rule label.hints=Hints @@ -998,6 +1004,7 @@ label.outofbandmanagement.driver=Driver label.outofbandmanagement.disable=Disable Out-of-band Management label.outofbandmanagement.enable=Enable Out-of-band Management label.outofbandmanagement.password=Password +label.outofbandmanagement.reenterpassword=Re-enter Password label.outofbandmanagement.port=Port label.outofbandmanagement.username=Username message.outofbandmanagement.changepassword=Change Out-of-band Management password diff --git a/core/resources/META-INF/cloudstack/compute/spring-core-lifecycle-compute-context-inheritable.xml b/core/resources/META-INF/cloudstack/compute/spring-core-lifecycle-compute-context-inheritable.xml index f757e7ed095..65dc8cca9db 100644 --- a/core/resources/META-INF/cloudstack/compute/spring-core-lifecycle-compute-context-inheritable.xml +++ b/core/resources/META-INF/cloudstack/compute/spring-core-lifecycle-compute-context-inheritable.xml @@ -44,4 +44,9 @@ + + + + + diff --git a/core/resources/META-INF/cloudstack/core/spring-core-registry-core-context.xml b/core/resources/META-INF/cloudstack/core/spring-core-registry-core-context.xml index f8b45ae15c0..d4cb8da068f 100644 --- a/core/resources/META-INF/cloudstack/core/spring-core-registry-core-context.xml +++ 
b/core/resources/META-INF/cloudstack/core/spring-core-registry-core-context.xml @@ -307,6 +307,11 @@ + + + + diff --git a/core/src/com/cloud/agent/api/CheckVMActivityOnStoragePoolCommand.java b/core/src/com/cloud/agent/api/CheckVMActivityOnStoragePoolCommand.java new file mode 100644 index 00000000000..b053f2895d2 --- /dev/null +++ b/core/src/com/cloud/agent/api/CheckVMActivityOnStoragePoolCommand.java @@ -0,0 +1,70 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+//
+
+package com.cloud.agent.api;
+
+import com.cloud.agent.api.to.HostTO;
+import com.cloud.agent.api.to.StorageFilerTO;
+import com.cloud.host.Host;
+import com.cloud.storage.StoragePool;
+import com.cloud.storage.Volume;
+
+import org.joda.time.DateTime;
+import java.util.List;
+
+/** Command carrying a host, a storage pool, a comma-separated list of volume UUIDs and a suspect time in epoch seconds; it is not executed in sequence. */
+public final class CheckVMActivityOnStoragePoolCommand extends Command {
+    private HostTO host;
+    private StorageFilerTO pool;
+    private String volumeList;
+    private long suspectTimeSeconds;
+
+    public CheckVMActivityOnStoragePoolCommand(final Host host, final StoragePool pool, final List<Volume> volumeList, final DateTime suspectTime) {
+        this.host = new HostTO(host);
+        this.pool = new StorageFilerTO(pool);
+        this.suspectTimeSeconds = suspectTime.getMillis()/1000L;
+        final StringBuilder stringBuilder = new StringBuilder();
+        for (final Volume v : volumeList) {
+            stringBuilder.append(v.getUuid()).append(",");
+        }
+        // Drop the trailing comma; with no volumes the list stays "" instead of calling deleteCharAt(-1), which throws.
+        this.volumeList = stringBuilder.length() == 0 ? "" : stringBuilder.deleteCharAt(stringBuilder.length() - 1).toString();
+    }
+
+    public String getVolumeList() {
+        return volumeList;
+    }
+
+    public StorageFilerTO getPool() {
+        return pool;
+    }
+
+    public HostTO getHost() {
+        return host;
+    }
+
+    public long getSuspectTimeInSeconds() {
+        return suspectTimeSeconds;
+    }
+
+    @Override
+    public boolean executeInSequence() {
+        return false;
+    }
+}
diff --git a/core/test/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java b/core/test/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java index 72ec83a41f7..3cba4929a57 100644 --- a/core/test/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java +++ b/core/test/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java @@ -27,6 +27,8 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.kernel.Partition; import org.junit.Test; import com.cloud.agent.api.CheckOnHostCommand; @@ -38,6 +40,11 @@ import
com.cloud.resource.ResourceState; public class CheckOnHostCommandTest { public Host host = new Host() { + @Override + public PartitionType partitionType() { + return PartitionType.Host; + } + @Override public Status getState() { return Status.Up; @@ -197,7 +204,12 @@ public class CheckOnHostCommandTest { @Override public Long getClusterId() { return 3L; - }; + } + + @Override + public ResourceType resourceType() { + return ResourceType.Host; + } @Override public String getPublicIpAddress() { @@ -254,6 +266,11 @@ public class CheckOnHostCommandTest { return false; }; + @Override + public boolean isDisabled() { + return false; + }; + @Override public ResourceState getResourceState() { return ResourceState.Enabled; diff --git a/developer/developer-prefill.sql b/developer/developer-prefill.sql index f942c3c9b59..5d301f5ae40 100644 --- a/developer/developer-prefill.sql +++ b/developer/developer-prefill.sql @@ -78,10 +78,6 @@ INSERT INTO `cloud`.`configuration` (category, instance, component, name, value) VALUES ('Advanced', 'DEFAULT', 'management-server', 'pool.storage.capacity.disablethreshold', '0.95'); -INSERT INTO `cloud`.`configuration` (category, instance, component, name, value) - VALUES ('Advanced', 'DEFAULT', 'management-server', - 'outofbandmanagement.sync.interval', '2000'); - -- Enable dynamic RBAC by default for fresh deployments INSERT INTO `cloud`.`configuration` (category, instance, component, name, value) VALUES ('Advanced', 'DEFAULT', 'RoleService', diff --git a/engine/components-api/src/com/cloud/agent/AgentManager.java b/engine/components-api/src/com/cloud/agent/AgentManager.java index e9e32495d83..430d8f9cb63 100755 --- a/engine/components-api/src/com/cloud/agent/AgentManager.java +++ b/engine/components-api/src/com/cloud/agent/AgentManager.java @@ -139,6 +139,8 @@ public interface AgentManager { void disconnectWithoutInvestigation(long hostId, Status.Event event); + void disconnectWithInvestigation(long hostId, Status.Event event); + public void 
pullAgentToMaintenance(long hostId); public void pullAgentOutMaintenance(long hostId); diff --git a/engine/orchestration/src/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/com/cloud/agent/manager/AgentManagerImpl.java index e557965a571..cde890ba312 100755 --- a/engine/orchestration/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -1201,7 +1201,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } else if (cmd instanceof ShutdownCommand) { final ShutdownCommand shutdown = (ShutdownCommand)cmd; final String reason = shutdown.getReason(); - s_logger.info("Host " + attache.getId() + " has informed us that it is shutting down with reason " + reason + " and detail " + + s_logger.info("HA: Host " + attache.getId() + " has informed us that it is shutting down with reason " + reason + " and detail " + shutdown.getDetail()); if (reason.equals(ShutdownCommand.Update)) { //disconnectWithoutInvestigation(attache, Event.UpdateNeeded); diff --git a/engine/orchestration/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/engine/orchestration/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index d2028f8a365..195a711105f 100755 --- a/engine/orchestration/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/engine/orchestration/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -43,6 +43,8 @@ import javax.naming.ConfigurationException; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLEngine; +import org.apache.cloudstack.ha.dao.HAConfigDao; +import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.log4j.Logger; import com.google.gson.Gson; @@ -124,6 +126,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust ConfigurationDao _configDao; @Inject ConfigDepot _configDepot; + @Inject + private OutOfBandManagementDao 
outOfBandManagementDao; + @Inject + private HAConfigDao haConfigDao; protected ClusteredAgentManagerImpl() { super(); @@ -729,7 +735,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust s_logger.info("Marking hosts as disconnected on Management server" + vo.getMsid()); long lastPing = (System.currentTimeMillis() >> 10) - getTimeout(); _hostDao.markHostsAsDisconnected(vo.getMsid(), lastPing); - outOfBandManagementDao.expireOutOfBandManagementOwnershipByServer(vo.getMsid()); + outOfBandManagementDao.expireServerOwnership(vo.getMsid()); + haConfigDao.expireServerOwnership(vo.getMsid()); s_logger.info("Deleting entries from op_host_transfer table for Management server " + vo.getMsid()); cleanupTransferMap(vo.getMsid()); } diff --git a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineClusterVO.java b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineClusterVO.java index 0c34e3c5285..75965fe4bd1 100644 --- a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineClusterVO.java +++ b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineClusterVO.java @@ -16,8 +16,16 @@ // under the License. 
package org.apache.cloudstack.engine.datacenter.entity.api.db; -import java.util.Date; -import java.util.UUID; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.org.Cluster; +import com.cloud.org.Grouping; +import com.cloud.org.Managed.ManagedState; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.StateMachine; +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; import javax.persistence.Column; import javax.persistence.Entity; @@ -29,18 +37,8 @@ import javax.persistence.Id; import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; - -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; - -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.org.Cluster; -import com.cloud.org.Grouping; -import com.cloud.org.Managed.ManagedState; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; -import com.cloud.utils.db.StateMachine; +import java.util.Date; +import java.util.UUID; @Entity @Table(name = "cluster") @@ -243,4 +241,9 @@ public class EngineClusterVO implements EngineCluster, Identity { public State getState() { return state; } + + @Override + public PartitionType partitionType() { + return PartitionType.Cluster; + } } diff --git a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineDataCenterVO.java b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineDataCenterVO.java index 82b6150a5a3..238d78692f8 100644 --- 
a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineDataCenterVO.java +++ b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineDataCenterVO.java @@ -16,9 +16,14 @@ // under the License. package org.apache.cloudstack.engine.datacenter.entity.api.db; -import java.util.Date; -import java.util.Map; -import java.util.UUID; +import com.cloud.network.Network.Provider; +import com.cloud.org.Grouping; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.StateMachine; +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; import javax.persistence.Column; import javax.persistence.Entity; @@ -32,16 +37,9 @@ import javax.persistence.TableGenerator; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; - -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; - -import com.cloud.network.Network.Provider; -import com.cloud.org.Grouping; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; -import com.cloud.utils.db.StateMachine; +import java.util.Date; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "data_center") @@ -503,4 +501,9 @@ public class EngineDataCenterVO implements EngineDataCenter, Identity { public void setIp6Dns2(String ip6Dns2) { this.ip6Dns2 = ip6Dns2; } + + @Override + public PartitionType partitionType() { + return PartitionType.Zone; + } } diff --git a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java 
b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java index b064a35864d..7d3775191b9 100644 --- a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java +++ b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java @@ -16,10 +16,16 @@ // under the License. package org.apache.cloudstack.engine.datacenter.entity.api.db; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.UUID; +import com.cloud.host.Status; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.resource.ResourceState; +import com.cloud.storage.Storage.StoragePoolType; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.StateMachine; +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; import javax.persistence.Column; import javax.persistence.DiscriminatorColumn; @@ -36,18 +42,10 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; - -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; - -import com.cloud.host.Status; -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.resource.ResourceState; -import com.cloud.storage.Storage.StoragePoolType; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; -import com.cloud.utils.db.StateMachine; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "host") @@ -178,6 +176,11 @@ public 
class EngineHostVO implements EngineHost, Identity { return clusterId; } + @Override + public ResourceType resourceType() { + return ResourceType.Host; + } + public void setClusterId(Long clusterId) { this.clusterId = clusterId; } @@ -722,6 +725,11 @@ public class EngineHostVO implements EngineHost, Identity { resourceState = state; } + @Override + public boolean isDisabled() { + return (getResourceState() == ResourceState.Disabled); + } + @Override public boolean isInMaintenanceStates() { return (getResourceState() == ResourceState.Maintenance || getResourceState() == ResourceState.ErrorInMaintenance || getResourceState() == ResourceState.PrepareForMaintenance); @@ -760,4 +768,9 @@ public class EngineHostVO implements EngineHost, Identity { public State getOrchestrationState() { return orchestrationState; } + + @Override + public PartitionType partitionType() { + return PartitionType.Host; + } } diff --git a/engine/schema/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml b/engine/schema/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml index da98484bbdc..afb9fec9ede 100644 --- a/engine/schema/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml +++ b/engine/schema/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml @@ -165,6 +165,8 @@ + + diff --git a/engine/schema/src/com/cloud/dc/ClusterVO.java b/engine/schema/src/com/cloud/dc/ClusterVO.java index b701542fbdb..2a76789136a 100644 --- a/engine/schema/src/com/cloud/dc/ClusterVO.java +++ b/engine/schema/src/com/cloud/dc/ClusterVO.java @@ -16,8 +16,12 @@ // under the License. 
package com.cloud.dc; -import java.util.Date; -import java.util.UUID; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.org.Cluster; +import com.cloud.org.Grouping; +import com.cloud.org.Managed.ManagedState; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; import javax.persistence.Column; import javax.persistence.Entity; @@ -27,13 +31,8 @@ import javax.persistence.GeneratedValue; import javax.persistence.GenerationType; import javax.persistence.Id; import javax.persistence.Table; - -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.org.Cluster; -import com.cloud.org.Grouping; -import com.cloud.org.Managed.ManagedState; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; +import java.util.Date; +import java.util.UUID; @Entity @Table(name = "cluster") @@ -192,4 +191,9 @@ public class ClusterVO implements Cluster { public void setUuid(String uuid) { this.uuid = uuid; } + + @Override + public PartitionType partitionType() { + return PartitionType.Cluster; + } } diff --git a/engine/schema/src/com/cloud/dc/DataCenterVO.java b/engine/schema/src/com/cloud/dc/DataCenterVO.java index 02aa98839ff..6d510dfdacc 100644 --- a/engine/schema/src/com/cloud/dc/DataCenterVO.java +++ b/engine/schema/src/com/cloud/dc/DataCenterVO.java @@ -16,9 +16,10 @@ // under the License. 
package com.cloud.dc; -import java.util.Date; -import java.util.Map; -import java.util.UUID; +import com.cloud.network.Network.Provider; +import com.cloud.org.Grouping; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; import javax.persistence.Column; import javax.persistence.Entity; @@ -30,11 +31,9 @@ import javax.persistence.Id; import javax.persistence.Table; import javax.persistence.TableGenerator; import javax.persistence.Transient; - -import com.cloud.network.Network.Provider; -import com.cloud.org.Grouping; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; +import java.util.Date; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "data_center") @@ -456,4 +455,9 @@ public class DataCenterVO implements DataCenter { public void setIp6Dns2(String ip6Dns2) { this.ip6Dns2 = ip6Dns2; } + + @Override + public PartitionType partitionType() { + return PartitionType.Zone; + } } diff --git a/engine/schema/src/com/cloud/host/HostVO.java b/engine/schema/src/com/cloud/host/HostVO.java index 4af8fb73d8f..b7f6de2d4f5 100755 --- a/engine/schema/src/com/cloud/host/HostVO.java +++ b/engine/schema/src/com/cloud/host/HostVO.java @@ -16,11 +16,12 @@ // under the License. 
package com.cloud.host; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; +import com.cloud.agent.api.VgpuTypesInfo; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.resource.ResourceState; +import com.cloud.storage.Storage.StoragePoolType; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; import javax.persistence.Column; import javax.persistence.DiscriminatorColumn; @@ -37,13 +38,11 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; - -import com.cloud.agent.api.VgpuTypesInfo; -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.resource.ResourceState; -import com.cloud.storage.Storage.StoragePoolType; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "host") @@ -178,6 +177,11 @@ public class HostVO implements Host { return clusterId; } + @Override + public ResourceType resourceType() { + return ResourceType.Host; + } + public void setClusterId(Long clusterId) { this.clusterId = clusterId; } @@ -715,6 +719,11 @@ public class HostVO implements Host { return (getResourceState() == ResourceState.Maintenance || getResourceState() == ResourceState.ErrorInMaintenance || getResourceState() == ResourceState.PrepareForMaintenance); } + @Override + public boolean isDisabled() { + return (getResourceState() == ResourceState.Disabled); + } + public long getUpdated() { return updated; } @@ -732,4 +741,9 @@ public class HostVO implements Host { public void setUuid(String uuid) { this.uuid = uuid; } + + @Override + public PartitionType partitionType() { + return PartitionType.Host; + } } diff --git a/engine/schema/src/com/cloud/vm/VMInstanceVO.java 
b/engine/schema/src/com/cloud/vm/VMInstanceVO.java index 3d2c3af6772..fd91f0b44a7 100644 --- a/engine/schema/src/com/cloud/vm/VMInstanceVO.java +++ b/engine/schema/src/com/cloud/vm/VMInstanceVO.java @@ -16,11 +16,14 @@ // under the License. package com.cloud.vm; -import java.security.NoSuchAlgorithmException; -import java.security.SecureRandom; -import java.util.Date; -import java.util.Map; -import java.util.UUID; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.utils.db.Encrypt; +import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.StateMachine; +import com.cloud.utils.fsm.FiniteStateObject; +import com.cloud.vm.VirtualMachine.State; +import org.apache.commons.codec.binary.Base64; +import org.apache.log4j.Logger; import javax.persistence.Column; import javax.persistence.DiscriminatorColumn; @@ -36,16 +39,11 @@ import javax.persistence.TableGenerator; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; - -import org.apache.commons.codec.binary.Base64; -import org.apache.log4j.Logger; - -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.utils.db.Encrypt; -import com.cloud.utils.db.GenericDao; -import com.cloud.utils.db.StateMachine; -import com.cloud.utils.fsm.FiniteStateObject; -import com.cloud.vm.VirtualMachine.State; +import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; +import java.util.Date; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "vm_instance") @@ -562,4 +560,9 @@ public class VMInstanceVO implements VirtualMachine, FiniteStateObject, StateDao { + HAConfig findHAResource(long resourceId, HAResource.ResourceType resourceType); + List listHAResource(final Long resourceId, final HAResource.ResourceType resourceType); + void expireServerOwnership(long serverId); +} \ No newline at end of file diff --git a/engine/schema/src/org/apache/cloudstack/ha/dao/HAConfigDaoImpl.java 
b/engine/schema/src/org/apache/cloudstack/ha/dao/HAConfigDaoImpl.java
new file mode 100644
index 00000000000..4d74e2e6656
--- /dev/null
+++ b/engine/schema/src/org/apache/cloudstack/ha/dao/HAConfigDaoImpl.java
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.ha.dao;
+
+import com.cloud.utils.DateUtil;
+import com.cloud.utils.db.Attribute;
+import com.cloud.utils.db.DB;
+import com.cloud.utils.db.GenericDaoBase;
+import com.cloud.utils.db.SearchBuilder;
+import com.cloud.utils.db.SearchCriteria;
+import com.cloud.utils.db.Transaction;
+import com.cloud.utils.db.TransactionCallbackNoReturn;
+import com.cloud.utils.db.TransactionLegacy;
+import com.cloud.utils.db.TransactionStatus;
+import com.cloud.utils.db.UpdateBuilder;
+import org.apache.cloudstack.ha.HAConfig;
+import org.apache.cloudstack.ha.HAConfigVO;
+import org.apache.cloudstack.ha.HAResource;
+import org.apache.log4j.Logger;
+import org.springframework.stereotype.Component;
+
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * DAO for rows of the {@code ha_config} table: resource lookups, guarded HA state
+ * transitions and clearing of management-server ownership.
+ */
+@DB
+@Component
+public class HAConfigDaoImpl extends GenericDaoBase<HAConfigVO, Long> implements HAConfigDao {
+    private static final Logger LOG = Logger.getLogger(HAConfigDaoImpl.class);
+
+    private static final String EXPIRE_OWNERSHIP = "UPDATE ha_config set mgmt_server_id=NULL where mgmt_server_id=?";
+
+    private SearchBuilder<HAConfigVO> ResourceSearch;
+    private SearchBuilder<HAConfigVO> StateUpdateSearch;
+
+    private Attribute HAStateAttr;
+    private Attribute MsIdAttr;
+    private Attribute UpdateTimeAttr;
+
+    public HAConfigDaoImpl() {
+        super();
+
+        ResourceSearch = createSearchBuilder();
+        ResourceSearch.and("resourceId", ResourceSearch.entity().getResourceId(), SearchCriteria.Op.EQ);
+        ResourceSearch.and("resourceType", ResourceSearch.entity().getResourceType(), SearchCriteria.Op.EQ);
+        ResourceSearch.done();
+
+        StateUpdateSearch = createSearchBuilder();
+        StateUpdateSearch.and("id", StateUpdateSearch.entity().getId(), SearchCriteria.Op.EQ);
+        StateUpdateSearch.and("haState", StateUpdateSearch.entity().getHaState(), SearchCriteria.Op.EQ);
+        StateUpdateSearch.and("update", StateUpdateSearch.entity().getUpdateCount(), SearchCriteria.Op.EQ);
+        StateUpdateSearch.done();
+
+        HAStateAttr = _allAttributes.get("haState");
+        MsIdAttr = _allAttributes.get("managementServerId");
+        UpdateTimeAttr = _allAttributes.get("updateTime");
+        assert (HAStateAttr != null && MsIdAttr != null && UpdateTimeAttr != null) : "Couldn't find one of these attributes";
+    }
+
+    /**
+     * Transitions {@code vo} to {@code nextState}. The update only applies while the stored row
+     * still has {@code currentState} and the update count read from {@code vo}.
+     *
+     * @return {@code true} if a row was updated; {@code false} when {@code vo} is null, when the
+     *         state and owning management server would not change, or when no row matched
+     */
+    @Override
+    public boolean updateState(HAConfig.HAState currentState, HAConfig.Event event, HAConfig.HAState nextState, HAConfig vo, Object data) {
+        HAConfigVO haConfig = (HAConfigVO) vo;
+        if (haConfig == null) {
+            if (LOG.isTraceEnabled()) {
+                LOG.trace("Invalid ha config view object provided");
+            }
+            return false;
+        }
+
+        Long newManagementServerId = event.getServerId();
+        if (currentState == nextState && (haConfig.getManagementServerId() != null && haConfig.getManagementServerId().equals(newManagementServerId))) {
+            return false;
+        }
+
+        if (event == HAConfig.Event.Disabled) {
+            newManagementServerId = null;
+        }
+
+        SearchCriteria<HAConfigVO> sc = StateUpdateSearch.create();
+        sc.setParameters("id", haConfig.getId());
+        sc.setParameters("haState", currentState);
+        sc.setParameters("update", haConfig.getUpdateCount());
+
+        haConfig.incrUpdateCount();
+        UpdateBuilder ub = getUpdateBuilder(haConfig);
+        ub.set(haConfig, HAStateAttr, nextState);
+        ub.set(haConfig, UpdateTimeAttr, DateUtil.currentGMTTime());
+        ub.set(haConfig, MsIdAttr, newManagementServerId);
+
+        int result = update(ub, sc, null);
+        if (LOG.isTraceEnabled() && result <= 0) {
+            LOG.trace(String.format("Failed to update HA state from:%s to:%s due to event:%s for the ha_config id:%d", currentState, nextState, event, haConfig.getId()));
+        }
+        return result > 0;
+    }
+
+    /** Looks up the single HA config matching the given resource id and resource type. */
+    @Override
+    public HAConfig findHAResource(final long resourceId, final HAResource.ResourceType resourceType) {
+        final SearchCriteria<HAConfigVO> sc = ResourceSearch.create();
+        sc.setParameters("resourceId", resourceId);
+        sc.setParameters("resourceType", resourceType);
+        return findOneBy(sc);
+    }
+
+    /**
+     * Lists HA configs. The resource id criterion is set only when the id is non-null and
+     * positive, and the resource type criterion only when the type is non-null.
+     */
+    @Override
+    public List<HAConfig> listHAResource(final Long resourceId, final HAResource.ResourceType resourceType) {
+        final SearchCriteria<HAConfigVO> sc = ResourceSearch.create();
+        if (resourceId != null && resourceId > 0L) {
+            sc.setParameters("resourceId", resourceId);
+        }
+        if (resourceType != null) {
+            sc.setParameters("resourceType", resourceType);
+        }
+        return new ArrayList<HAConfig>(listBy(sc));
+    }
+
+    /**
+     * Clears {@code mgmt_server_id} on every {@code ha_config} row owned by {@code serverId}.
+     * An SQL failure rolls back the transaction and is logged, not rethrown.
+     */
+    @Override
+    public void expireServerOwnership(final long serverId) {
+        Transaction.execute(new TransactionCallbackNoReturn() {
+            @Override
+            public void doInTransactionWithoutResult(TransactionStatus status) {
+                TransactionLegacy txn = TransactionLegacy.currentTxn();
+                try (final PreparedStatement pstmt = txn.prepareAutoCloseStatement(EXPIRE_OWNERSHIP);) {
+                    pstmt.setLong(1, serverId);
+                    pstmt.executeUpdate();
+                } catch (SQLException e) {
+                    txn.rollback();
+                    // Pass the exception so the cause of the failure reaches the log.
+                    LOG.warn("Failed to expire HA ownership of management server id: " + serverId, e);
+                }
+            }
+        });
+    }
+}
diff --git a/engine/schema/src/org/apache/cloudstack/outofbandmanagement/dao/OutOfBandManagementDao.java b/engine/schema/src/org/apache/cloudstack/outofbandmanagement/dao/OutOfBandManagementDao.java index 5985b816256..8a4ee36710f 100644 --- a/engine/schema/src/org/apache/cloudstack/outofbandmanagement/dao/OutOfBandManagementDao.java +++ b/engine/schema/src/org/apache/cloudstack/outofbandmanagement/dao/OutOfBandManagementDao.java @@ -27,5 +27,5 @@ import java.util.List; public interface OutOfBandManagementDao extends GenericDao, StateDao { OutOfBandManagement findByHost(long hostId); List findAllByManagementServer(long serverId); - void expireOutOfBandManagementOwnershipByServer(long serverId); + void expireServerOwnership(long serverId); } diff --git a/engine/schema/src/org/apache/cloudstack/outofbandmanagement/dao/OutOfBandManagementDaoImpl.java b/engine/schema/src/org/apache/cloudstack/outofbandmanagement/dao/OutOfBandManagementDaoImpl.java index b914b6b702d..3cdd28fe25a 100644 --- a/engine/schema/src/org/apache/cloudstack/outofbandmanagement/dao/OutOfBandManagementDaoImpl.java +++
b/engine/schema/src/org/apache/cloudstack/outofbandmanagement/dao/OutOfBandManagementDaoImpl.java @@ -110,7 +110,7 @@ public class OutOfBandManagementDaoImpl extends GenericDaoBase + + + + + + + diff --git a/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java index 469bd8b55ad..b63e96326cd 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java +++ b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java @@ -1,4 +1,5 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -29,7 +30,8 @@ import com.cloud.hypervisor.Hypervisor; import com.cloud.resource.ResourceManager; import com.cloud.storage.Storage.StoragePoolType; import com.cloud.utils.component.AdapterBase; - +import com.cloud.vm.VirtualMachine; +import org.apache.cloudstack.ha.HAManager; import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; import org.apache.log4j.Logger; @@ -49,10 +51,17 @@ public class KVMInvestigator extends AdapterBase implements Investigator { ResourceManager _resourceMgr; @Inject PrimaryDataStoreDao _storagePoolDao; + @Inject + private HAManager haManager; @Override - public Boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) { + public Boolean isVmAlive(VirtualMachine vm, Host host) { + if (haManager.isHAEligible(host)) { + return haManager.isVMAliveOnHost(host); + } + Status status = isAgentAlive(host); + s_logger.debug("HA: HOST is ineligible legacy state " + status + " for host " + host.getId()); if (status == null) { return null; } @@ -65,6 +74,10 @@ public class KVMInvestigator extends AdapterBase implements Investigator { return null; } + if (haManager.isHAEligible(agent)) { + return haManager.getHostStatus(agent); + } + List clusterPools = 
_storagePoolDao.listPoolsByCluster(agent.getClusterId()); boolean hasNfs = false; for (StoragePoolVO pool : clusterPools) { @@ -119,6 +132,7 @@ public class KVMInvestigator extends AdapterBase implements Investigator { if (neighbourStatus == Status.Down && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) { hostStatus = Status.Down; } + s_logger.debug("HA: HOST is ineligible legacy state " + hostStatus + " for host " + agent.getId()); return hostStatus; } } diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java index b829f78f625..be5ab396d19 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java @@ -179,4 +179,9 @@ public class KVMHABase { return result; } + + public Boolean checkingHB() { + // TODO Auto-generated method stub + return null; + } } diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java index c2af191cae6..12cd06023c7 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java @@ -17,14 +17,18 @@ package com.cloud.hypervisor.kvm.resource; import java.util.ArrayList; + import java.util.List; import java.util.concurrent.Callable; +import javax.ejb.Local; + import org.apache.log4j.Logger; import com.cloud.utils.script.OutputInterpreter; import com.cloud.utils.script.Script; +@Local(value = {KVMHAChecker.class}) public class KVMHAChecker extends KVMHABase implements Callable { private static final Logger s_logger = Logger.getLogger(KVMHAChecker.class); private List _pools; @@ -40,10 +44,10 @@ public class KVMHAChecker extends KVMHABase implements Callable { * True means 
heartbeaing is on going, or we can't get it's status. False * means heartbeating is stopped definitely */ - private Boolean checkingHB() { + @Override + public Boolean checkingHB() { List results = new ArrayList(); for (NfsStoragePool pool : _pools) { - Script cmd = new Script(s_heartBeatPath, _heartBeatCheckerTimeout, s_logger); cmd.add("-i", pool._poolIp); cmd.add("-p", pool._poolMountSourcePath); @@ -53,9 +57,9 @@ public class KVMHAChecker extends KVMHABase implements Callable { cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000)); OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser(); String result = cmd.execute(parser); - s_logger.debug("pool: " + pool._poolIp); - s_logger.debug("reture: " + result); - s_logger.debug("parser: " + parser.getLine()); + s_logger.debug("KVMHAChecker pool: " + pool._poolIp); + s_logger.debug("KVMHAChecker result: " + result); + s_logger.debug("KVMHAChecker parser: " + parser.getLine()); if (result == null && parser.getLine().contains("> DEAD <")) { s_logger.debug("read heartbeat failed: " + result); results.add(false); diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java index 86950458fe1..082eb9f906d 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java @@ -69,6 +69,12 @@ public class KVMHAMonitor extends KVMHABase implements Runnable { } } + public NfsStoragePool getStoragePool(String uuid) { + synchronized (_storagePool) { + return _storagePool.get(uuid); + } + } + private class Monitor extends ManagedContextRunnable { @Override diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java new file mode 100644 index 
00000000000..f14d0a25b7f
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.hypervisor.kvm.resource;
+
+import com.cloud.utils.script.OutputInterpreter;
+import com.cloud.utils.script.Script;
+import org.apache.log4j.Logger;
+import org.joda.time.Duration;
+
+import java.util.concurrent.Callable;
+
+/**
+ * Runs the VM activity check script for an NFS storage pool and reports whether the line
+ * captured from the script output contains "DEAD".
+ */
+public class KVMHAVMActivityChecker extends KVMHABase implements Callable<Boolean> {
+    private static final Logger LOG = Logger.getLogger(KVMHAVMActivityChecker.class);
+
+    final private NfsStoragePool nfsStoragePool;
+    final private String hostIP;
+    final private String volumeUuidList;
+    final private String vmActivityCheckPath;
+    // NOTE(review): this is handed to Script as a number of seconds; confirm the unit Script expects.
+    final private Duration activityScriptTimeout = Duration.standardSeconds(3600L);
+    final private long suspectTimeInSeconds;
+
+    public KVMHAVMActivityChecker(final NfsStoragePool pool, final String host, final String volumeUUIDListString, String vmActivityCheckPath, final long suspectTime) {
+        this.nfsStoragePool = pool;
+        this.hostIP = host;
+        this.volumeUuidList = volumeUUIDListString;
+        this.vmActivityCheckPath = vmActivityCheckPath;
+        this.suspectTimeInSeconds = suspectTime;
+    }
+
+    /**
+     * Executes the activity script with the pool, host, volume list, current epoch seconds and
+     * suspect time as arguments.
+     *
+     * @return {@code false} only when {@code execute} returns null and the captured line
+     *         contains "DEAD"; {@code true} in every other case
+     */
+    @Override
+    public Boolean checkingHB() {
+        Script cmd = new Script(vmActivityCheckPath, activityScriptTimeout.getStandardSeconds(), LOG);
+        cmd.add("-i", nfsStoragePool._poolIp);
+        cmd.add("-p", nfsStoragePool._poolMountSourcePath);
+        cmd.add("-m", nfsStoragePool._mountDestPath);
+        cmd.add("-h", hostIP);
+        cmd.add("-u", volumeUuidList);
+        cmd.add("-t", String.valueOf(System.currentTimeMillis() / 1000));
+        cmd.add("-d", String.valueOf(suspectTimeInSeconds));
+        OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
+        String result = cmd.execute(parser);
+        final String line = parser.getLine();
+        LOG.debug("KVMHAVMActivityChecker pool: " + nfsStoragePool._poolIp);
+        LOG.debug("KVMHAVMActivityChecker result: " + result);
+        LOG.debug("KVMHAVMActivityChecker parser: " + line);
+        // The captured line is presumably null when the script printed nothing (TODO confirm),
+        // so guard it before calling contains() to avoid a NullPointerException.
+        return !(result == null && line != null && line.contains("DEAD"));
+    }
+
+    /** Callable entry point; delegates to {@link #checkingHB()}. */
+    @Override
+    public Boolean call() throws Exception {
+        return checkingHB();
+    }
+}
diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 5a5d01d6b96..4859e04ed16 100755 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -105,6 +105,7 @@ import com.cloud.agent.api.CheckNetworkAnswer; import com.cloud.agent.api.CheckNetworkCommand; import com.cloud.agent.api.CheckOnHostCommand; import com.cloud.agent.api.CheckStateCommand; +import com.cloud.agent.api.CheckVMActivityOnStoragePoolCommand; import com.cloud.agent.api.CheckVirtualMachineAnswer; import com.cloud.agent.api.CheckVirtualMachineCommand; import com.cloud.agent.api.CleanupNetworkRulesCmd; @@ -307,6 +308,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv private String
_resizeVolumePath; private String _createTmplPath; private String _heartBeatPath; + private String _vmActivityCheckPath; private String _securityGroupPath; private String _ovsPvlanDhcpHostPath; private String _ovsPvlanVmPath; @@ -668,6 +670,11 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv throw new ConfigurationException("Unable to find the resizevolume.sh"); } + _vmActivityCheckPath = Script.findScript(kvmScriptsDir, "kvmvmactivity.sh"); + if (_vmActivityCheckPath == null) { + throw new ConfigurationException("Unable to find kvmvmactivity.sh"); + } + _createTmplPath = Script.findScript(storageScriptsDir, "createtmplt.sh"); if (_createTmplPath == null) { throw new ConfigurationException("Unable to find the createtmplt.sh"); @@ -1396,6 +1403,8 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv return execute((PvlanSetupCommand)cmd); } else if (cmd instanceof CheckOnHostCommand) { return execute((CheckOnHostCommand)cmd); + } else if (cmd instanceof CheckVMActivityOnStoragePoolCommand) { + return execute((CheckVMActivityOnStoragePoolCommand)cmd); } else if (cmd instanceof OvsFetchInterfaceCommand) { return execute((OvsFetchInterfaceCommand)cmd); } else if (cmd instanceof OvsSetupBridgeCommand) { @@ -1411,7 +1420,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv } else if (cmd instanceof OvsVpcRoutingPolicyConfigCommand) { return execute((OvsVpcRoutingPolicyConfigCommand) cmd); } else { - s_logger.warn("Unsupported command "); + s_logger.warn("Unsupported command " + cmd.getClass()); return Answer.createUnsupportedCommandAnswer(cmd); } } catch (final IllegalArgumentException e) { @@ -1742,17 +1751,39 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv try { Boolean result = future.get(); if (result) { - return new Answer(cmd, false, "Heart is still beating..."); + return new Answer(cmd, false, "Heart is beating..."); } else { 
s_logger.warn("Heartbeat failed for : " + cmd.getHost().getPrivateNetwork().getIp().toString());
                 return new Answer(cmd);
             }
         } catch (InterruptedException e) {
-            return new Answer(cmd, false, "can't get status of host:");
+            return new Answer(cmd, false, "CheckOnHostCommand: can't get status of host: InterruptedException");
         } catch (ExecutionException e) {
-            return new Answer(cmd, false, "can't get status of host:");
+            return new Answer(cmd, false, "CheckOnHostCommand: can't get status of host: ExecutionException");
         }
+    }
 
+    /**
+     * Checks for VM disk activity of the given host on an NFS storage pool.
+     *
+     * A {@link KVMHAVMActivityChecker} is run on a short-lived single-thread executor. The executor
+     * is always shut down before returning so no worker thread outlives the command.
+     *
+     * @param cmd carries the storage pool, the investigated host, its volume list and the suspect time
+     * @return a failed Answer ("VMHA disk activity detected ...") when the checker reports activity,
+     *         a successful Answer when it reports none, and a failed Answer with an explanatory
+     *         message when the pool is not NFS or the check could not be completed
+     */
+    protected Answer execute(CheckVMActivityOnStoragePoolCommand cmd) {
+        final StorageFilerTO pool = cmd.getPool();
+        if (StoragePoolType.NetworkFilesystem != pool.getType()) {
+            return new Answer(cmd, false, "Unsupported Storage");
+        }
+        final ExecutorService executors = Executors.newSingleThreadExecutor();
+        try {
+            final NfsStoragePool nfspool = _monitor.getStoragePool(pool.getUuid());
+            final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, cmd.getHost().getPrivateNetwork().getIp(), cmd.getVolumeList(), _vmActivityCheckPath, cmd.getSuspectTimeInSeconds());
+            final Future<Boolean> future = executors.submit(ha);
+            if (future.get()) {
+                return new Answer(cmd, false, "VMHA disk activity detected ...");
+            }
+            return new Answer(cmd);
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so the calling thread can still observe the interruption.
+            Thread.currentThread().interrupt();
+            return new Answer(cmd, false, "CheckVMActivityOnStoragePoolCommand: can't get status of host: InterruptedException");
+        } catch (ExecutionException e) {
+            return new Answer(cmd, false, "CheckVMActivityOnStoragePoolCommand: can't get status of host: ExecutionException");
+        } finally {
+            // Without this, every command would leave an idle executor thread behind.
+            executors.shutdownNow();
+        }
     }
 
     protected Storage.StorageResourceType getStorageResourceType() {
diff --git a/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAConfig.java b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAConfig.java
new file mode 100644
index 00000000000..59ea720328f
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAConfig.java
@@ -0,0 +1,56 @@
+// Licensed
to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.kvm.ha;
+
+import org.apache.cloudstack.framework.config.ConfigKey;
+
+/**
+ * Cluster-scoped configuration keys used by the KVM host-HA provider.
+ *
+ * Every key is declared with {@code ConfigKey.Scope.Cluster}; all values are expressed in
+ * seconds or counts (Long) except the activity check failure ratio, which is a Double.
+ */
+public class KVMHAConfig {
+
+    /** Maximum time, in seconds, a health check is expected to take (default 10). */
+    public static final ConfigKey<Long> KvmHAHealthCheckTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.health.check.timeout", "10",
+            "The maximum length of time, in seconds, expected for an health check to complete.", true, ConfigKey.Scope.Cluster);
+
+    /** Maximum time, in seconds, an activity check is expected to take (default 60). */
+    public static final ConfigKey<Long> KvmHAActivityCheckTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.timeout", "60",
+            "The maximum length of time, in seconds, expected for an activity check to complete.", true, ConfigKey.Scope.Cluster);
+
+    /** Interval, in seconds, between activity checks (default 60). */
+    public static final ConfigKey<Long> KvmHAActivityCheckInterval = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.interval", "60",
+            "The interval, in seconds, between activity checks.", true, ConfigKey.Scope.Cluster);
+
+    /** Maximum number of activity check attempts before deciding to recover or degrade (default 10). */
+    public static final ConfigKey<Long> KvmHAActivityCheckMaxAttempts = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.max.attempts", "10",
+            "The maximum number of activity check attempts to perform before deciding to recover or degrade a resource.", true, ConfigKey.Scope.Cluster);
+
+    /** Failure ratio threshold used together with the maximum activity check attempts (default 0.7). */
+    public static final ConfigKey<Double> KvmHAActivityCheckFailureThreshold = new ConfigKey<>("Advanced", Double.class, "kvm.ha.activity.check.failure.ratio", "0.7",
+            "The activity check failure threshold ratio. This is used with the activity check maximum attempts for deciding to recover or degrade a resource. For most environments, please keep this value above 0.5.",
+            true, ConfigKey.Scope.Cluster);
+
+    /** Maximum time, in seconds, a resource may stay in the degraded state (default 300). */
+    public static final ConfigKey<Long> KvmHADegradedMaxPeriod = new ConfigKey<>("Advanced", Long.class, "kvm.ha.degraded.max.period", "300",
+            "The maximum length of time, in seconds, a resource can be in degraded state where only health checks are performed.", true, ConfigKey.Scope.Cluster);
+
+    /** Maximum time, in seconds, a recovery operation is expected to take (default 60). */
+    public static final ConfigKey<Long> KvmHARecoverTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.timeout", "60",
+            "The maximum length of time, in seconds, expected for a recovery operation to complete.", true, ConfigKey.Scope.Cluster);
+
+    /** Maximum time, in seconds, to wait for a resource to recover (default 600). */
+    public static final ConfigKey<Long> KvmHARecoverWaitPeriod = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.wait.period", "600",
+            "The maximum length of time, in seconds, to wait for a resource to recover.", true, ConfigKey.Scope.Cluster);
+
+    /** Maximum number of recovery attempts before the resource is fenced (default 1). */
+    public static final ConfigKey<Long> KvmHARecoverAttemptThreshold = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.failure.threshold", "1",
+            "The maximum recovery attempts to be made for a resource, after which the resource is fenced. The recovery counter resets when a health check passes for a resource.",
+            true, ConfigKey.Scope.Cluster);
+
+    /** Maximum time, in seconds, a fence operation is expected to take (default 60). */
+    public static final ConfigKey<Long> KvmHAFenceTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.fence.timeout", "60",
+            "The maximum length of time, in seconds, expected for a fence operation to complete.", true, ConfigKey.Scope.Cluster);
+
+}
diff --git a/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAProvider.java b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAProvider.java
new file mode 100644
index 00000000000..5399fd23a1c
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAProvider.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.cloudstack.kvm.ha;
+
+import com.cloud.host.Host;
+import com.cloud.hypervisor.Hypervisor;
+
+import org.apache.cloudstack.api.response.OutOfBandManagementResponse;
+import org.apache.cloudstack.framework.config.ConfigKey;
+import org.apache.cloudstack.framework.config.Configurable;
+import org.apache.cloudstack.ha.HAResource;
+import org.apache.cloudstack.ha.provider.HACheckerException;
+import org.apache.cloudstack.ha.provider.HAFenceException;
+import org.apache.cloudstack.ha.provider.HAProvider;
+import org.apache.cloudstack.ha.provider.HARecoveryException;
+import org.apache.cloudstack.ha.provider.host.HAAbstractHostProvider;
+import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement.PowerOperation;
+import org.apache.cloudstack.outofbandmanagement.OutOfBandManagementService;
+import org.apache.log4j.Logger;
+import org.joda.time.DateTime;
+
+import javax.inject.Inject;
+import java.security.InvalidParameterException;
+
+/**
+ * Host-HA provider for KVM and LXC hosts.
+ *
+ * Health and activity checks are delegated to {@link KVMHostActivityChecker}; recovery and fencing
+ * are carried out as out-of-band management power operations (RESET and OFF respectively).
+ */
+public final class KVMHAProvider extends HAAbstractHostProvider implements HAProvider<Host>, Configurable {
+    private final static Logger LOG = Logger.getLogger(KVMHAProvider.class);
+
+    @Inject
+    protected KVMHostActivityChecker hostActivityChecker;
+    @Inject
+    protected OutOfBandManagementService outOfBandManagementService;
+
+    /**
+     * A host is eligible when out-of-band management is enabled for it, it is neither in
+     * maintenance mode nor disabled, it has at least one neighbour reported by the activity
+     * checker, and its hypervisor type is KVM or LXC.
+     */
+    @Override
+    public boolean isEligible(final Host host) {
+        if (outOfBandManagementService.isOutOfBandManagementEnabled(host)){
+            return !isInMaintenanceMode(host) && !isDisabled(host) &&
+                    hostActivityChecker.getNeighbors(host).length > 0 &&
+                    (Hypervisor.HypervisorType.KVM.equals(host.getHypervisorType()) ||
+                            Hypervisor.HypervisorType.LXC.equals(host.getHypervisorType()));
+        }
+        return false;
+    }
+
+    /** Delegates the health check to the host activity checker. */
+    @Override
+    public boolean isHealthy(final Host r) throws HACheckerException {
+        return hostActivityChecker.isHealthy(r);
+    }
+
+    /** Delegates the activity check (relative to {@code suspectTime}) to the host activity checker. */
+    @Override
+    public boolean hasActivity(final Host r, final DateTime suspectTime) throws HACheckerException {
+        return hostActivityChecker.isActive(r, suspectTime);
+    }
+
+    /**
+     * Attempts to recover the host by issuing an out-of-band RESET.
+     *
+     * @return the success flag of the power operation, or {@code false} when out-of-band
+     *         management is not enabled for the host
+     * @throws HARecoveryException when the power operation throws
+     */
+    @Override
+    public boolean recover(Host r) throws HARecoveryException {
+        try {
+            if (outOfBandManagementService.isOutOfBandManagementEnabled(r)){
+                final OutOfBandManagementResponse resp = outOfBandManagementService.executePowerOperation(r, PowerOperation.RESET, null);
+                return resp.getSuccess();
+            } else {
+                // Nothing was attempted here: the service is simply not usable for this host.
+                LOG.warn("OOBM service is not configured or enabled for this host " + r.getName());
+                return false;
+            }
+        } catch (Exception e){
+            // Reaching this point means the enabled check passed or threw, i.e. the operation itself failed.
+            LOG.warn("OOBM recover operation failed for the host " + r.getName() + " error is " + e.getMessage(), e);
+            throw new HARecoveryException("OOBM recover operation failed for the host " + r.getName(), e);
+        }
+    }
+
+    /**
+     * Attempts to fence the host by issuing an out-of-band power OFF.
+     *
+     * @return the success flag of the power operation, or {@code false} when out-of-band
+     *         management is not enabled for the host
+     * @throws HAFenceException when the power operation throws
+     */
+    @Override
+    public boolean fence(Host r) throws HAFenceException {
+        try {
+            if (outOfBandManagementService.isOutOfBandManagementEnabled(r)){
+                final OutOfBandManagementResponse resp = outOfBandManagementService.executePowerOperation(r, PowerOperation.OFF, null);
+                return resp.getSuccess();
+            } else {
+                // Nothing was attempted here: the service is simply not usable for this host.
+                LOG.warn("OOBM service is not configured or enabled for this host " + r.getName());
+                return false;
+            }
+        } catch (Exception e){
+            // Reaching this point means the enabled check passed or threw, i.e. the operation itself failed.
+            LOG.warn("OOBM fence operation failed for this host " + r.getName() + " error is " + e.getMessage(), e);
+            throw new HAFenceException("OOBM fence operation failed for this host " + r.getName() , e);
+        }
+    }
+
+    @Override
+    public HAResource.ResourceSubType resourceSubType() {
+        return HAResource.ResourceSubType.KVM;
+    }
+
+    /**
+     * Resolves an HA provider setting to the matching {@link KVMHAConfig} key, evaluated for the
+     * cluster of the given host.
+     *
+     * @throws InvalidParameterException when {@code name} has no mapping
+     */
+    @Override
+    public Object getConfigValue(final HAProviderConfig name, final Host host) {
+        final Long clusterId = host.getClusterId();
+        switch (name) {
+            case HealthCheckTimeout:
+                return KVMHAConfig.KvmHAHealthCheckTimeout.valueIn(clusterId);
+            case ActivityCheckTimeout:
+                return KVMHAConfig.KvmHAActivityCheckTimeout.valueIn(clusterId);
+            case MaxActivityCheckInterval:
+                return KVMHAConfig.KvmHAActivityCheckInterval.valueIn(clusterId);
+            case MaxActivityChecks:
+                return KVMHAConfig.KvmHAActivityCheckMaxAttempts.valueIn(clusterId);
+            case ActivityCheckFailureRatio:
+                return KVMHAConfig.KvmHAActivityCheckFailureThreshold.valueIn(clusterId);
+            case RecoveryWaitTimeout:
+                return KVMHAConfig.KvmHARecoverWaitPeriod.valueIn(clusterId);
+            case RecoveryTimeout:
+                return KVMHAConfig.KvmHARecoverTimeout.valueIn(clusterId);
+            case FenceTimeout:
+                return KVMHAConfig.KvmHAFenceTimeout.valueIn(clusterId);
+            case MaxRecoveryAttempts:
+                return KVMHAConfig.KvmHARecoverAttemptThreshold.valueIn(clusterId);
+            case MaxDegradedWaitTimeout:
+                return KVMHAConfig.KvmHADegradedMaxPeriod.valueIn(clusterId);
+            default:
+                throw new InvalidParameterException("Unknown HAProviderConfig " + name.toString());
+        }
+    }
+
+    @Override
+    public String getConfigComponentName() {
+        return KVMHAConfig.class.getSimpleName();
+    }
+
+    /** Lists every {@link KVMHAConfig} key exposed by this provider. */
+    @Override
+    public ConfigKey<?>[] getConfigKeys() {
+        return new ConfigKey<?>[] {
+            KVMHAConfig.KvmHAHealthCheckTimeout,
+            KVMHAConfig.KvmHAActivityCheckTimeout,
+            KVMHAConfig.KvmHARecoverTimeout,
+            KVMHAConfig.KvmHAFenceTimeout,
+            KVMHAConfig.KvmHAActivityCheckInterval,
+            KVMHAConfig.KvmHAActivityCheckMaxAttempts,
+            KVMHAConfig.KvmHAActivityCheckFailureThreshold,
+            KVMHAConfig.KvmHADegradedMaxPeriod,
+            KVMHAConfig.KvmHARecoverWaitPeriod,
+            KVMHAConfig.KvmHARecoverAttemptThreshold
+        };
+    }
+}
diff --git a/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java
new file mode 100644
index 00000000000..060b484fec7
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.kvm.ha;
+
+import com.cloud.agent.AgentManager;
+import com.cloud.agent.api.Answer;
+import com.cloud.agent.api.CheckOnHostCommand;
+import com.cloud.agent.api.CheckVMActivityOnStoragePoolCommand;
+import com.cloud.exception.StorageUnavailableException;
+import com.cloud.host.Host;
+import com.cloud.host.HostVO;
+import com.cloud.host.Status;
+import com.cloud.hypervisor.Hypervisor;
+import com.cloud.resource.ResourceManager;
+import com.cloud.storage.StorageManager;
+import com.cloud.storage.StoragePool;
+import com.cloud.storage.Volume;
+import com.cloud.storage.VolumeVO;
+import com.cloud.storage.dao.VolumeDao;
+import com.cloud.utils.component.AdapterBase;
+import com.cloud.vm.VMInstanceVO;
+import com.cloud.vm.VirtualMachine;
+import com.cloud.vm.dao.VMInstanceDao;
+import org.apache.cloudstack.ha.provider.ActivityCheckerInterface;
+import org.apache.cloudstack.ha.provider.HACheckerException;
+import org.apache.cloudstack.ha.provider.HealthCheckerInterface;
+import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.log4j.Logger;
+
+import javax.inject.Inject;
+import java.util.ArrayList;
+import org.joda.time.DateTime;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Health and activity checker for KVM/LXC hosts.
+ *
+ * Health is probed with a {@link CheckOnHostCommand} sent to the host agent and, failing that,
+ * to neighbouring hosts in the same cluster. Activity is probed with a
+ * {@link CheckVMActivityOnStoragePoolCommand} per storage pool that holds volumes of VMs on the host.
+ */
+public class KVMHostActivityChecker extends AdapterBase implements ActivityCheckerInterface<Host>, HealthCheckerInterface<Host> {
+    private final static Logger LOG = Logger.getLogger(KVMHostActivityChecker.class);
+
+    @Inject
+    private VolumeDao volumeDao;
+    @Inject
+    private VMInstanceDao vmInstanceDao;
+    @Inject
+    private AgentManager agentMgr;
+    @Inject
+    private PrimaryDataStoreDao storagePool;
+    @Inject
+    private StorageManager storageManager;
+    @Inject
+    private ResourceManager resourceManager;
+
+    /**
+     * Reports whether VM disk activity was seen for the host.
+     *
+     * @throws HACheckerException wrapping any failure raised while performing the check
+     */
+    @Override
+    public boolean isActive(Host r, DateTime suspectTime) throws HACheckerException {
+        try {
+            return isVMActivtyOnHost(r, suspectTime);
+        }
+        catch (StorageUnavailableException e){
+            throw new HACheckerException("Storage is unavailable to do the check, mostly host is not reachable ", e);
+        }
+        catch (Exception e){
+            throw new HACheckerException("Operation timed out, mostly host is not reachable ", e);
+        }
+    }
+
+    @Override
+    public boolean isHealthy(Host r) {
+        return isAgentActive(r);
+    }
+
+    /**
+     * Sends a {@link CheckOnHostCommand} to the host itself and returns {@code true} as soon as
+     * that answer maps to {@code Status.Up}; otherwise neighbouring KVM/LXC hosts are asked and the
+     * resulting state is traced.
+     *
+     * NOTE(review): after the early return, hostStatus can only be Unknown, Disconnected or Down,
+     * so the final comparison with Status.Up looks like it is always false and the neighbour
+     * investigation only influences the trace output - confirm this is intended.
+     *
+     * @throws IllegalStateException when the host is neither KVM nor LXC
+     */
+    private boolean isAgentActive(Host agent) {
+        if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) {
+            throw new IllegalStateException("Calling KVM investigator for non KVM Host of type " + agent.getHypervisorType());
+        }
+        Status hostStatus = Status.Unknown;
+        Status neighbourStatus = Status.Unknown;
+        final CheckOnHostCommand cmd = new CheckOnHostCommand(agent);
+        try {
+            Answer answer = agentMgr.easySend(agent.getId(), cmd);
+            if (answer != null) {
+                // A successful answer to this command is mapped to Down, a failed one to Up.
+                hostStatus = answer.getResult() ? Status.Down : Status.Up;
+                if ( hostStatus == Status.Up ){
+                    return true;
+                }
+            }
+            else {
+                hostStatus = Status.Disconnected;
+            }
+        } catch (Exception e) {
+            LOG.warn("Failed to send command to host: " + agent.getId(), e);
+        }
+
+        List<HostVO> neighbors = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up);
+        for (HostVO neighbor : neighbors) {
+            if (neighbor.getId() == agent.getId() || (neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM && neighbor.getHypervisorType() != Hypervisor.HypervisorType.LXC)) {
+                continue;
+            }
+            if (LOG.isTraceEnabled()){
+                LOG.trace("Investigating host:" + agent.getId() + " via neighbouring host:" + neighbor.getId());
+            }
+            try {
+                Answer answer = agentMgr.easySend(neighbor.getId(), cmd);
+                if (answer != null) {
+                    neighbourStatus = answer.getResult() ? Status.Down : Status.Up;
+                    if (LOG.isTraceEnabled()){
+                        LOG.trace("Neighbouring host:" + neighbor.getId() + " returned status:" + neighbourStatus + " for the investigated host:" + agent.getId());
+                    }
+                    if (neighbourStatus == Status.Up) {
+                        break;
+                    }
+                }
+            } catch (Exception e) {
+                if (LOG.isTraceEnabled()) {
+                    LOG.trace("Failed to send command to host: " + neighbor.getId(), e);
+                }
+            }
+        }
+        if (neighbourStatus == Status.Up && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) {
+            hostStatus = Status.Disconnected;
+        }
+        if (neighbourStatus == Status.Down && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) {
+            hostStatus = Status.Down;
+        }
+
+        if (LOG.isTraceEnabled()){
+            LOG.trace("Resource state = " + hostStatus.name());
+        }
+        return hostStatus == Status.Up;
+    }
+
+    /**
+     * Sends a VM activity check for every storage pool that holds volumes of VMs on the host.
+     *
+     * NOTE(review): the status is overwritten on every iteration, so only the answer for the last
+     * pool visited decides the result; with no pools at all the result stays {@code true}.
+     * Confirm whether the per-pool results were meant to be combined.
+     *
+     * @throws IllegalStateException when the host is neither KVM nor LXC, or a pool returns no answer
+     */
+    private boolean isVMActivtyOnHost(Host agent, DateTime suspectTime) throws StorageUnavailableException {
+        if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) {
+            throw new IllegalStateException("Calling KVM investigator for non KVM Host of type " + agent.getHypervisorType());
+        }
+        boolean activityStatus = true;
+        HashMap<StoragePool, List<Volume>> poolVolMap = getVolumeUuidOnHost(agent);
+        for (StoragePool pool : poolVolMap.keySet()) {
+            //for each storage pool find activity
+            List<Volume> volume_list = poolVolMap.get(pool);
+            final CheckVMActivityOnStoragePoolCommand cmd = new CheckVMActivityOnStoragePoolCommand(agent, pool, volume_list, suspectTime);
+            //send the command to appropriate storage pool
+            Answer answer = storageManager.sendToPool(pool, getNeighbors(agent), cmd);
+            if (answer != null) {
+                // A successful answer means no activity was detected.
+                activityStatus = ! answer.getResult();
+            } else {
+                throw new IllegalStateException("Did not get a valid response for VM activity check for host " + agent.getId());
+            }
+        }
+        if (LOG.isDebugEnabled()){
+            LOG.debug("Resource active = " + activityStatus);
+        }
+        return activityStatus;
+    }
+
+    /**
+     * Groups the volumes of all VMs listed for the host by the storage pool they reside on.
+     *
+     * NOTE(review): a volume whose pool lookup returns null ends up under a null key - confirm
+     * that callers never see such volumes.
+     */
+    private HashMap<StoragePool, List<Volume>> getVolumeUuidOnHost(Host agent) {
+        List<VMInstanceVO> vm_list = vmInstanceDao.listByHostId(agent.getId());
+        List<VolumeVO> volume_list = new ArrayList<VolumeVO>();
+        for (VirtualMachine vm : vm_list) {
+            List<VolumeVO> vm_volume_list = volumeDao.findByInstance(vm.getId());
+            volume_list.addAll(vm_volume_list);
+        }
+
+        HashMap<StoragePool, List<Volume>> poolVolMap = new HashMap<StoragePool, List<Volume>>();
+        for (Volume vol : volume_list) {
+            StoragePool sp = storagePool.findById(vol.getPoolId());
+            if (!poolVolMap.containsKey(sp)) {
+                List<Volume> list = new ArrayList<Volume>();
+                list.add(vol);
+
+                poolVolMap.put(sp, list);
+            } else {
+                poolVolMap.get(sp).add(vol);
+            }
+        }
+        return poolVolMap;
+    }
+
+    /**
+     * Returns the ids of the other KVM/LXC hosts in the same cluster that are in {@code Status.Up}.
+     */
+    public long[] getNeighbors(Host agent) {
+        List<Long> neighbors = new ArrayList<Long>();
+        List<HostVO> cluster_hosts = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up);
+        for (HostVO host : cluster_hosts) {
+            if (host.getId() == agent.getId() || (host.getHypervisorType() != Hypervisor.HypervisorType.KVM && host.getHypervisorType() != Hypervisor.HypervisorType.LXC)) {
+                continue;
+            }
+            neighbors.add(host.getId());
+        }
+        return ArrayUtils.toPrimitive(neighbors.toArray(new Long[neighbors.size()]));
+    }
+
+}
diff --git
a/plugins/hypervisors/kvm/test/org/apache/cloudstack/kvm/ha/KVMHostHATest.java b/plugins/hypervisors/kvm/test/org/apache/cloudstack/kvm/ha/KVMHostHATest.java
new file mode 100644
index 00000000000..26b7e6ae206
--- /dev/null
+++ b/plugins/hypervisors/kvm/test/org/apache/cloudstack/kvm/ha/KVMHostHATest.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.cloudstack.kvm.ha;
+
+import com.cloud.exception.StorageUnavailableException;
+import com.cloud.host.Host;
+import com.cloud.hypervisor.Hypervisor.HypervisorType;
+import org.apache.cloudstack.ha.provider.HACheckerException;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.when;
+
+import org.joda.time.DateTime;
+
+/**
+ * Verifies that KVMHAProvider forwards health and activity checks to its
+ * KVMHostActivityChecker and returns the checker's result unchanged.
+ * Only the activity checker is wired into the provider; the out-of-band
+ * management service is left unset, so recover/fence/isEligible are not covered here.
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class KVMHostHATest {
+
+    @Mock
+    private Host host;
+    @Mock
+    private KVMHostActivityChecker kvmHostActivityChecker;
+    private KVMHAProvider kvmHAProvider;
+
+    // Builds a fresh provider per test and injects the mocked checker by direct field assignment.
+    // NOTE(review): the Mockito runner presumably already initialises the @Mock fields - confirm
+    // whether the explicit initMocks call is still needed.
+    @Before
+    public void setup() {
+        MockitoAnnotations.initMocks(this);
+        kvmHAProvider = new KVMHAProvider();
+        kvmHAProvider.hostActivityChecker = kvmHostActivityChecker;
+    }
+
+    // isHealthy must return true when the checker reports the host healthy.
+    @Test
+    public void testHostActivityForHealthyHost() throws HACheckerException, StorageUnavailableException {
+        when(host.getHypervisorType()).thenReturn(HypervisorType.KVM);
+        when(kvmHostActivityChecker.isHealthy(host)).thenReturn(true);
+        assertTrue(kvmHAProvider.isHealthy(host));
+    }
+
+    // isHealthy must return false when the checker reports the host unhealthy.
+    @Test
+    public void testHostActivityForUnHealthyHost() throws HACheckerException, StorageUnavailableException {
+        when(host.getHypervisorType()).thenReturn(HypervisorType.KVM);
+        when(kvmHostActivityChecker.isHealthy(host)).thenReturn(false);
+        assertFalse(kvmHAProvider.isHealthy(host));
+    }
+
+    // hasActivity must return true when the checker reports activity for the same suspect time.
+    @Test
+    public void testHostActivityForActiveHost() throws HACheckerException, StorageUnavailableException {
+        when(host.getHypervisorType()).thenReturn(HypervisorType.KVM);
+        DateTime dt = new DateTime();
+        when(kvmHostActivityChecker.isActive(host, dt)).thenReturn(true);
+        assertTrue(kvmHAProvider.hasActivity(host, dt));
+    }
+
+    // hasActivity must return false when the checker reports no activity for the same suspect time.
+    @Test
+    public void testHostActivityForDownHost() throws HACheckerException, StorageUnavailableException {
+        when(host.getHypervisorType()).thenReturn(HypervisorType.KVM);
+        DateTime dt = new DateTime();
+        when(kvmHostActivityChecker.isActive(host, dt)).thenReturn(false);
+        assertFalse(kvmHAProvider.hasActivity(host, dt));
+    }
+
+}
diff --git a/plugins/hypervisors/simulator/pom.xml b/plugins/hypervisors/simulator/pom.xml
index 5dbf4bdf2d0..c2cc84f68a1 100644
--- a/plugins/hypervisors/simulator/pom.xml
+++ b/plugins/hypervisors/simulator/pom.xml
@@ -63,5 +63,10 @@
 cloud-engine-storage-snapshot
 ${project.version}
 
+
+ com.google.guava
+ guava
+ ${cs.guava.version}
+
 
diff --git a/plugins/hypervisors/simulator/resources/META-INF/cloudstack/simulator-compute/spring-simulator-compute-context.xml b/plugins/hypervisors/simulator/resources/META-INF/cloudstack/simulator-compute/spring-simulator-compute-context.xml
index ba7581d7e25..d55cf02adf8 100644
--- a/plugins/hypervisors/simulator/resources/META-INF/cloudstack/simulator-compute/spring-simulator-compute-context.xml
+++ b/plugins/hypervisors/simulator/resources/META-INF/cloudstack/simulator-compute/spring-simulator-compute-context.xml
@@ -36,4 +36,8 @@
 
+
+
+
+
 
diff --git a/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java b/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java
index b0c0bc677b5..ab0ed475d9b 100644
--- a/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java
+++ b/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java
@@ -29,6 +29,8 @@ import javax.naming.ConfigurationException;
 
 
 import com.cloud.agent.api.routing.SetMonitorServiceCommand;
+import com.cloud.api.commands.ConfigureSimulatorHAProviderState;
+import com.cloud.api.commands.ListSimulatorHAStateTransitions;
 import org.apache.log4j.Logger;
 import org.springframework.stereotype.Component;
 import org.apache.cloudstack.storage.command.DeleteCommand;
@@ -195,6 +197,8 @@ public class SimulatorManagerImpl
extends ManagerBase implements SimulatorManage
         cmdList.add(ConfigureSimulatorCmd.class);
         cmdList.add(QuerySimulatorMockCmd.class);
         cmdList.add(CleanupSimulatorMockCmd.class);
+        cmdList.add(ConfigureSimulatorHAProviderState.class);
+        cmdList.add(ListSimulatorHAStateTransitions.class);
         return cmdList;
     }
 
diff --git a/plugins/hypervisors/simulator/src/com/cloud/api/commands/ConfigureSimulatorHAProviderState.java b/plugins/hypervisors/simulator/src/com/cloud/api/commands/ConfigureSimulatorHAProviderState.java
new file mode 100644
index 00000000000..1d68a184a5a
--- /dev/null
+++ b/plugins/hypervisors/simulator/src/com/cloud/api/commands/ConfigureSimulatorHAProviderState.java
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package com.cloud.api.commands;
+
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.host.Host;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiArgValidator;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.ApiErrorCode;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.SuccessResponse;
+import org.apache.cloudstack.context.CallContext;
+import org.apache.cloudstack.ha.HAManager;
+import org.apache.cloudstack.ha.SimulatorHAProvider;
+import org.apache.cloudstack.ha.SimulatorHAState;
+
+import javax.inject.Inject;
+
+/**
+ * Admin-only API command that sets the state the simulator HA provider reports for a host
+ * (healthy, activity, recoverable, fence-able), for probing and testing.
+ */
+@APICommand(name = ConfigureSimulatorHAProviderState.APINAME,
+        description="configures simulator HA provider state for a host for probing and testing",
+        responseObject=SuccessResponse.class,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class ConfigureSimulatorHAProviderState extends BaseCmd {
+    public static final String APINAME = "configureSimulatorHAProviderState";
+
+    @Inject
+    private HAManager haManager;
+
+    /////////////////////////////////////////////////////
+    //////////////// API parameters /////////////////////
+    /////////////////////////////////////////////////////
+
+    @Parameter(name = ApiConstants.HOST_ID, type = BaseCmd.CommandType.UUID, entityType = HostResponse.class,
+            description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber})
+    private Long hostId;
+
+    @Parameter(name = ApiConstants.HEALTH, type = CommandType.BOOLEAN,
+            description = "Set true if haprovider for simulator host should be healthy",
+            required = true)
+    private Boolean healthy;
+
+    @Parameter(name = ApiConstants.ACTIVITY, type = CommandType.BOOLEAN,
+            description = "Set true if haprovider for simulator host should have activity",
+            required = true)
+    private Boolean activity;
+
+    @Parameter(name = ApiConstants.RECOVER, type = CommandType.BOOLEAN,
+            description = "Set true if haprovider for simulator host should be recoverable",
+            required = true)
+    private Boolean recovery;
+
+    @Parameter(name = ApiConstants.FENCE, type = CommandType.BOOLEAN,
+            description = "Set true if haprovider for simulator host should be fence-able",
+            required = true)
+    private Boolean fenceable;
+
+    /////////////////////////////////////////////////////
+    /////////////////// Accessors ///////////////////////
+    /////////////////////////////////////////////////////
+
+    public Long getHostId() {
+        return hostId;
+    }
+
+    /////////////////////////////////////////////////////
+    /////////////// API Implementation///////////////////
+    /////////////////////////////////////////////////////
+
+    /**
+     * Looks up the host, builds the requested {@link SimulatorHAState} and hands it to the
+     * simulator HA provider when one is registered. The response reports success only when the
+     * provider was found.
+     *
+     * @throws ServerApiException (PARAM_ERROR) when no host matches the given id
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final Host host = _resourceService.getHost(getHostId());
+        if (host == null) {
+            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId());
+        }
+        final SimulatorHAState haState = new SimulatorHAState(healthy, activity, recovery, fenceable);
+        // The provider is looked up by its lower-cased simple class name.
+        final SimulatorHAProvider simulatorHAProvider = (SimulatorHAProvider) haManager.getHAProvider(SimulatorHAProvider.class.getSimpleName().toLowerCase());
+        if (simulatorHAProvider != null) {
+            simulatorHAProvider.setHAStateForHost(host.getId(), haState);
+        }
+        final SuccessResponse response = new SuccessResponse();
+        response.setSuccess(simulatorHAProvider != null);
+        response.setResponseName(getCommandName());
+        response.setObjectName("simulatorhaprovider");
+        setResponseObject(response);
+    }
+
+    @Override
+    public String getCommandName() {
+        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    @Override
+    public long getEntityOwnerId() {
+        return CallContext.current().getCallingAccountId();
+    }
+}
diff --git a/plugins/hypervisors/simulator/src/com/cloud/api/commands/ListSimulatorHAStateTransitions.java b/plugins/hypervisors/simulator/src/com/cloud/api/commands/ListSimulatorHAStateTransitions.java
new file mode 100644
index 00000000000..52368747523
--- /dev/null
+++ b/plugins/hypervisors/simulator/src/com/cloud/api/commands/ListSimulatorHAStateTransitions.java
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package com.cloud.api.commands;
+
+import com.cloud.api.response.SimulatorHAStateResponse;
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.host.Host;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiArgValidator;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.ApiErrorCode;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.BaseListCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.ListResponse;
+import org.apache.cloudstack.context.CallContext;
+import org.apache.cloudstack.ha.HAManager;
+import org.apache.cloudstack.ha.SimulatorHAProvider;
+
+import javax.inject.Inject;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Admin-only list command that returns the HA state transitions the simulator
+ * HA provider has recorded for a single host. Intended for probing and testing.
+ */
+@APICommand(name = ListSimulatorHAStateTransitions.APINAME,
+        description="list recent simulator HA state transitions for a host for probing and testing",
+        responseObject=SimulatorHAStateResponse.class,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class ListSimulatorHAStateTransitions extends BaseListCmd {
+    public static final String APINAME = "listSimulatorHAStateTransitions";
+
+    @Inject
+    private HAManager haManager;
+
+    /////////////////////////////////////////////////////
+    //////////////// API parameters /////////////////////
+    /////////////////////////////////////////////////////
+
+    @Parameter(name = ApiConstants.HOST_ID, type = BaseCmd.CommandType.UUID, entityType = HostResponse.class,
+            description = "List by host ID", required = true, validations = {ApiArgValidator.PositiveNumber})
+    private Long hostId;
+
+    /////////////////////////////////////////////////////
+    /////////////////// Accessors ///////////////////////
+    /////////////////////////////////////////////////////
+
+    /**
+     * @return the value of the required host ID parameter
+     */
+    public Long getHostId() {
+        return hostId;
+    }
+
+    /////////////////////////////////////////////////////
+    /////////////// API Implementation///////////////////
+    /////////////////////////////////////////////////////
+
+    /**
+     * Looks up the host, asks the simulator HA provider for its recorded
+     * transitions and publishes them as the list response. When the simulator
+     * provider is not registered with the HA manager the response is an empty list.
+     *
+     * @throws ServerApiException with {@code PARAM_ERROR} when no host matches the given ID
+     */
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final Host host = _resourceService.getHost(getHostId());
+        if (host == null) {
+            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId());
+        }
+
+        // Providers are looked up by the lower-cased simple class name.
+        final SimulatorHAProvider simulatorHAProvider = (SimulatorHAProvider) haManager.getHAProvider(SimulatorHAProvider.class.getSimpleName().toLowerCase());
+        List<SimulatorHAStateResponse> recentStates = new ArrayList<>();
+        if (simulatorHAProvider != null) {
+            recentStates = simulatorHAProvider.listHAStateTransitions(host.getId());
+        }
+        final ListResponse<SimulatorHAStateResponse> response = new ListResponse<>();
+        response.setResponses(recentStates);
+        response.setResponseName(getCommandName());
+        response.setObjectName("simulatorhastatetransition");
+        setResponseObject(response);
+    }
+
+    /**
+     * @return the lower-cased API name followed by the standard response suffix
+     */
+    @Override
+    public String getCommandName() {
+        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    /**
+     * @return the account ID of the caller in the current call context
+     */
+    @Override
+    public long getEntityOwnerId() {
+        return CallContext.current().getCallingAccountId();
+    }
+}
diff --git a/plugins/hypervisors/simulator/src/com/cloud/api/response/SimulatorHAStateResponse.java b/plugins/hypervisors/simulator/src/com/cloud/api/response/SimulatorHAStateResponse.java new file mode 100644 index 00000000000..47481ad605f --- /dev/null +++ b/plugins/hypervisors/simulator/src/com/cloud/api/response/SimulatorHAStateResponse.java @@ -0,0 +1,65 @@ +// Licensed to
the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package com.cloud.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseResponse;
+import org.apache.cloudstack.ha.HAConfig;
+
+/**
+ * API response describing one HA state transition recorded by the simulator:
+ * the new and previous state, the triggering event and the counters captured
+ * at that moment. States and events are stored as lower-cased strings.
+ */
+public class SimulatorHAStateResponse extends BaseResponse {
+    @SerializedName(ApiConstants.HA_STATE) @Param(description="the ha state")
+    private String haState;
+
+    @SerializedName("prevhastate") @Param(description="the previous ha state")
+    private String previousHaState;
+
+    @SerializedName("event") @Param(description="the event that caused state transition")
+    private String haEvent;
+
+    @SerializedName("activitycounter") @Param(description="the activity counter")
+    private Long activityCounter;
+
+    @SerializedName("recoverycounter") @Param(description="the recovery counter")
+    private Long recoveryCounter;
+
+    /**
+     * Stores the lower-cased name of the new HA state; a {@code null} argument leaves the field untouched.
+     */
+    public void setHaState(final HAConfig.HAState haState) {
+        if (haState != null) {
+            this.haState = haState.toString().toLowerCase();
+        }
+    }
+
+    /**
+     * Stores the lower-cased name of the previous HA state; a {@code null} argument leaves the field untouched.
+     */
+    public void setPreviousHaState(final HAConfig.HAState previousHaState) {
+        if (previousHaState != null) {
+            this.previousHaState = previousHaState.toString().toLowerCase();
+        }
+    }
+
+    /**
+     * Stores the lower-cased name of the triggering event; a {@code null} argument leaves the field untouched.
+     */
+    public void setHaEvent(final HAConfig.Event haEvent) {
+        // Guarded like the two state setters so a missing event cannot raise a NullPointerException.
+        if (haEvent != null) {
+            this.haEvent = haEvent.toString().toLowerCase();
+        }
+    }
+
+    public void setActivityCounter(Long activityCounter) {
+        this.activityCounter = activityCounter;
+    }
+
+    public void setRecoveryCounter(Long recoveryCounter) {
+        this.recoveryCounter = recoveryCounter;
+    }
+}
diff --git a/plugins/hypervisors/simulator/src/com/cloud/ha/SimulatorInvestigator.java b/plugins/hypervisors/simulator/src/com/cloud/ha/SimulatorInvestigator.java index 7191ae3dab1..de0c75f095b 100644 --- a/plugins/hypervisors/simulator/src/com/cloud/ha/SimulatorInvestigator.java +++ b/plugins/hypervisors/simulator/src/com/cloud/ha/SimulatorInvestigator.java @@ -21,6 +21,7 @@ import java.util.List; import javax.ejb.Local; import javax.inject.Inject; +import org.apache.cloudstack.ha.HAManager; import org.apache.log4j.Logger; import com.cloud.agent.AgentManager; @@ -50,6 +51,8 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator { ResourceManager _resourceMgr; @Inject MockConfigurationDao _mockConfigDao; + @Inject + private HAManager haManager; protected SimulatorInvestigator() { } @@ -60,6 +63,10 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator { return null; } + if (haManager.isHAEligible(agent)) { + return haManager.getHostStatus(agent); + } + CheckOnHostCommand cmd = new CheckOnHostCommand(agent); List neighbors = _resourceMgr.listHostsInClusterByStatus(agent.getClusterId(), Status.Up); for (HostVO neighbor : neighbors) { @@ -81,6 +88,10 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator { @Override public Boolean isVmAlive(VirtualMachine vm, Host host) { + if (haManager.isHAEligible(host)) { + return haManager.isVMAliveOnHost(host); + } + CheckVirtualMachineCommand cmd = new CheckVirtualMachineCommand(vm.getInstanceName()); try { Answer answer = _agentMgr.send(vm.getHostId(), cmd); diff --git
a/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java new file mode 100644 index 00000000000..3c3e92f6fff --- /dev/null +++ b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java @@ -0,0 +1,152 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.cloudstack.ha;
+
+import com.cloud.api.response.SimulatorHAStateResponse;
+import com.cloud.host.Host;
+import com.cloud.hypervisor.Hypervisor;
+import com.cloud.utils.fsm.StateListener;
+import com.cloud.utils.fsm.StateMachine2;
+import org.apache.cloudstack.ha.provider.HACheckerException;
+import org.apache.cloudstack.ha.provider.HAFenceException;
+import org.apache.cloudstack.ha.provider.HAProvider;
+import org.apache.cloudstack.ha.provider.HARecoveryException;
+import org.apache.cloudstack.ha.provider.host.HAAbstractHostProvider;
+import org.joda.time.DateTime;
+
+import javax.inject.Inject;
+import java.security.InvalidParameterException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * HA provider for simulator hosts. The outcome of every health, activity,
+ * recovery and fence operation is taken from a per-host {@link SimulatorHAState}
+ * registered through {@link #setHAStateForHost(Long, SimulatorHAState)}; a host
+ * without a registered state is not eligible and every operation on it reports false.
+ * The provider also listens to HA state-machine transitions and records them on
+ * the host's simulator state.
+ */
+public class SimulatorHAProvider extends HAAbstractHostProvider implements HAProvider<Host>, StateListener<HAConfig.HAState, HAConfig.Event, HAConfig> {
+
+    @Inject
+    private HAManager haManager;
+
+    // Scripted HA behaviour keyed by host ID.
+    private final Map<Long, SimulatorHAState> hostHAStateMap = new ConcurrentHashMap<>();
+
+    /**
+     * Registers this provider as a listener on the HA state machine.
+     */
+    public SimulatorHAProvider() {
+        HAConfig.HAState.getStateMachine().registerListener(this);
+    }
+
+    /**
+     * Replaces the scripted state of a host and purges the host's HA counter.
+     *
+     * @param hostId ID of the simulator host
+     * @param state  the behaviour the provider should report for that host from now on
+     */
+    public void setHAStateForHost(final Long hostId, final SimulatorHAState state) {
+        hostHAStateMap.put(hostId, state);
+        haManager.purgeHACounter(hostId, HAResource.ResourceType.Host);
+    }
+
+    /**
+     * @param hostId ID of the simulator host
+     * @return the transitions recorded for the host, or an empty list when no state is registered for it
+     */
+    public List<SimulatorHAStateResponse> listHAStateTransitions(final Long hostId) {
+        final SimulatorHAState haState = hostHAStateMap.get(hostId);
+        if (haState == null) {
+            return Collections.emptyList();
+        }
+        return haState.listRecentStateTransitions();
+    }
+
+    @Override
+    public HAResource.ResourceType resourceType() {
+        return HAResource.ResourceType.Host;
+    }
+
+    @Override
+    public HAResource.ResourceSubType resourceSubType() {
+        return HAResource.ResourceSubType.Simulator;
+    }
+
+    /**
+     * A host is eligible when it is not in maintenance mode, not disabled, has a
+     * registered simulator state and uses the Simulator hypervisor type.
+     */
+    @Override
+    public boolean isEligible(final Host host) {
+        final SimulatorHAState haState = hostHAStateMap.get(host.getId());
+        return !isInMaintenanceMode(host) && !isDisabled(host) && haState != null
+                && Hypervisor.HypervisorType.Simulator.equals(host.getHypervisorType());
+    }
+
+    /**
+     * @return the scripted health flag, or false when no state is registered for the host
+     */
+    @Override
+    public boolean isHealthy(final Host host) throws HACheckerException {
+        final SimulatorHAState haState = hostHAStateMap.get(host.getId());
+        return haState != null && haState.isHealthy();
+    }
+
+    /**
+     * @param afterThis not consulted by the simulator
+     * @return the scripted activity flag, or false when no state is registered for the host
+     */
+    @Override
+    public boolean hasActivity(final Host host, final DateTime afterThis) throws HACheckerException {
+        final SimulatorHAState haState = hostHAStateMap.get(host.getId());
+        return haState != null && haState.hasActivity();
+    }
+
+    /**
+     * @return the scripted recoverable flag, or false when no state is registered for the host
+     */
+    @Override
+    public boolean recover(final Host host) throws HARecoveryException {
+        final SimulatorHAState haState = hostHAStateMap.get(host.getId());
+        return haState != null && haState.canRecover();
+    }
+
+    /**
+     * @return the scripted fence-able flag, or false when no state is registered for the host
+     */
+    @Override
+    public boolean fence(final Host host) throws HAFenceException {
+        final SimulatorHAState haState = hostHAStateMap.get(host.getId());
+        return haState != null && haState.canFenced();
+    }
+
+    /**
+     * Returns fixed, small values for every provider setting. The one dynamic
+     * entry is the activity-check failure ratio, which is 1.0 when the host's
+     * scripted state reports activity and 0.0 otherwise.
+     *
+     * @throws InvalidParameterException for a setting this provider does not know
+     */
+    @Override
+    public Object getConfigValue(final HAProvider.HAProviderConfig name, final Host host) {
+        switch (name) {
+            case HealthCheckTimeout:
+                return 5L;
+            case ActivityCheckTimeout:
+                return 5L;
+            case RecoveryTimeout:
+                return 5L;
+            case FenceTimeout:
+                return 5L;
+            case MaxActivityCheckInterval:
+                return 1L;
+            case MaxActivityChecks:
+                return 3L;
+            case ActivityCheckFailureRatio:
+                final SimulatorHAState haState = hostHAStateMap.get(host.getId());
+                return (haState != null && haState.hasActivity()) ? 1.0 : 0.0;
+            case MaxDegradedWaitTimeout:
+                return 1L;
+            case MaxRecoveryAttempts:
+                return 2L;
+            case RecoveryWaitTimeout:
+                return 1L;
+            default:
+                throw new InvalidParameterException("Unknown HAProviderConfig " + name.toString());
+        }
+    }
+
+    /**
+     * No pre-transition handling is done; always returns false.
+     */
+    @Override
+    public boolean preStateTransitionEvent(final HAConfig.HAState oldState, final HAConfig.Event event,
+            final HAConfig.HAState newState, final HAConfig vo, final boolean status, final Object opaque) {
+        return false;
+    }
+
+    /**
+     * Records a successful transition of a host resource on that host's scripted
+     * state, together with the host's current HA counter.
+     *
+     * @return false when the resource is not a host, no state is registered for it
+     *         or {@code status} is false; otherwise the result of recording the transition
+     */
+    @Override
+    public boolean postStateTransitionEvent(final StateMachine2.Transition<HAConfig.HAState, HAConfig.Event> transition,
+            final HAConfig vo, final boolean status, final Object opaque) {
+        if (vo.getResourceType() != HAResource.ResourceType.Host) {
+            return false;
+        }
+        final SimulatorHAState haState = hostHAStateMap.get(vo.getResourceId());
+        if (haState == null || !status) {
+            return false;
+        }
+        final HAResourceCounter counter = haManager.getHACounter(vo.getResourceId(), vo.getResourceType());
+        return haState.addStateTransition(transition.getToState(), transition.getCurrentState(), transition.getEvent(), counter);
+    }
+}
\ No newline at end of file
diff --git a/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAState.java b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAState.java new file mode 100644 index 00000000000..f47ed2f11a2 --- /dev/null +++ b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAState.java @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.ha;
+
+import com.cloud.api.response.SimulatorHAStateResponse;
+import com.google.common.collect.EvictingQueue;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Queue;
+
+/**
+ * Scripted HA behaviour of one simulator host: four flags that decide what the
+ * simulator HA provider reports for health, activity, recovery and fencing,
+ * plus a bounded history of the HA state transitions recorded for the host.
+ */
+public final class SimulatorHAState {
+    // Capacity of the transition history; once full, adding a new entry evicts the oldest one.
+    private static final int MAX_RECENT_TRANSITIONS = 100;
+
+    private boolean healthy;
+    private boolean activity;
+    private boolean recover;
+    private boolean fence;
+    // NOTE(review): EvictingQueue is not synchronized; confirm that recording and listing
+    // cannot run on different threads at the same time, or add locking here.
+    private final Queue<SimulatorHAStateResponse> stateTransitions = EvictingQueue.create(MAX_RECENT_TRANSITIONS);
+
+    /**
+     * @param healthy  value returned by {@link #isHealthy()}
+     * @param activity value returned by {@link #hasActivity()}
+     * @param recover  value returned by {@link #canRecover()}
+     * @param fence    value returned by {@link #canFenced()}
+     */
+    public SimulatorHAState(boolean healthy, boolean activity, boolean recover, boolean fence) {
+        this.healthy = healthy;
+        this.activity = activity;
+        this.recover = recover;
+        this.fence = fence;
+    }
+
+    public boolean isHealthy() {
+        return healthy;
+    }
+
+    public void setHealthy(boolean healthy) {
+        this.healthy = healthy;
+    }
+
+    public boolean hasActivity() {
+        return activity;
+    }
+
+    public void setActivity(boolean activity) {
+        this.activity = activity;
+    }
+
+    public boolean canRecover() {
+        return recover;
+    }
+
+    public void setRecover(boolean recover) {
+        this.recover = recover;
+    }
+
+    public boolean canFenced() {
+        return fence;
+    }
+
+    public void setFence(boolean fence) {
+        this.fence = fence;
+    }
+
+    /**
+     * Appends one transition to the history.
+     *
+     * @param newHaState state after the transition
+     * @param oldHaState state before the transition
+     * @param event      event that caused the transition
+     * @param counter    counter whose activity-check and recovery values are copied into the
+     *                   entry; when {@code null} the entry carries no counter values
+     * @return the result of adding the entry to the history queue
+     */
+    public boolean addStateTransition(final HAConfig.HAState newHaState, final HAConfig.HAState oldHaState, final HAConfig.Event event, final HAResourceCounter counter) {
+        final SimulatorHAStateResponse stateResponse = new SimulatorHAStateResponse();
+        stateResponse.setHaState(newHaState);
+        stateResponse.setPreviousHaState(oldHaState);
+        stateResponse.setHaEvent(event);
+        if (counter != null) {
+            stateResponse.setActivityCounter(counter.getActivityCheckCounter());
+            stateResponse.setRecoveryCounter(counter.getRecoveryCounter());
+        }
+        stateResponse.setObjectName("hastatetransition");
+        return stateTransitions.add(stateResponse);
+    }
+
+    /**
+     * @return a new list holding the recorded transitions in queue order; changes to the
+     *         returned list do not affect the history
+     */
+    public List<SimulatorHAStateResponse> listRecentStateTransitions() {
+        return new ArrayList<>(stateTransitions);
+    }
+}
diff --git a/pom.xml b/pom.xml index 7f594df7991..ea903f4001c 100644 --- a/pom.xml +++ b/pom.xml @@ -828,7 +828,6 @@ tools/ngui/static/js/lib/* **/.checkstyle scripts/installer/windows/acs_license.rtf - test/integration/component/test_host_ha.sh diff --git a/scripts/vm/hypervisor/kvm/kvmvmactivity.sh b/scripts/vm/hypervisor/kvm/kvmvmactivity.sh new file mode 100755 index 00000000000..2e0b535b901 --- /dev/null +++ b/scripts/vm/hypervisor/kvm/kvmvmactivity.sh @@ -0,0 +1,134 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+
+# Decides whether a KVM host still looks alive and prints exactly one banner,
+# "=====> ALIVE <=====" or "=====> DEAD <======", on stdout.
+#   1. Heartbeat: ALIVE when the number stored in $MountPoint/KVMHA/hb-$HostIP
+#      is less than 61 seconds behind the current epoch time.
+#   2. Disk activity: otherwise compares the newest modification time of the
+#      given volume files with the suspect time, or with the value remembered
+#      in $MountPoint/KVMHA/ac-$HostIP by an earlier run for the same suspect time.
+# Exit status: 0 after printing a banner, 1 after the usage text, 2 when -i or
+# -d is missing.
+
+help() {
+  printf "Usage: $0
+                    -i nfs server ip
+                    -p nfs server path
+                    -m mount point
+                    -h host
+                    -u volume uuid list
+                    -t time on ms
+                    -d suspect time\n"
+  exit 1
+}
+
+# Prints the ALIVE banner when $1 is numerically greater than $2, the DEAD banner otherwise.
+report_activity() {
+  if [[ $1 -gt $2 ]]; then
+    echo "=====> ALIVE <====="
+  else
+    echo "=====> DEAD <======"
+  fi
+}
+
+#set -x
+
+NfsSvrIP=
+NfsSvrPath=
+MountPoint=
+HostIP=
+UUIDList=
+MSTime=
+SuspectTime=
+
+while getopts 'i:p:m:u:t:h:d:' OPTION
+do
+  case $OPTION in
+  i)
+     NfsSvrIP="$OPTARG"
+     ;;
+  p)
+     NfsSvrPath="$OPTARG"
+     ;;
+  m)
+     MountPoint="$OPTARG"
+     ;;
+  h)
+     HostIP="$OPTARG"
+     ;;
+  u)
+     UUIDList="$OPTARG"
+     ;;
+  t)
+     MSTime="$OPTARG"
+     ;;
+  d)
+     SuspectTime="$OPTARG"
+     ;;
+  *)
+     help
+     ;;
+  esac
+done
+
+if [ -z "$NfsSvrIP" ]
+then
+  exit 2
+fi
+
+if [ -z "$SuspectTime" ]
+then
+  exit 2
+fi
+
+hbFile="$MountPoint/KVMHA/hb-$HostIP"
+acFile="$MountPoint/KVMHA/ac-$HostIP"
+
+# First check: heartbeat file
+now=$(date +%s)
+# A missing or unreadable heartbeat file simply yields an empty value here.
+hb=$(cat "$hbFile" 2>/dev/null)
+# Do the arithmetic only when the file holds a single integer; anything else
+# falls through to the disk activity check instead of feeding garbage to expr/test.
+if [[ "$hb" =~ ^[[:space:]]*(-?[0-9]+)[[:space:]]*$ ]]
+then
+  diff=$(expr "$now" - "${BASH_REMATCH[1]}")
+  if [ "$diff" -lt 61 ]
+  then
+    echo "=====> ALIVE <====="
+    exit 0
+  fi
+fi
+
+if [ -z "$UUIDList" ]
+then
+  echo "=====> DEAD <======"
+  exit 0
+fi
+
+# Second check: disk activity check
+# NOTE(review): a failing cd is not treated as an error, as before; confirm whether
+# the script should stop here when the mount point is unavailable.
+cd "$MountPoint"
+# Split the comma separated list into an array so that the names are never glob-expanded.
+read -r -a volumeUuids <<< "${UUIDList//,/ }"
+latestUpdateTime=$(stat -c %Y "${volumeUuids[@]}" | sort -nr | head -1)
+
+if [ ! -f "$acFile" ]; then
+  echo "$SuspectTime:$latestUpdateTime:$MSTime" > "$acFile"
+
+  report_activity "$latestUpdateTime" "$SuspectTime"
+else
+  # The file holds "suspectTime:latestUpdateTime:msTime" as written by the previous run.
+  IFS=':' read -r lastSuspectTime lastUpdateTime _ < "$acFile"
+  echo "$SuspectTime:$latestUpdateTime:$MSTime" > "$acFile"
+
+  if [[ $lastSuspectTime -ne $SuspectTime ]]; then
+    # New suspicion window: compare against the suspect time itself.
+    report_activity "$latestUpdateTime" "$SuspectTime"
+  else
+    # Same suspicion window: any write since the previous run counts as activity.
+    report_activity "$latestUpdateTime" "$lastUpdateTime"
+  fi
+fi
+
+exit 0
diff --git a/server/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 77ecc7f2e39..5aff66b16a0 100644 --- a/server/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -70,6 +70,15 @@ value="#{resourceDiscoverersRegistry.registered}" /> + + + + + + + + + diff --git a/server/src/com/cloud/alert/AlertManagerImpl.java b/server/src/com/cloud/alert/AlertManagerImpl.java index 80b5100c4ac..4038be54fcf 100755 --- a/server/src/com/cloud/alert/AlertManagerImpl.java +++ b/server/src/com/cloud/alert/AlertManagerImpl.java @@ -760,7 +760,8 @@ public class AlertManagerImpl extends ManagerBase implements AlertManager, Confi (alertType != AlertManager.AlertType.ALERT_TYPE_STORAGE_MISC) && (alertType != AlertManager.AlertType.ALERT_TYPE_MANAGMENT_NODE) && (alertType != AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED) && - (alertType != AlertManager.AlertType.ALERT_TYPE_OOBM_AUTH_ERROR)) { + (alertType != AlertManager.AlertType.ALERT_TYPE_OOBM_AUTH_ERROR) && + (alertType != AlertManager.AlertType.ALERT_TYPE_HA_ACTION)) { alert =
_alertDao.getLastAlert(alertType.getType(), dataCenterId, podId, clusterId); } diff --git a/server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java b/server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java index 50a30ab4bc1..d5542566c63 100644 --- a/server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java +++ b/server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java @@ -28,6 +28,8 @@ import javax.ejb.Local; import javax.inject.Inject; import com.cloud.host.dao.HostDetailsDao; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.dao.HAConfigDao; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; @@ -61,6 +63,8 @@ public class HostJoinDaoImpl extends GenericDaoBase implements @Inject private HostDetailsDao hostDetailsDao; @Inject + private HAConfigDao haConfigDao; + @Inject private OutOfBandManagementDao outOfBandManagementDao; private final SearchBuilder hostSearch; @@ -224,6 +228,7 @@ public class HostJoinDaoImpl extends GenericDaoBase implements } } + hostResponse.setHostHAResponse(haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host)); hostResponse.setOutOfBandManagementResponse(outOfBandManagementDao.findByHost(host.getId())); hostResponse.setResourceState(host.getResourceState().toString()); diff --git a/server/src/com/cloud/api/query/vo/HostJoinVO.java b/server/src/com/cloud/api/query/vo/HostJoinVO.java index dcd058fce10..ea2e5185c76 100644 --- a/server/src/com/cloud/api/query/vo/HostJoinVO.java +++ b/server/src/com/cloud/api/query/vo/HostJoinVO.java @@ -36,6 +36,7 @@ import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.org.Cluster; import com.cloud.resource.ResourceState; import com.cloud.utils.db.GenericDao; +import org.apache.cloudstack.ha.HAConfig; import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement; /** @@ -99,6 +100,15 @@ public class HostJoinVO extends BaseViewVO 
implements InternalIdentity, Identity @Enumerated(value = EnumType.STRING) private OutOfBandManagement.PowerState outOfBandManagementPowerState; + @Column(name = "ha_enabled") + private boolean hostHAEnabled = false; + + @Column(name = "ha_state") + private HAConfig.HAState hostHAState; + + @Column(name = "ha_provider") + private String hostHAProvider; + @Column(name = "resource_state") @Enumerated(value = EnumType.STRING) private ResourceState resourceState; @@ -260,6 +270,18 @@ public class HostJoinVO extends BaseViewVO implements InternalIdentity, Identity return outOfBandManagementPowerState; } + public boolean isHostHAEnabled() { + return hostHAEnabled; + } + + public HAConfig.HAState getHostHAState() { + return hostHAState; + } + + public String getHostHAProvider() { + return hostHAProvider; + } + public ResourceState getResourceState() { return resourceState; } diff --git a/server/src/com/cloud/server/StatsCollector.java b/server/src/com/cloud/server/StatsCollector.java index acbe3d093a4..c5677ecd0ac 100755 --- a/server/src/com/cloud/server/StatsCollector.java +++ b/server/src/com/cloud/server/StatsCollector.java @@ -18,7 +18,6 @@ package com.cloud.server; import java.util.ArrayList; import java.util.Calendar; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -32,11 +31,6 @@ import java.util.concurrent.TimeUnit; import javax.inject.Inject; -import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement; -import org.apache.cloudstack.outofbandmanagement.OutOfBandManagementService; -import org.apache.cloudstack.outofbandmanagement.OutOfBandManagementVO; -import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; -import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.usage.UsageUtils; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; @@ -139,8 +133,6 @@ public class StatsCollector extends ManagerBase 
implements ComponentMethodInterc @Inject private HostDao _hostDao; @Inject - private OutOfBandManagementDao outOfBandManagementDao; - @Inject private UserVmDao _userVmDao; @Inject private VolumeDao _volsDao; @@ -157,8 +149,6 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc @Inject private ResourceManager _resourceMgr; @Inject - private OutOfBandManagementService outOfBandManagementService; - @Inject private ConfigurationDao _configDao; @Inject private EndPointSelector _epSelector; @@ -198,7 +188,6 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc private ConcurrentHashMap _storagePoolStats = new ConcurrentHashMap(); long hostStatsInterval = -1L; - long hostOutOfBandManagementStatsInterval = -1L; long hostAndVmStatsInterval = -1L; long storageStatsInterval = -1L; long volumeStatsInterval = -1L; @@ -237,7 +226,6 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc private void init(Map configs) { _executor = Executors.newScheduledThreadPool(6, new NamedThreadFactory("StatsCollector")); - hostOutOfBandManagementStatsInterval = OutOfBandManagementService.SyncThreadInterval.value(); hostStatsInterval = NumbersUtil.parseLong(configs.get("host.stats.interval"), 60000L); hostAndVmStatsInterval = NumbersUtil.parseLong(configs.get("vm.stats.interval"), 60000L); storageStatsInterval = NumbersUtil.parseLong(configs.get("storage.stats.interval"), 60000L); @@ -249,10 +237,6 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc _executor.scheduleWithFixedDelay(new HostCollector(), 15000L, hostStatsInterval, TimeUnit.MILLISECONDS); } - if (hostOutOfBandManagementStatsInterval > 0) { - _executor.scheduleWithFixedDelay(new HostOutOfBandManagementStatsCollector(), 15000L, hostOutOfBandManagementStatsInterval, TimeUnit.MILLISECONDS); - } - if (hostAndVmStatsInterval > 0) { _executor.scheduleWithFixedDelay(new VmStatsCollector(), 15000L, 
hostAndVmStatsInterval, TimeUnit.MILLISECONDS); } @@ -371,36 +355,6 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc } } - class HostOutOfBandManagementStatsCollector extends ManagedContextRunnable { - @Override - protected void runInContext() { - try { - s_logger.debug("HostOutOfBandManagementStatsCollector is running..."); - List outOfBandManagementHosts = outOfBandManagementDao.findAllByManagementServer(ManagementServerNode.getManagementServerId()); - if (outOfBandManagementHosts == null) { - return; - } - for (OutOfBandManagement outOfBandManagementHost : outOfBandManagementHosts) { - Host host = _hostDao.findById(outOfBandManagementHost.getHostId()); - if (host == null) { - continue; - } - if (outOfBandManagementService.isOutOfBandManagementEnabled(host)) { - outOfBandManagementService.submitBackgroundPowerSyncTask(host); - } else if (outOfBandManagementHost.getPowerState() != OutOfBandManagement.PowerState.Disabled) { - if (outOfBandManagementService.transitionPowerStateToDisabled(Collections.singletonList(host))) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Out-of-band management was disabled in zone/cluster/host, disabled power state for host id:" + host.getId()); - } - } - } - } - } catch (Throwable t) { - s_logger.error("Error trying to retrieve host out-of-band management stats", t); - } - } - } - class VmStatsCollector extends ManagedContextRunnable { @Override protected void runInContext() { diff --git a/server/src/com/cloud/storage/StorageManagerImpl.java b/server/src/com/cloud/storage/StorageManagerImpl.java index aeb0bb5f59f..4394b3332e7 100755 --- a/server/src/com/cloud/storage/StorageManagerImpl.java +++ b/server/src/com/cloud/storage/StorageManagerImpl.java @@ -1006,7 +1006,6 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C Command[] cmdArray = cmds.toCommands(); for (Command cmd : cmdArray) { long targetHostId = _hvGuruMgr.getGuruProcessedCommandTargetHost(hostId, 
cmd); - answers.add(_agentMgr.send(targetHostId, cmd)); } return new Pair(hostId, answers.toArray(new Answer[answers.size()])); @@ -2075,8 +2074,6 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C " for template id " +templateOnImageStore.getTemplateId(), th); } } - - } // get bytesReadRate from service_offering, disk_offering and vm.disk.throttling.bytes_read_rate
diff --git a/server/src/org/apache/cloudstack/ha/HAManager.java b/server/src/org/apache/cloudstack/ha/HAManager.java
new file mode 100644
index 00000000000..ef622b6a0df
--- /dev/null
+++ b/server/src/org/apache/cloudstack/ha/HAManager.java
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.ha;
+
+import com.cloud.host.Host;
+import com.cloud.host.Status;
+import org.apache.cloudstack.framework.config.ConfigKey;
+import org.apache.cloudstack.ha.provider.HAProvider;
+
+/**
+ * Management-side entry point of the HA framework. Besides the operations
+ * inherited from {@link HAConfigManager} it declares the settings that size the
+ * health-check, activity-check, recovery and fence queues and their worker
+ * pools, and the operations used to drive HA state, look up providers and
+ * counters, and answer investigator queries for HA-eligible resources.
+ */
+public interface HAManager extends HAConfigManager {
+
+    ConfigKey<Integer> MaxConcurrentHealthCheckOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.concurrent.health.check.operations",
+            "50",
+            "The number of concurrent health check operations per management server. This setting determines the size of the thread pool consuming the HEALTH CHECK queue.", true);
+
+    ConfigKey<Integer> MaxPendingHealthCheckOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.pending.health.check.operations",
+            "5000",
+            "The number of pending health check operations per management server. This setting determines the size of the HEALTH CHECK queue.", true);
+
+    ConfigKey<Integer> MaxConcurrentActivityCheckOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.concurrent.activity.check.operations",
+            "25",
+            "The number of concurrent activity check operations per management server. This setting determines the size of the thread pool consuming the ACTIVITY CHECK queue.",
+            true);
+
+    ConfigKey<Integer> MaxPendingActivityCheckOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.pending.activity.check.operations",
+            "2500",
+            "The number of pending activity check operations per management server. This setting determines the size of the size of the ACTIVITY CHECK queue.", true);
+
+    ConfigKey<Integer> MaxConcurrentRecoveryOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.concurrent.recovery.operations",
+            "25",
+            "The number of concurrent recovery operations per management server.", true);
+
+    ConfigKey<Integer> MaxPendingRecoveryOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.pending.recovery.operations",
+            "2500",
+            "The number of pending recovery operations per management server. This setting determines the size of the size of the RECOVERY queue.", true);
+
+    ConfigKey<Integer> MaxConcurrentFenceOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.concurrent.fence.operations",
+            "25",
+            "The number of concurrent fence operations per management server.", true);
+
+    ConfigKey<Integer> MaxPendingFenceOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.pending.fence.operations",
+            "2500",
+            "The number of pending fence operations per management server. This setting determines the size of the size of the FENCE queue.", true);
+
+    /**
+     * Applies {@code event} to the HA state of {@code haConfig}.
+     * NOTE(review): presumably returns whether the transition was accepted; confirm against the implementation.
+     */
+    boolean transitionHAState(final HAConfig.Event event, final HAConfig haConfig);
+
+    /**
+     * Looks up a registered HA provider by name.
+     * NOTE(review): the provider's type argument is not visible in this file, so the raw type is kept;
+     * whether an unknown name yields {@code null} should be confirmed against the implementation.
+     */
+    HAProvider getHAProvider(final String name);
+
+    /**
+     * Returns the HA counter kept for the given resource.
+     */
+    HAResourceCounter getHACounter(final Long resourceId, final HAResource.ResourceType resourceType);
+
+    /**
+     * Discards the HA counter kept for the given resource.
+     */
+    void purgeHACounter(final Long resourceId, final HAResource.ResourceType resourceType);
+
+    /**
+     * Tells whether the HA framework is responsible for the given resource.
+     */
+    boolean isHAEligible(final HAResource resource);
+
+    /**
+     * Reports, from the HA framework's point of view, whether VMs on the host are alive.
+     * NOTE(review): the boxed return type suggests {@code null} may mean "unknown"; confirm.
+     */
+    Boolean isVMAliveOnHost(final Host host);
+
+    /**
+     * Reports the host status as derived by the HA framework.
+     */
+    Status getHostStatus(final Host host);
+}
\ No newline at end of file
diff --git a/server/src/org/apache/cloudstack/ha/HAManagerImpl.java b/server/src/org/apache/cloudstack/ha/HAManagerImpl.java new file mode 100644 index 00000000000..0229f0a0266 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/HAManagerImpl.java @@ -0,0 +1,743 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +package org.apache.cloudstack.ha; + +import com.cloud.cluster.ClusterManagerListener; +import com.cloud.cluster.ManagementServerHost; +import com.cloud.dc.ClusterDetailsDao; +import com.cloud.dc.ClusterDetailsVO; +import com.cloud.dc.DataCenter; +import com.cloud.dc.DataCenterDetailVO; +import com.cloud.dc.dao.DataCenterDetailsDao; +import com.cloud.domain.Domain; +import com.cloud.event.ActionEvent; +import com.cloud.event.ActionEventUtils; +import com.cloud.event.EventTypes; +import com.cloud.host.Host; +import com.cloud.host.Status; +import com.cloud.host.dao.HostDao; +import com.cloud.org.Cluster; +import com.cloud.utils.component.ComponentContext; +import com.cloud.utils.component.ManagerBase; +import com.cloud.utils.component.PluggableService; +import com.cloud.utils.db.Transaction; +import com.cloud.utils.db.TransactionCallback; +import com.cloud.utils.db.TransactionStatus; +import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.utils.fsm.NoTransitionException; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.command.admin.ha.ConfigureHAForHostCmd; +import org.apache.cloudstack.api.command.admin.ha.DisableHAForClusterCmd; +import org.apache.cloudstack.api.command.admin.ha.DisableHAForHostCmd; +import org.apache.cloudstack.api.command.admin.ha.DisableHAForZoneCmd; +import org.apache.cloudstack.api.command.admin.ha.EnableHAForClusterCmd; +import org.apache.cloudstack.api.command.admin.ha.EnableHAForHostCmd; +import org.apache.cloudstack.api.command.admin.ha.EnableHAForZoneCmd; +import org.apache.cloudstack.api.command.admin.ha.ListHostHAProvidersCmd; +import org.apache.cloudstack.api.command.admin.ha.ListHostHAResourcesCmd; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.framework.config.ConfigKey; +import 
org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.ha.dao.HAConfigDao; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HAProvider.HAProviderConfig; +import org.apache.cloudstack.ha.task.ActivityCheckTask; +import org.apache.cloudstack.ha.task.FenceTask; +import org.apache.cloudstack.ha.task.HealthCheckTask; +import org.apache.cloudstack.ha.task.RecoveryTask; +import org.apache.cloudstack.kernel.Partition; +import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.poll.BackgroundPollManager; +import org.apache.cloudstack.poll.BackgroundPollTask; +import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.log4j.Logger; + +import javax.inject.Inject; +import javax.naming.ConfigurationException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +public final class HAManagerImpl extends ManagerBase implements HAManager, ClusterManagerListener, PluggableService, Configurable { + public static final Logger LOG = Logger.getLogger(HAManagerImpl.class); + + @Inject + private HAConfigDao haConfigDao; + + @Inject + private HostDao hostDao; + + @Inject + private ClusterDetailsDao clusterDetailsDao; + + @Inject + private DataCenterDetailsDao dataCenterDetailsDao; + + @Inject + private BackgroundPollManager pollManager; + + private List> haProviders; + private Map> haProviderMap = new HashMap<>(); + + private static ExecutorService healthCheckExecutor; + private static ExecutorService activityCheckExecutor; + private static ExecutorService recoveryExecutor; + private static ExecutorService fenceExecutor; + + private 
static final String HA_ENABLED_DETAIL = "resourceHAEnabled"; + + ////////////////////////////////////////////////////// + //////////////// HA Manager methods ////////////////// + ////////////////////////////////////////////////////// + + public Map haCounterMap = new ConcurrentHashMap<>(); + + public HAProvider getHAProvider(final String name) { + return haProviderMap.get(name); + } + + private String resourceCounterKey(final Long resourceId, final HAResource.ResourceType resourceType) { + return resourceId.toString() + resourceType.toString(); + } + + public synchronized HAResourceCounter getHACounter(final Long resourceId, final HAResource.ResourceType resourceType) { + final String key = resourceCounterKey(resourceId, resourceType); + if (!haCounterMap.containsKey(key)) { + haCounterMap.put(key, new HAResourceCounter()); + } + return haCounterMap.get(key); + } + + public synchronized void purgeHACounter(final Long resourceId, final HAResource.ResourceType resourceType) { + final String key = resourceCounterKey(resourceId, resourceType); + if (haCounterMap.containsKey(key)) { + haCounterMap.remove(key); + } + } + + public boolean transitionHAState(final HAConfig.Event event, final HAConfig haConfig) { + if (event == null || haConfig == null) { + return false; + } + final HAConfig.HAState currentHAState = haConfig.getState(); + try { + final HAConfig.HAState nextState = HAConfig.HAState.getStateMachine().getNextState(currentHAState, event); + boolean result = HAConfig.HAState.getStateMachine().transitTo(haConfig, event, null, haConfigDao); + if (result) { + final String message = String.format("Transitioned host HA state from:%s to:%s due to event:%s for the host id:%d", + currentHAState, nextState, event, haConfig.getResourceId()); + LOG.debug(message); + if (nextState == HAConfig.HAState.Recovering || nextState == HAConfig.HAState.Fencing || nextState == HAConfig.HAState.Fenced) { + ActionEventUtils.onActionEvent(CallContext.current().getCallingUserId(), 
CallContext.current().getCallingAccountId(), + Domain.ROOT_DOMAIN, EventTypes.EVENT_HA_STATE_TRANSITION, message); + } + } + return result; + } catch (NoTransitionException e) { + if (LOG.isTraceEnabled()) { + LOG.trace("Unable to find next HA state for current HA state: " + currentHAState + " for event: " + event + " for host" + haConfig.getResourceId()); + } + } + return false; + } + + private boolean transitionResourceStateToDisabled(final Partition partition) { + List resources; + if (partition.partitionType() == Partition.PartitionType.Cluster) { + resources = hostDao.findByClusterId(partition.getId()); + } else if (partition.partitionType() == Partition.PartitionType.Zone) { + resources = hostDao.findByDataCenterId(partition.getId()); + } else { + return true; + } + + boolean result = true; + for (final HAResource resource: resources) { + result = result && transitionHAState(HAConfig.Event.Disabled, + haConfigDao.findHAResource(resource.getId(), resource.resourceType())); + } + return result; + } + + private boolean checkHAOwnership(final HAConfig haConfig) { + // Skip for resources not owned by this mgmt server + return !(haConfig.getManagementServerId() != null + && haConfig.getManagementServerId() != ManagementServerNode.getManagementServerId()); + } + + private HAResource validateAndFindHAResource(final HAConfig haConfig) { + HAResource resource = null; + if (haConfig.getResourceType() == HAResource.ResourceType.Host) { + final Host host = hostDao.findById(haConfig.getResourceId()); + if (host != null && host.getRemoved() != null) { + return null; + } + resource = host; + if (resource == null && haConfig.getState() != HAConfig.HAState.Disabled) { + disableHA(haConfig.getResourceId(), haConfig.getResourceType()); + return null; + } + } + if (!haConfig.isEnabled() || !isHAEnabledForZone(resource) || !isHAEnabledForCluster(resource)) { + if (haConfig.getState() != HAConfig.HAState.Disabled) { + if (transitionHAState(HAConfig.Event.Disabled, haConfig) ) { + 
purgeHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + } + } + return null; + } else if (haConfig.getState() == HAConfig.HAState.Disabled) { + transitionHAState(HAConfig.Event.Enabled, haConfig); + } + return resource; + } + + private HAProvider validateAndFindHAProvider(final HAConfig haConfig, final HAResource resource) { + final HAProvider haProvider = haProviderMap.get(haConfig.getHaProvider()); + if (haProvider != null && !haProvider.isEligible(resource)) { + if (haConfig.getState() != HAConfig.HAState.Ineligible) { + transitionHAState(HAConfig.Event.Ineligible, haConfig); + } + return null; + } else if (haConfig.getState() == HAConfig.HAState.Ineligible) { + transitionHAState(HAConfig.Event.Eligible, haConfig); + } + return haProvider; + } + + public boolean isHAEnabledForZone(final HAResource resource) { + if (resource == null || resource.getDataCenterId() < 1L) { + return true; + } + final DataCenterDetailVO zoneDetails = dataCenterDetailsDao.findDetail(resource.getDataCenterId(), HA_ENABLED_DETAIL); + return zoneDetails == null || Strings.isNullOrEmpty(zoneDetails.getValue()) || Boolean.valueOf(zoneDetails.getValue()); + } + + private boolean isHAEnabledForCluster(final HAResource resource) { + if (resource == null || resource.getClusterId() == null) { + return true; + } + final ClusterDetailsVO clusterDetails = clusterDetailsDao.findDetail(resource.getClusterId(), HA_ENABLED_DETAIL); + return clusterDetails == null || Strings.isNullOrEmpty(clusterDetails.getValue()) || Boolean.valueOf(clusterDetails.getValue()); + } + + private boolean isHAEligibleForResource(final HAResource resource) { + if (resource == null || resource.getId() < 1L) { + return false; + } + HAResource.ResourceType resourceType = null; + if (resource instanceof Host) { + resourceType = HAResource.ResourceType.Host; + } + if (resourceType == null) { + return false; + } + final HAConfig haConfig = haConfigDao.findHAResource(resource.getId(), resourceType); + return 
haConfig != null && haConfig.isEnabled() + && haConfig.getState() != HAConfig.HAState.Disabled + && haConfig.getState() != HAConfig.HAState.Ineligible; + } + + public boolean isHAEligible(final HAResource resource) { + return resource != null && isHAEnabledForZone(resource) + && isHAEnabledForCluster(resource) + && isHAEligibleForResource(resource); + } + + public void validateHAProviderConfigForResource(final Long resourceId, final HAResource.ResourceType resourceType, final HAProvider haProvider) { + if (HAResource.ResourceType.Host.equals(resourceType)) { + final Host host = hostDao.findById(resourceId); + if (host.getHypervisorType() == null || haProvider.resourceSubType() == null || !host.getHypervisorType().toString().equals(haProvider.resourceSubType().toString())) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Incompatible haprovider provided for the resource of hypervisor type:" + host.getHypervisorType()); + } + } + } + + //////////////////////////////////////////////////////////////////// + //////////////// HA Investigator wrapper for Old HA //////////////// + //////////////////////////////////////////////////////////////////// + + public Boolean isVMAliveOnHost(final Host host) { + final HAConfig haConfig = haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host); + if (haConfig != null) { + if (haConfig.getState() == HAConfig.HAState.Fenced) { + if (LOG.isDebugEnabled()){ + LOG.debug("HA: Host is fenced " + host.getId()); + } + return false; + } + if (LOG.isDebugEnabled()){ + LOG.debug("HA: HOST is alive " + host.getId()); + } + return true; + } + return null; + } + + public Status getHostStatus(final Host host) { + final HAConfig haConfig = haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host); + if (haConfig != null) { + if (haConfig.getState() == HAConfig.HAState.Fenced) { + if (LOG.isDebugEnabled()){ + LOG.debug("HA: Agent is available/suspect/checking Up " + host.getId()); + } + return Status.Down; + } 
else if (haConfig.getState() == HAConfig.HAState.Degraded || haConfig.getState() == HAConfig.HAState.Recovering || haConfig.getState() == HAConfig.HAState.Recovered || haConfig.getState() == HAConfig.HAState.Fencing) { + if (LOG.isDebugEnabled()){ + LOG.debug("HA: Agent is disconnected " + host.getId()); + } + return Status.Disconnected; + } + return Status.Up; + } + return Status.Unknown; + } + + ////////////////////////////////////////////////////// + //////////////// HA API handlers ///////////////////// + ////////////////////////////////////////////////////// + + private boolean configureHA(final Long resourceId, final HAResource.ResourceType resourceType, final Boolean enable, final String haProvider) { + return Transaction.execute(new TransactionCallback() { + @Override + public Boolean doInTransaction(TransactionStatus status) { + HAConfigVO haConfig = (HAConfigVO) haConfigDao.findHAResource(resourceId, resourceType); + if (haConfig == null) { + haConfig = new HAConfigVO(); + if (haProvider != null) { + haConfig.setHaProvider(haProvider); + } + if (enable != null) { + haConfig.setEnabled(enable); + haConfig.setManagementServerId(ManagementServerNode.getManagementServerId()); + } + haConfig.setResourceId(resourceId); + haConfig.setResourceType(resourceType); + if (Strings.isNullOrEmpty(haConfig.getHaProvider())) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "HAProvider is not provided for the resource, failing configuration."); + } + if (haConfigDao.persist(haConfig) != null) { + return true; + } + } else { + if (enable != null) { + haConfig.setEnabled(enable); + } + if (haProvider != null) { + haConfig.setHaProvider(haProvider); + } + if (Strings.isNullOrEmpty(haConfig.getHaProvider())) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "HAProvider is not provided for the resource, failing configuration."); + } + return haConfigDao.update(haConfig.getId(), haConfig); + } + return false; + } + }); + } + + @Override + 
@ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_CONFIGURE, eventDescription = "configuring HA for resource") + public boolean configureHA(final Long resourceId, final HAResource.ResourceType resourceType, final String haProvider) { + Preconditions.checkArgument(resourceId != null && resourceId > 0L); + Preconditions.checkArgument(resourceType != null); + Preconditions.checkArgument(!Strings.isNullOrEmpty(haProvider)); + + if (!haProviderMap.containsKey(haProvider.toLowerCase())) { + throw new CloudRuntimeException("Given HA provider does not exist."); + } + validateHAProviderConfigForResource(resourceId, resourceType, haProviderMap.get(haProvider.toLowerCase())); + return configureHA(resourceId, resourceType, null, haProvider.toLowerCase()); + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for resource") + public boolean enableHA(final Long resourceId, final HAResource.ResourceType resourceType) { + Preconditions.checkArgument(resourceId != null && resourceId > 0L); + Preconditions.checkArgument(resourceType != null); + return configureHA(resourceId, resourceType, true, null); + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for resource") + public boolean disableHA(final Long resourceId, final HAResource.ResourceType resourceType) { + Preconditions.checkArgument(resourceId != null && resourceId > 0L); + Preconditions.checkArgument(resourceType != null); + boolean result = configureHA(resourceId, resourceType, false, null); + if (result) { + transitionHAState(HAConfig.Event.Disabled, haConfigDao.findHAResource(resourceId, resourceType)); + purgeHACounter(resourceId, resourceType); + } + return result; + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for a cluster") + public boolean enableHA(final Cluster cluster) { + clusterDetailsDao.persist(cluster.getId(), 
HA_ENABLED_DETAIL, String.valueOf(true)); + return true; + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for a cluster") + public boolean disableHA(final Cluster cluster) { + clusterDetailsDao.persist(cluster.getId(), HA_ENABLED_DETAIL, String.valueOf(false)); + return transitionResourceStateToDisabled(cluster); + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for a zone") + public boolean enableHA(final DataCenter zone) { + dataCenterDetailsDao.persist(zone.getId(), HA_ENABLED_DETAIL, String.valueOf(true)); + return true; + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for a zone") + public boolean disableHA(final DataCenter zone) { + dataCenterDetailsDao.persist(zone.getId(), HA_ENABLED_DETAIL, String.valueOf(false)); + return transitionResourceStateToDisabled(zone); + } + + @Override + public List listHAResources(final Long resourceId, final HAResource.ResourceType resourceType) { + return haConfigDao.listHAResource(resourceId, resourceType); + } + + @Override + public List listHAProviders(final HAResource.ResourceType resourceType, final HAResource.ResourceSubType entityType) { + final List haProviderNames = new ArrayList<>(); + for (final HAProvider haProvider : haProviders) { + if (haProvider.resourceType().equals(resourceType) && haProvider.resourceSubType().equals(entityType)) { + haProviderNames.add(haProvider.getClass().getSimpleName()); + } + } + return haProviderNames; + } + + @Override + public List> getCommands() { + List> cmdList = new ArrayList<>(); + cmdList.add(ConfigureHAForHostCmd.class); + cmdList.add(EnableHAForHostCmd.class); + cmdList.add(EnableHAForClusterCmd.class); + cmdList.add(EnableHAForZoneCmd.class); + cmdList.add(DisableHAForHostCmd.class); + cmdList.add(DisableHAForClusterCmd.class); + cmdList.add(DisableHAForZoneCmd.class); + 
cmdList.add(ListHostHAResourcesCmd.class); + cmdList.add(ListHostHAProvidersCmd.class); + return cmdList; + } + + ////////////////////////////////////////////////////////////////// + //////////////// Clustered Manager Listeners ///////////////////// + ////////////////////////////////////////////////////////////////// + + @Override + public void onManagementNodeJoined(List nodeList, long selfNodeId) { + + } + + @Override + public void onManagementNodeLeft(List nodeList, long selfNodeId) { + + } + + @Override + public void onManagementNodeIsolated() { + + } + + /////////////////////////////////////////////////// + //////////////// Manager Init ///////////////////// + /////////////////////////////////////////////////// + + @Override + public boolean start() { + haProviderMap.clear(); + for (final HAProvider haProvider : haProviders) { + haProviderMap.put(haProvider.getClass().getSimpleName().toLowerCase(), haProvider); + } + return true; + } + + @Override + public boolean stop() { + haConfigDao.expireServerOwnership(ManagementServerNode.getManagementServerId()); + return true; + } + + @Override + public boolean configure(final String name, final Map params) throws ConfigurationException { + // Health Check + final int healthCheckWorkers = MaxConcurrentHealthCheckOperations.value(); + final int healthCheckQueueSize = MaxPendingHealthCheckOperations.value(); + healthCheckExecutor = new ThreadPoolExecutor(healthCheckWorkers, healthCheckWorkers, + 0L, TimeUnit.MILLISECONDS, + new ArrayBlockingQueue(healthCheckQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); + + // Activity Check + final int activityCheckWorkers = MaxConcurrentActivityCheckOperations.value(); + final int activityCheckQueueSize = MaxPendingActivityCheckOperations.value(); + activityCheckExecutor = new ThreadPoolExecutor(activityCheckWorkers, activityCheckWorkers, + 0L, TimeUnit.MILLISECONDS, + new ArrayBlockingQueue(activityCheckQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); + + 
// Recovery + final int recoveryOperationWorkers = MaxConcurrentRecoveryOperations.value(); + final int recoveryOperationQueueSize = MaxPendingRecoveryOperations.value(); + recoveryExecutor = new ThreadPoolExecutor(recoveryOperationWorkers, recoveryOperationWorkers, + 0L, TimeUnit.MILLISECONDS, + new ArrayBlockingQueue(recoveryOperationQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); + + // Fence + final int fenceOperationWorkers = MaxConcurrentFenceOperations.value(); + final int fenceOperationQueueSize = MaxPendingFenceOperations.value(); + fenceExecutor = new ThreadPoolExecutor(fenceOperationWorkers, fenceOperationWorkers, + 0L, TimeUnit.MILLISECONDS, + new ArrayBlockingQueue(fenceOperationQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); + + pollManager.submitTask(new HealthCheckPollTask()); + pollManager.submitTask(new ActivityCheckPollTask()); + pollManager.submitTask(new RecoveryPollTask()); + pollManager.submitTask(new FencingPollTask()); + + LOG.debug("HA manager has been configured"); + return true; + } + + public void setHaProviders(List> haProviders) { + this.haProviders = haProviders; + } + + @Override + public String getConfigComponentName() { + return HAManager.class.getSimpleName(); + } + + @Override + public ConfigKey[] getConfigKeys() { + return new ConfigKey[] { + MaxConcurrentHealthCheckOperations, + MaxPendingHealthCheckOperations, + MaxConcurrentActivityCheckOperations, + MaxPendingActivityCheckOperations, + MaxConcurrentRecoveryOperations, + MaxPendingRecoveryOperations, + MaxConcurrentFenceOperations, + MaxPendingFenceOperations + }; + } + + ///////////////////////////////////////////////// + //////////////// Poll Tasks ///////////////////// + ///////////////////////////////////////////////// + + private final class HealthCheckPollTask extends ManagedContextRunnable implements BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("HA health check task is 
running..."); + } + final List haConfigList = new ArrayList(haConfigDao.listAll()); + for (final HAConfig haConfig : haConfigList) { + if (!checkHAOwnership(haConfig)) { + continue; + } + + final HAResource resource = validateAndFindHAResource(haConfig); + if (resource == null) { + continue; + } + + final HAProvider haProvider = validateAndFindHAProvider(haConfig, resource); + if (haProvider == null) { + continue; + } + + final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + + if (haConfig.getState() == HAConfig.HAState.Suspect) { + if (counter.canPerformActivityCheck((Long)(haProvider.getConfigValue(HAProviderConfig.MaxActivityCheckInterval, resource)))) { + transitionHAState(HAConfig.Event.PerformActivityCheck, haConfig); + } + } + + if (haConfig.getState() == HAConfig.HAState.Degraded) { + if (counter.canRecheckActivity((Long)(haProvider.getConfigValue(HAProviderConfig.MaxDegradedWaitTimeout, resource)))) { + transitionHAState(HAConfig.Event.PeriodicRecheckResourceActivity, haConfig); + } + } + + switch (haConfig.getState()) { + case Available: + case Suspect: + case Degraded: + case Fenced: + final HealthCheckTask task = ComponentContext.inject(new HealthCheckTask(resource, haProvider, haConfig, + HAProviderConfig.HealthCheckTimeout, healthCheckExecutor)); + healthCheckExecutor.submit(task); + break; + default: + break; + } + } + } catch (Throwable t) { + LOG.error("Error trying to perform health checks in HA manager", t); + } + } + } + + private final class ActivityCheckPollTask extends ManagedContextRunnable implements BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("HA activity check task is running..."); + } + final List haConfigList = new ArrayList(haConfigDao.listAll()); + for (final HAConfig haConfig : haConfigList) { + if (!checkHAOwnership(haConfig)) { + continue; + } + + final HAResource resource = validateAndFindHAResource(haConfig); 
+ if (resource == null) { + continue; + } + + final HAProvider haProvider = validateAndFindHAProvider(haConfig, resource); + if (haProvider == null) { + continue; + } + + if (haConfig.getState() == HAConfig.HAState.Checking) { + final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + final ActivityCheckTask job = ComponentContext.inject(new ActivityCheckTask(resource, haProvider, haConfig, + HAProviderConfig.ActivityCheckTimeout, activityCheckExecutor, counter.getSuspectTimeStamp())); + activityCheckExecutor.submit(job); + } + } + } catch (Throwable t) { + LOG.error("Error trying to perform activity checks in HA manager", t); + } + } + } + + private final class RecoveryPollTask extends ManagedContextRunnable implements BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("HA recovery task is running..."); + } + final List haConfigList = new ArrayList(haConfigDao.listAll()); + for (final HAConfig haConfig : haConfigList) { + if (!checkHAOwnership(haConfig)) { + continue; + } + + final HAResource resource = validateAndFindHAResource(haConfig); + if (resource == null) { + continue; + } + + final HAProvider haProvider = validateAndFindHAProvider(haConfig, resource); + if (haProvider == null) { + continue; + } + + final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + if (haConfig.getState() == HAConfig.HAState.Recovering) { + if (counter.canAttemptRecovery()) { + if (counter.getRecoveryCounter() >= (Long)(haProvider.getConfigValue(HAProviderConfig.MaxRecoveryAttempts, resource))) { + transitionHAState(HAConfig.Event.RecoveryOperationThresholdExceeded, haConfig); + continue; + } + + final RecoveryTask task = ComponentContext.inject(new RecoveryTask(resource, haProvider, haConfig, + HAProviderConfig.RecoveryTimeout, recoveryExecutor)); + final Future recoveryFuture = recoveryExecutor.submit(task); + 
counter.setRecoveryFuture(recoveryFuture); + counter.incrRecoveryCounter(); + } + } + if (haConfig.getState() == HAConfig.HAState.Recovered) { + counter.markRecoveryStarted(); + if (counter.canExitRecovery((Long)(haProvider.getConfigValue(HAProviderConfig.RecoveryWaitTimeout, resource)))) { + transitionHAState(HAConfig.Event.RecoveryWaitPeriodTimeout, haConfig); + counter.markRecoveryCompleted(); + } + } + } + } catch (Throwable t) { + LOG.error("Error trying to perform recovery operation in HA manager", t); + } + } + } + + private final class FencingPollTask extends ManagedContextRunnable implements BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("HA fencing task is running..."); + } + final List haConfigList = new ArrayList(haConfigDao.listAll()); + for (final HAConfig haConfig : haConfigList) { + if (!checkHAOwnership(haConfig)) { + continue; + } + + final HAResource resource = validateAndFindHAResource(haConfig); + if (resource == null) { + continue; + } + + final HAProvider haProvider = validateAndFindHAProvider(haConfig, resource); + if (haProvider == null) { + continue; + } + + final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + if (counter.lastFencingCompleted()) { + if (haConfig.getState() == HAConfig.HAState.Fencing) { + final FenceTask task = ComponentContext.inject(new FenceTask(resource, haProvider, haConfig, + HAProviderConfig.FenceTimeout, fenceExecutor)); + final Future fenceFuture = fenceExecutor.submit(task); + counter.setFenceFuture(fenceFuture); + } + } + } + } catch (Throwable t) { + LOG.error("Error trying to perform fencing operation in HA manager", t); + } + } + } +} diff --git a/server/src/org/apache/cloudstack/ha/HAResourceCounter.java b/server/src/org/apache/cloudstack/ha/HAResourceCounter.java new file mode 100644 index 00000000000..f955fd2f8fd --- /dev/null +++ 
/**
 * Bookkeeping kept for one HA resource: activity-check and recovery counters, the
 * timestamps that gate the next state transition, and the futures of the recovery and
 * fence operations currently in flight.
 *
 * Instances are shared between poll tasks and worker threads, so every method that reads
 * or writes the timestamp or future fields synchronizes on the instance. The plain getters
 * of the atomic counters need no lock.
 */
public final class HAResourceCounter {
    private final AtomicLong activityCheckCounter = new AtomicLong(0);
    private final AtomicLong activityCheckFailureCounter = new AtomicLong(0);
    private final AtomicLong recoveryOperationCounter = new AtomicLong(0);

    // All fields below are guarded by "this"; null means "not set".
    private Long firstHealthCheckFailureTimestamp;
    private Long lastActivityCheckTimestamp;
    private Long degradedTimestamp;
    private Long recoverTimestamp;
    private Future<?> recoveryFuture;
    private Future<?> fenceFuture;

    /** Tells whether more than {@code seconds} seconds passed since {@code sinceMillis}. */
    private static boolean hasElapsed(final long sinceMillis, final Long seconds) {
        return (System.currentTimeMillis() - sinceMillis) > (seconds * 1000);
    }

    /** @return number of activity checks recorded since the last reset */
    public long getActivityCheckCounter() {
        return activityCheckCounter.get();
    }

    /** @return number of recovery operations recorded since the last reset */
    public long getRecoveryCounter() {
        return recoveryOperationCounter.get();
    }

    /**
     * Records one activity check and stamps the time of the check.
     *
     * @param isFailure whether the check counts as a failure
     */
    public synchronized void incrActivityCounter(final boolean isFailure) {
        lastActivityCheckTimestamp = System.currentTimeMillis();
        activityCheckCounter.incrementAndGet();
        if (isFailure) {
            activityCheckFailureCounter.incrementAndGet();
        }
    }

    /** Records one recovery operation. */
    public synchronized void incrRecoveryCounter() {
        recoveryOperationCounter.incrementAndGet();
    }

    /** Clears the activity check total and failure counters. */
    public synchronized void resetActivityCounter() {
        activityCheckCounter.set(0);
        activityCheckFailureCounter.set(0);
    }

    /** Clears the recovery counter, the recovery timestamp and the recovery future. */
    public synchronized void resetRecoveryCounter() {
        recoverTimestamp = null;
        recoveryFuture = null;
        recoveryOperationCounter.set(0);
    }

    /** Forgets when the resource was first suspected. */
    public synchronized void resetSuspectTimestamp() {
        firstHealthCheckFailureTimestamp = null;
    }

    /**
     * @param failureRatio tolerated share of failed checks
     * @return {@code true} when the failures exceed {@code failureRatio} times the total
     */
    public synchronized boolean hasActivityThresholdExceeded(final double failureRatio) {
        // Locked so both counters are read from the same consistent state.
        return activityCheckFailureCounter.get() > (activityCheckCounter.get() * failureRatio);
    }

    /**
     * @param activityCheckInterval minimum spacing between checks, in seconds
     * @return {@code true} when no check was recorded yet or the interval has passed
     */
    public synchronized boolean canPerformActivityCheck(final Long activityCheckInterval) {
        return lastActivityCheckTimestamp == null || hasElapsed(lastActivityCheckTimestamp, activityCheckInterval);
    }

    /**
     * @param maxDegradedPeriod time to stay degraded before rechecking, in seconds
     * @return {@code true} when the resource was never marked degraded or the period has passed
     */
    public synchronized boolean canRecheckActivity(final Long maxDegradedPeriod) {
        return degradedTimestamp == null || hasElapsed(degradedTimestamp, maxDegradedPeriod);
    }

    /**
     * @param maxRecoveryWaitPeriod time to wait after recovery started, in seconds
     * @return {@code true} only when recovery was marked as started and the period has passed
     */
    public synchronized boolean canExitRecovery(final Long maxRecoveryWaitPeriod) {
        return recoverTimestamp != null && hasElapsed(recoverTimestamp, maxRecoveryWaitPeriod);
    }

    /**
     * @return the time the resource was first suspected, in epoch milliseconds; set to the
     *         current time when it was not recorded yet
     */
    public synchronized long getSuspectTimeStamp() {
        if (firstHealthCheckFailureTimestamp == null) {
            firstHealthCheckFailureTimestamp = System.currentTimeMillis();
        }
        return firstHealthCheckFailureTimestamp;
    }

    /** Stamps the current time as the moment the resource became suspect. */
    public synchronized void markResourceSuspected() {
        firstHealthCheckFailureTimestamp = System.currentTimeMillis();
    }

    /** Stamps the current time as the moment the resource became degraded. */
    public synchronized void markResourceDegraded() {
        degradedTimestamp = System.currentTimeMillis();
    }

    /** Stamps the start of recovery; later calls keep the first stamp until it is cleared. */
    public synchronized void markRecoveryStarted() {
        if (recoverTimestamp == null) {
            recoverTimestamp = System.currentTimeMillis();
        }
    }

    /** Clears the recovery timestamp and the recovery future. */
    public synchronized void markRecoveryCompleted() {
        recoverTimestamp = null;
        recoveryFuture = null;
    }

    /** Remembers the future of the recovery operation that was just submitted. */
    public synchronized void setRecoveryFuture(final Future<?> future) {
        recoveryFuture = future;
    }

    /** @return {@code true} when no recovery operation is tracked or the tracked one is done */
    public synchronized boolean canAttemptRecovery() {
        return recoveryFuture == null || recoveryFuture.isDone();
    }

    /** Remembers the future of the fence operation that was just submitted. */
    public synchronized void setFenceFuture(final Future<?> future) {
        fenceFuture = future;
    }

    /** @return {@code true} when no fence operation is tracked or the tracked one is done */
    public synchronized boolean lastFencingCompleted() {
        return fenceFuture == null || fenceFuture.isDone();
    }

}
+ * + * @param + */ +public interface ActivityCheckerInterface extends Adapter { + + boolean isActive(R r, DateTime t) throws HACheckerException ; + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HACheckerException.java b/server/src/org/apache/cloudstack/ha/provider/HACheckerException.java new file mode 100644 index 00000000000..9e736221d63 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HACheckerException.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +package org.apache.cloudstack.ha.provider; + +public class HACheckerException extends Exception { + + private static final long serialVersionUID = 1L; + + public HACheckerException(String string, Exception e) { + super(string, e); + } + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HAFenceException.java b/server/src/org/apache/cloudstack/ha/provider/HAFenceException.java new file mode 100644 index 00000000000..80a7c3be92a --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HAFenceException.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
/**
 * Checked exception declared by {@code HAProvider#fence} to report a failed fence operation.
 */
public class HAFenceException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception with a message and the underlying cause.
     *
     * @param string human-readable description of the failure
     * @param e the exception that caused the fence operation to fail
     */
    public HAFenceException(final String string, final Exception e) {
        super(string, e);
    }

}
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.provider; + +import com.cloud.utils.component.Adapter; + +import org.apache.cloudstack.ha.HAConfig; +import org.joda.time.DateTime; + +import org.apache.cloudstack.ha.HAResource; + +public interface HAProvider extends Adapter { + + enum HAProviderConfig { + HealthCheckTimeout, + ActivityCheckTimeout, + RecoveryTimeout, + FenceTimeout, + ActivityCheckFailureRatio, + MaxActivityChecks, + MaxRecoveryAttempts, + MaxActivityCheckInterval, + MaxDegradedWaitTimeout, + RecoveryWaitTimeout + }; + + HAResource.ResourceType resourceType(); + + HAResource.ResourceSubType resourceSubType(); + + boolean isDisabled(R r); + + boolean isInMaintenanceMode(R r); + + boolean isEligible(R r); + + boolean isHealthy(R r) throws HACheckerException; + + boolean hasActivity(R r, DateTime afterThis) throws HACheckerException; + + boolean recover(R r) throws HARecoveryException; + + boolean fence(R r) throws HAFenceException; + + void setFenced(R r); + + void sendAlert(R r, HAConfig.HAState nextState); + + Object getConfigValue(HAProviderConfig name, R r); +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HARecoveryException.java b/server/src/org/apache/cloudstack/ha/provider/HARecoveryException.java new file mode 100644 index 00000000000..893e21c7230 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HARecoveryException.java @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
/**
 * Checked exception declared by {@code HAProvider#recover} to report a failed recovery operation.
 */
public class HARecoveryException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception with a message and the underlying cause.
     *
     * @param string human-readable description of the failure
     * @param e the exception that caused the recovery operation to fail
     */
    public HARecoveryException(final String string, final Exception e) {
        super(string, e);
    }

}
+ +package org.apache.cloudstack.ha.provider; + +import org.apache.cloudstack.ha.HAResource; + +import com.cloud.utils.component.Adapter; + +/** + * Health checker is a quick way to find out if a resource is active. Like pinging the host or checking agent health. + * + * @param + */ + +public interface HealthCheckerInterface extends Adapter { + + boolean isHealthy(R r); + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HostHAProvider.java b/server/src/org/apache/cloudstack/ha/provider/HostHAProvider.java new file mode 100644 index 00000000000..4867d92f35d --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HostHAProvider.java @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.ha.provider; + +import com.cloud.host.Host; + +public interface HostHAProvider extends HAProvider { +} diff --git a/server/src/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java b/server/src/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java new file mode 100644 index 00000000000..43aa20015fa --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.ha.provider.host; + +import com.cloud.agent.AgentManager; +import com.cloud.alert.AlertManager; +import com.cloud.ha.HighAvailabilityManager; +import com.cloud.host.Host; +import com.cloud.host.HostVO; +import com.cloud.host.Status; +import com.cloud.host.Status.Event; +import com.cloud.resource.ResourceManager; +import com.cloud.resource.ResourceState; +import com.cloud.utils.component.AdapterBase; +import com.cloud.utils.fsm.NoTransitionException; +import org.apache.cloudstack.alert.AlertService; +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.log4j.Logger; + +import javax.inject.Inject; + +public abstract class HAAbstractHostProvider extends AdapterBase implements HAProvider { + + private final static Logger LOG = Logger.getLogger(HAAbstractHostProvider.class); + + @Inject + private AlertManager alertManager; + @Inject + protected AgentManager agentManager; + @Inject + protected ResourceManager resourceManager; + @Inject + protected HighAvailabilityManager oldHighAvailabilityManager; + + + @Override + public HAResource.ResourceType resourceType() { + return HAResource.ResourceType.Host; + } + + public HAResource.ResourceSubType resourceSubType() { + return HAResource.ResourceSubType.Unknown; + } + + @Override + public boolean isDisabled(final Host host) { + return host.isDisabled(); + } + + @Override + public boolean isInMaintenanceMode(final Host host) { + return host.isInMaintenanceStates(); + } + + @Override + public void setFenced(final Host r) { + if (r.getState() != Status.Down) { + try { + LOG.debug("Trying to disconnect the host without investigation and scheduling HA for the VMs on host id=" + r.getId()); + agentManager.disconnectWithoutInvestigation(r.getId(), Event.HostDown); + oldHighAvailabilityManager.scheduleRestartForVmsOnHost((HostVO)r, 
true); + } catch (Exception e) { + LOG.error("Failed to disconnect host and schedule HA restart of VMs after fencing the host: ", e); + } + try { + resourceManager.resourceStateTransitTo(r, ResourceState.Event.InternalEnterMaintenance, ManagementServerNode.getManagementServerId()); + } catch (NoTransitionException e) { + LOG.error("Failed to put host in maintenance mode after host-ha fencing and scheduling VM-HA: ", e); + } + } + } + + @Override + public void sendAlert(final Host host, final HAConfig.HAState nextState) { + String subject = "HA operation performed for host"; + String body = subject; + if (HAConfig.HAState.Fencing.equals(nextState)) { + subject = String.format("HA Fencing of host id=%d, in dc id=%d performed", host.getId(), host.getDataCenterId()); + body = String.format("HA Fencing has been performed for host id=%d, uuid=%s in datacenter id=%d", host.getId(), host.getUuid(), host.getDataCenterId()); + } else if (HAConfig.HAState.Recovering.equals(nextState)) { + subject = String.format("HA Recovery of host id=%d, in dc id=%d performed", host.getId(), host.getDataCenterId()); + body = String.format("HA Recovery has been performed for host id=%d, uuid=%s in datacenter id=%d", host.getId(), host.getUuid(), host.getDataCenterId()); + } + alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HA_ACTION, host.getDataCenterId(), host.getPodId(), subject, body); + } + +} diff --git a/server/src/org/apache/cloudstack/ha/task/ActivityCheckTask.java b/server/src/org/apache/cloudstack/ha/task/ActivityCheckTask.java new file mode 100644 index 00000000000..ab8af6124a7 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/ActivityCheckTask.java @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.HAResourceCounter; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HAProvider.HAProviderConfig; +import org.apache.log4j.Logger; + +import javax.inject.Inject; + +import org.joda.time.DateTime; +import java.util.concurrent.ExecutorService; + +public class ActivityCheckTask extends BaseHATask { + + public static final Logger LOG = Logger.getLogger(ActivityCheckTask.class); + + @Inject + private HAManager haManager; + + private final long disconnectTime; + + public ActivityCheckTask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, final HAProvider.HAProviderConfig haProviderConfig, + final ExecutorService executor, final long disconnectTime) { + super(resource, haProvider, haConfig, haProviderConfig, executor); + this.disconnectTime = disconnectTime; + } + + public boolean performAction() throws HACheckerException { + return getHaProvider().hasActivity(getResource(), new DateTime(disconnectTime)); + } + + public void processResult(boolean result, Throwable t) { + final HAConfig haConfig = getHaConfig(); + final HAProvider haProvider = 
getHaProvider(); + final HAResource resource = getResource(); + final HAResourceCounter counter = haManager.getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + + if (t != null && t instanceof HACheckerException) { + haManager.transitionHAState(HAConfig.Event.Ineligible, getHaConfig()); + counter.resetActivityCounter(); + return; + } + + counter.incrActivityCounter(!result); + + long maxActivityChecks = (Long)haProvider.getConfigValue(HAProviderConfig.MaxActivityChecks, resource); + if (counter.getActivityCheckCounter() < maxActivityChecks) { + haManager.transitionHAState(HAConfig.Event.TooFewActivityCheckSamples, haConfig); + return; + } + + double activityCheckFailureRatio = (Double)haProvider.getConfigValue(HAProviderConfig.ActivityCheckFailureRatio, resource); + if (counter.hasActivityThresholdExceeded(activityCheckFailureRatio)) { + haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureOverThresholdRatio, haConfig); + } else { + haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureUnderThresholdRatio, haConfig); + counter.markResourceDegraded(); + } + counter.resetActivityCounter(); + } +} diff --git a/server/src/org/apache/cloudstack/ha/task/BaseHATask.java b/server/src/org/apache/cloudstack/ha/task/BaseHATask.java new file mode 100644 index 00000000000..3ed87388026 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/BaseHATask.java @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAFenceException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HARecoveryException; +import org.apache.log4j.Logger; + +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +public abstract class BaseHATask implements Callable { + public static final Logger LOG = Logger.getLogger(BaseHATask.class); + + private final HAResource resource; + private final HAProvider haProvider; + private final HAConfig haConfig; + private final ExecutorService executor; + private Long timeout; + + public BaseHATask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, final HAProvider.HAProviderConfig haProviderConfig, + final ExecutorService executor) { + this.resource = resource; + this.haProvider = haProvider; + this.haConfig = haConfig; + this.executor = executor; + this.timeout = (Long)haProvider.getConfigValue(haProviderConfig, resource); + } + + public HAProvider getHaProvider() { + return haProvider; + } + + public HAConfig getHaConfig() { + return haConfig; + } + + public HAResource getResource() { + return resource; + } + + public String 
getTaskType() { + return this.getClass().getSimpleName(); + } + + public boolean performAction() throws HACheckerException, HAFenceException, HARecoveryException { + return true; + } + + public abstract void processResult(boolean result, Throwable e); + + @Override + public Boolean call() { + final Future future = executor.submit(new Callable() { + @Override + public Boolean call() throws HACheckerException, HAFenceException, HARecoveryException { + return performAction(); + } + }); + + boolean result = false; + Throwable throwable = null; + try { + if (timeout == null) { + result = future.get(); + } else { + result = future.get(timeout, TimeUnit.SECONDS); + } + } catch (InterruptedException | ExecutionException e) { + LOG.warn("Exception occurred while running " + getTaskType() + " on a resource: " + e.getMessage(), e.getCause()); + throwable = e.getCause(); + } catch (TimeoutException e) { + LOG.trace(getTaskType() + " operation timed out for resource id:" + resource.getId()); + } + processResult(result, throwable); + return result; + } + +} diff --git a/server/src/org/apache/cloudstack/ha/task/FenceTask.java b/server/src/org/apache/cloudstack/ha/task/FenceTask.java new file mode 100644 index 00000000000..d9fd62c164c --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/FenceTask.java @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.HAResourceCounter; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAFenceException; +import org.apache.cloudstack.ha.provider.HAProvider; + +import javax.inject.Inject; +import java.util.concurrent.ExecutorService; + +public class FenceTask extends BaseHATask { + + @Inject + private HAManager haManager; + + public FenceTask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, + final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) { + super(resource, haProvider, haConfig, haProviderConfig, executor); + } + + public boolean performAction() throws HACheckerException, HAFenceException { + return getHaProvider().fence(getResource()); + } + + public void processResult(boolean result, Throwable e) { + final HAConfig haConfig = getHaConfig(); + final HAResourceCounter counter = haManager.getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + if (result) { + counter.resetRecoveryCounter(); + haManager.transitionHAState(HAConfig.Event.Fenced, haConfig); + getHaProvider().setFenced(getResource()); + } + getHaProvider().sendAlert(getResource(), HAConfig.HAState.Fencing); + } +} diff --git a/server/src/org/apache/cloudstack/ha/task/HealthCheckTask.java b/server/src/org/apache/cloudstack/ha/task/HealthCheckTask.java new file 
mode 100644 index 00000000000..92dcdc2164d --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/HealthCheckTask.java @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.HAResourceCounter; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.log4j.Logger; + +import javax.inject.Inject; +import java.util.concurrent.ExecutorService; + +public class HealthCheckTask extends BaseHATask { + + @Inject + private HAManager haManager; + + public static final Logger LOG = Logger.getLogger(HealthCheckTask.class); + + public HealthCheckTask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, + final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) { + super(resource, haProvider, haConfig, haProviderConfig, executor); + } + + public boolean performAction() throws HACheckerException { + return getHaProvider().isHealthy(getResource()); + } + + public void 
processResult(boolean result, Throwable e) { + final HAConfig haConfig = getHaConfig(); + final HAResourceCounter counter = haManager.getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + if (result) { + haManager.transitionHAState(HAConfig.Event.HealthCheckPassed, haConfig); + if (haConfig.getState() == HAConfig.HAState.Fenced) { + haManager.disableHA(haConfig.getResourceId(), haConfig.getResourceType()); + } + counter.resetSuspectTimestamp(); + counter.resetActivityCounter(); + counter.resetRecoveryCounter(); + } else { + haManager.transitionHAState(HAConfig.Event.HealthCheckFailed, haConfig); + counter.markResourceSuspected(); + } + } +} diff --git a/server/src/org/apache/cloudstack/ha/task/RecoveryTask.java b/server/src/org/apache/cloudstack/ha/task/RecoveryTask.java new file mode 100644 index 00000000000..b4eb863fbfc --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/RecoveryTask.java @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HARecoveryException; + +import javax.inject.Inject; +import java.util.concurrent.ExecutorService; + +public class RecoveryTask extends BaseHATask { + + @Inject + private HAManager haManager; + + public RecoveryTask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, + final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) { + super(resource, haProvider, haConfig, haProviderConfig, executor); + } + + public boolean performAction() throws HACheckerException, HARecoveryException { + return getHaProvider().recover(getResource()); + } + + public void processResult(boolean result, Throwable e) { + final HAConfig haConfig = getHaConfig(); + if (result) { + haManager.transitionHAState(HAConfig.Event.Recovered, haConfig); + } + getHaProvider().sendAlert(getResource(), HAConfig.HAState.Recovering); + } +} diff --git a/server/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementServiceImpl.java b/server/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementServiceImpl.java index bb099c89085..e48f58983cd 100644 --- a/server/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementServiceImpl.java +++ b/server/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementServiceImpl.java @@ -31,7 +31,6 @@ import com.cloud.host.dao.HostDao; import com.cloud.org.Cluster; import com.cloud.utils.component.Manager; import com.cloud.utils.component.ManagerBase; -import com.cloud.utils.db.GlobalLock; import com.cloud.utils.db.Transaction; import com.cloud.utils.db.TransactionCallback; import com.cloud.utils.db.TransactionStatus; @@ -45,10 +44,13 @@ import 
org.apache.cloudstack.api.response.OutOfBandManagementResponse; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.cloudstack.outofbandmanagement.driver.OutOfBandManagementDriverChangePasswordCommand; import org.apache.cloudstack.outofbandmanagement.driver.OutOfBandManagementDriverPowerCommand; import org.apache.cloudstack.outofbandmanagement.driver.OutOfBandManagementDriverResponse; +import org.apache.cloudstack.poll.BackgroundPollManager; +import org.apache.cloudstack.poll.BackgroundPollTask; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; @@ -79,6 +81,8 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf private HostDao hostDao; @Inject private AlertManager alertMgr; + @Inject + private BackgroundPollManager backgroundPollManager; private String name; private long serviceId; @@ -87,15 +91,10 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf private final Map outOfBandManagementDriversMap = new HashMap(); private static final String OOBM_ENABLED_DETAIL = "outOfBandManagementEnabled"; - private static final int ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_HOST = 120; private static Cache hostAlertCache; private static ExecutorService backgroundSyncBlockingExecutor; - private String getOutOfBandManagementHostLock(long id) { - return "oobm.host." 
+ id; - } - private void initializeDriversMap() { if (outOfBandManagementDriversMap.isEmpty() && outOfBandManagementDrivers != null && outOfBandManagementDrivers.size() > 0) { for (final OutOfBandManagementDriver driver : outOfBandManagementDrivers) { @@ -204,12 +203,14 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf OutOfBandManagement.PowerState currentPowerState = outOfBandManagementHost.getPowerState(); try { OutOfBandManagement.PowerState newPowerState = OutOfBandManagement.PowerState.getStateMachine().getNextState(currentPowerState, event); - boolean result = outOfBandManagementDao.updateState(currentPowerState, event, newPowerState, outOfBandManagementHost, null); + boolean result = OutOfBandManagement.PowerState.getStateMachine().transitTo(outOfBandManagementHost, event, null, outOfBandManagementDao); if (result) { final String message = String.format("Transitioned out-of-band management power state from:%s to:%s due to event:%s for the host id:%d", currentPowerState, newPowerState, event, outOfBandManagementHost.getHostId()); LOG.debug(message); - ActionEventUtils.onActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(), Domain.ROOT_DOMAIN, - EventTypes.EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION, message); + if (newPowerState == OutOfBandManagement.PowerState.Unknown) { + ActionEventUtils.onActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(), Domain.ROOT_DOMAIN, + EventTypes.EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION, message); + } } return result; } catch (NoTransitionException ignored) { @@ -280,7 +281,7 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf public void submitBackgroundPowerSyncTask(final Host host) { if (host != null) { - backgroundSyncBlockingExecutor.submit(new OutOfBandManagementBackgroundTask(this, host, OutOfBandManagement.PowerOperation.STATUS)); + 
backgroundSyncBlockingExecutor.submit(new PowerOperationTask(this, host, OutOfBandManagement.PowerOperation.STATUS)); } } @@ -358,7 +359,7 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf @Override @ActionEvent(eventType = EventTypes.EVENT_HOST_OUTOFBAND_MANAGEMENT_CONFIGURE, eventDescription = "updating out-of-band management configuration") - public OutOfBandManagementResponse configureOutOfBandManagement(final Host host, final ImmutableMap options) { + public OutOfBandManagementResponse configure(final Host host, final ImmutableMap options) { OutOfBandManagement outOfBandManagementConfig = outOfBandManagementDao.findByHost(host.getId()); if (outOfBandManagementConfig == null) { outOfBandManagementConfig = outOfBandManagementDao.persist(new OutOfBandManagementVO(host.getId())); @@ -386,7 +387,7 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf @Override @ActionEvent(eventType = EventTypes.EVENT_HOST_OUTOFBAND_MANAGEMENT_ACTION, eventDescription = "issuing host out-of-band management action", async = true) - public OutOfBandManagementResponse executeOutOfBandManagementPowerOperation(final Host host, final OutOfBandManagement.PowerOperation powerOperation, final Long timeout) { + public OutOfBandManagementResponse executePowerOperation(final Host host, final OutOfBandManagement.PowerOperation powerOperation, final Long timeout) { checkOutOfBandManagementEnabledByZoneClusterHost(host); final OutOfBandManagement outOfBandManagementConfig = getConfigForHost(host); final ImmutableMap options = getOptions(outOfBandManagementConfig); @@ -430,64 +431,51 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf @Override @ActionEvent(eventType = EventTypes.EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD, eventDescription = "updating out-of-band management password") - public OutOfBandManagementResponse changeOutOfBandManagementPassword(final Host host, final String newPassword) { 
+ public OutOfBandManagementResponse changePassword(final Host host, final String newPassword) { checkOutOfBandManagementEnabledByZoneClusterHost(host); if (Strings.isNullOrEmpty(newPassword)) { throw new CloudRuntimeException(String.format("Cannot change out-of-band management password as provided new-password is null or empty for the host %s.", host.getUuid())); } - GlobalLock outOfBandManagementHostLock = GlobalLock.getInternLock(getOutOfBandManagementHostLock(host.getId())); - try { - if (outOfBandManagementHostLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_HOST)) { - try { - final OutOfBandManagement outOfBandManagementConfig = outOfBandManagementDao.findByHost(host.getId()); - final ImmutableMap options = getOptions(outOfBandManagementConfig); - if (!(options.containsKey(OutOfBandManagement.Option.PASSWORD) && !Strings.isNullOrEmpty(options.get(OutOfBandManagement.Option.PASSWORD)))) { - throw new CloudRuntimeException(String.format("Cannot change out-of-band management password as we've no previously configured password for the host %s.", host.getUuid())); - } - final OutOfBandManagementDriver driver = getDriver(outOfBandManagementConfig); - - final OutOfBandManagementDriverChangePasswordCommand cmd = new OutOfBandManagementDriverChangePasswordCommand(options, ActionTimeout.valueIn(host.getClusterId()), newPassword); - final OutOfBandManagementDriverResponse driverResponse; - try { - driverResponse = driver.execute(cmd); - } catch (Exception e) { - LOG.error("Out-of-band management change password failed due to driver error: " + e.getMessage()); - throw new CloudRuntimeException(String.format("Failed to change out-of-band management password for host (%s) due to driver error: %s", host.getUuid(), e.getMessage())); - } - - if (!driverResponse.isSuccess()) { - throw new CloudRuntimeException(String.format("Failed to change out-of-band management password for host (%s) with error: %s", host.getUuid(), driverResponse.getError())); - } - - final boolean 
updatedConfigResult = Transaction.execute(new TransactionCallback() { - @Override - public Boolean doInTransaction(TransactionStatus status) { - OutOfBandManagement updatedOutOfBandManagementConfig = outOfBandManagementDao.findByHost(host.getId()); - updatedOutOfBandManagementConfig.setPassword(newPassword); - return outOfBandManagementDao.update(updatedOutOfBandManagementConfig.getId(), (OutOfBandManagementVO) updatedOutOfBandManagementConfig); - } - }); - - if (!updatedConfigResult) { - LOG.error(String.format("Succeeded to change out-of-band management password but failed to updated in database the new password:%s for the host id:%d", newPassword, host.getId())); - } - - final OutOfBandManagementResponse response = new OutOfBandManagementResponse(); - response.setSuccess(updatedConfigResult && driverResponse.isSuccess()); - response.setResultDescription(driverResponse.getResult()); - response.setId(host.getUuid()); - return response; - } finally { - outOfBandManagementHostLock.unlock(); - } - } else { - LOG.error("Unable to acquire synchronization lock to change out-of-band management password for host id: " + host.getId()); - throw new CloudRuntimeException(String.format("Unable to acquire lock to change out-of-band management password for host (%s), please try after some time.", host.getUuid())); - } - } finally { - outOfBandManagementHostLock.releaseRef(); + final OutOfBandManagement outOfBandManagementConfig = outOfBandManagementDao.findByHost(host.getId()); + final ImmutableMap options = getOptions(outOfBandManagementConfig); + if (!(options.containsKey(OutOfBandManagement.Option.PASSWORD) && !Strings.isNullOrEmpty(options.get(OutOfBandManagement.Option.PASSWORD)))) { + throw new CloudRuntimeException(String.format("Cannot change out-of-band management password as we've no previously configured password for the host %s.", host.getUuid())); } + final OutOfBandManagementDriver driver = getDriver(outOfBandManagementConfig); + final 
OutOfBandManagementDriverChangePasswordCommand changePasswordCmd = new OutOfBandManagementDriverChangePasswordCommand(options, ActionTimeout.valueIn(host.getClusterId()), newPassword); + + final boolean changePasswordResult = Transaction.execute(new TransactionCallback() { + @Override + public Boolean doInTransaction(TransactionStatus status) { + final OutOfBandManagement updatedOutOfBandManagementConfig = outOfBandManagementDao.findByHost(host.getId()); + updatedOutOfBandManagementConfig.setPassword(newPassword); + boolean result = outOfBandManagementDao.update(updatedOutOfBandManagementConfig.getId(), (OutOfBandManagementVO) updatedOutOfBandManagementConfig); + + if (!result) { + throw new CloudRuntimeException(String.format("Failed to change out-of-band management password for host (%s) in the database.", host.getUuid())); + } + + final OutOfBandManagementDriverResponse driverResponse; + try { + driverResponse = driver.execute(changePasswordCmd); + } catch (Exception e) { + LOG.error("Out-of-band management change password failed due to driver error: " + e.getMessage()); + throw new CloudRuntimeException(String.format("Failed to change out-of-band management password for host (%s) due to driver error: %s", host.getUuid(), e.getMessage())); + } + + if (!driverResponse.isSuccess()) { + throw new CloudRuntimeException(String.format("Failed to change out-of-band management password for host (%s) with error: %s", host.getUuid(), driverResponse.getError())); + } + + return result && driverResponse.isSuccess(); + } + }); + + final OutOfBandManagementResponse response = new OutOfBandManagementResponse(); + response.setSuccess(changePasswordResult ); + response.setId(host.getUuid()); + return response; } @Override @@ -518,7 +506,9 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf 0L, TimeUnit.MILLISECONDS, new ArrayBlockingQueue(10 * poolSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); - LOG.info("Starting out-of-band management 
background sync executor with thread pool-size=" + poolSize + " and background sync thread interval=" + SyncThreadInterval.value() + "s"); + backgroundPollManager.submitTask(new OutOfBandManagementPowerStatePollTask()); + + LOG.info("Starting out-of-band management background sync executor with thread pool-size=" + poolSize); return true; } @@ -531,7 +521,7 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf @Override public boolean stop() { backgroundSyncBlockingExecutor.shutdown(); - outOfBandManagementDao.expireOutOfBandManagementOwnershipByServer(getId()); + outOfBandManagementDao.expireServerOwnership(getId()); return true; } @@ -542,7 +532,7 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf @Override public ConfigKey[] getConfigKeys() { - return new ConfigKey[] {ActionTimeout, SyncThreadInterval, SyncThreadPoolSize}; + return new ConfigKey[] {ActionTimeout, SyncThreadPoolSize}; } public List getOutOfBandManagementDrivers() { @@ -552,4 +542,36 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf public void setOutOfBandManagementDrivers(List outOfBandManagementDrivers) { this.outOfBandManagementDrivers = outOfBandManagementDrivers; } + + private final class OutOfBandManagementPowerStatePollTask extends ManagedContextRunnable implements BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("Host out-of-band management power state poll task is running..."); + } + final List outOfBandManagementHosts = outOfBandManagementDao.findAllByManagementServer(ManagementServerNode.getManagementServerId()); + if (outOfBandManagementHosts == null || outOfBandManagementHosts.isEmpty()) { + return; + } + for (final OutOfBandManagement outOfBandManagementHost : outOfBandManagementHosts) { + final Host host = hostDao.findById(outOfBandManagementHost.getHostId()); + if (host == null) { + continue; + } + if 
(isOutOfBandManagementEnabled(host)) { + submitBackgroundPowerSyncTask(host); + } else if (outOfBandManagementHost.getPowerState() != OutOfBandManagement.PowerState.Disabled) { + if (transitionPowerStateToDisabled(Collections.singletonList(host))) { + if (LOG.isDebugEnabled()) { + LOG.debug("Out-of-band management was disabled in zone/cluster/host, disabled power state for host id:" + host.getId()); + } + } + } + } + } catch (Throwable t) { + LOG.error("Error trying to retrieve host out-of-band management stats", t); + } + } + } } diff --git a/server/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementBackgroundTask.java b/server/src/org/apache/cloudstack/outofbandmanagement/PowerOperationTask.java similarity index 80% rename from server/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementBackgroundTask.java rename to server/src/org/apache/cloudstack/outofbandmanagement/PowerOperationTask.java index 38e139ec610..9c346d76c5f 100644 --- a/server/src/org/apache/cloudstack/outofbandmanagement/OutOfBandManagementBackgroundTask.java +++ b/server/src/org/apache/cloudstack/outofbandmanagement/PowerOperationTask.java @@ -20,14 +20,14 @@ package org.apache.cloudstack.outofbandmanagement; import com.cloud.host.Host; import org.apache.log4j.Logger; -public class OutOfBandManagementBackgroundTask implements Runnable { - public static final Logger LOG = Logger.getLogger(OutOfBandManagementBackgroundTask.class); +public class PowerOperationTask implements Runnable { + public static final Logger LOG = Logger.getLogger(PowerOperationTask.class); final private OutOfBandManagementService service; final private Host host; final private OutOfBandManagement.PowerOperation powerOperation; - public OutOfBandManagementBackgroundTask(OutOfBandManagementService service, Host host, OutOfBandManagement.PowerOperation powerOperation) { + public PowerOperationTask(OutOfBandManagementService service, Host host, OutOfBandManagement.PowerOperation powerOperation) { 
this.service = service; this.host = host; this.powerOperation = powerOperation; @@ -41,7 +41,7 @@ public class OutOfBandManagementBackgroundTask implements Runnable { @Override public void run() { try { - service.executeOutOfBandManagementPowerOperation(host, powerOperation, null); + service.executePowerOperation(host, powerOperation, null); } catch (Exception e) { LOG.warn(String.format("Out-of-band management background task operation=%s for host id=%d failed with: %s", powerOperation.name(), host.getId(), e.getMessage())); diff --git a/server/src/org/apache/cloudstack/poll/BackgroundPollManagerImpl.java b/server/src/org/apache/cloudstack/poll/BackgroundPollManagerImpl.java new file mode 100644 index 00000000000..c0a7f1c3957 --- /dev/null +++ b/server/src/org/apache/cloudstack/poll/BackgroundPollManagerImpl.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.poll; + +import com.cloud.utils.component.Manager; +import com.cloud.utils.component.ManagerBase; +import com.cloud.utils.concurrency.NamedThreadFactory; +import com.cloud.utils.exception.CloudRuntimeException; +import com.google.common.base.Preconditions; +import org.apache.log4j.Logger; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +public final class BackgroundPollManagerImpl extends ManagerBase implements BackgroundPollManager, Manager { + public static final Logger LOG = Logger.getLogger(BackgroundPollManagerImpl.class); + + private ScheduledExecutorService backgroundPollTaskScheduler; + private List submittedTasks = new ArrayList<>(); + private volatile boolean isConfiguredAndStarted = false; + + public long getInitialDelay() { + return 5000L; + } + + public long getRoundDelay() { + return 4000L; + } + + @Override + public boolean start() { + if (isConfiguredAndStarted) { + return true; + } + backgroundPollTaskScheduler = Executors.newScheduledThreadPool(submittedTasks.size() + 1, new NamedThreadFactory("BackgroundTaskPollManager")); + for (final BackgroundPollTask task : submittedTasks) { + backgroundPollTaskScheduler.scheduleWithFixedDelay(task, getInitialDelay(), getRoundDelay(), TimeUnit.MILLISECONDS); + LOG.debug("Scheduled background poll task: " + task.getClass().getName()); + } + isConfiguredAndStarted = true; + return true; + } + + @Override + public boolean stop() { + if (isConfiguredAndStarted) { + backgroundPollTaskScheduler.shutdown(); + } + return true; + } + + @Override + public void submitTask(final BackgroundPollTask task) { + Preconditions.checkNotNull(task); + if (isConfiguredAndStarted) { + throw new CloudRuntimeException("Background Poll Manager cannot accept poll task as it has been configured and started."); + } + LOG.debug("Background Poll Manager received 
task: " + task.getClass().getSimpleName()); + submittedTasks.add(task); + } +} diff --git a/server/test/org/apache/cloudstack/poll/BackgroundPollManagerImplTest.java b/server/test/org/apache/cloudstack/poll/BackgroundPollManagerImplTest.java new file mode 100644 index 00000000000..3304abaf611 --- /dev/null +++ b/server/test/org/apache/cloudstack/poll/BackgroundPollManagerImplTest.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.poll; + +import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class BackgroundPollManagerImplTest { + + private BackgroundPollManagerImpl pollManager; + private DummyPollTask pollTask; + + private class DummyPollTask extends ManagedContextRunnable implements BackgroundPollTask { + private boolean didIRun = false; + private long counter = 0; + + public boolean didItRan() { + return didIRun; + } + + public long getCounter() { + return counter; + } + + @Override + protected void runInContext() { + didIRun = true; + counter++; + } + } + + @Before + public void setUp() throws Exception { + pollManager = new BackgroundPollManagerImpl(); + pollTask = new DummyPollTask(); + } + + @After + public void tearDown() throws Exception { + pollManager.stop(); + } + + @Test + public void testSubmitValidTask() throws Exception { + Assert.assertFalse(pollTask.didItRan()); + Assert.assertTrue(pollTask.getCounter() == 0); + + pollManager.submitTask(pollTask); + pollManager.start(); + Thread.sleep(pollManager.getInitialDelay()*2); + + Assert.assertTrue(pollTask.didItRan()); + Assert.assertTrue(pollTask.getCounter() > 0); + } + + @Test(expected = NullPointerException.class) + public void testSubmitNullTask() throws Exception { + pollManager.submitTask(null); + } + +} \ No newline at end of file diff --git a/setup/db/db/schema-452to453.sql b/setup/db/db/schema-452to453.sql index 7525ce4d31c..6d5f9b3a25e 100644 --- a/setup/db/db/schema-452to453.sql +++ b/setup/db/db/schema-452to453.sql @@ -151,3 +151,106 @@ CREATE VIEW `cloud`.`host_view` AS and async_job.job_status = 0 left join `cloud`.`oobm` ON oobm.host_id = host.id; + +-- Host HA feature +CREATE TABLE IF NOT EXISTS `cloud`.`ha_config` ( + `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `resource_id` bigint(20) unsigned DEFAULT NULL COMMENT 'id of the resource', + 
`resource_type` varchar(255) NOT NULL COMMENT 'the type of the resource', + `enabled` int(1) unsigned DEFAULT '0' COMMENT 'is HA enabled for the resource', + `ha_state` varchar(255) DEFAULT 'Disabled' COMMENT 'HA state', + `provider` varchar(255) DEFAULT NULL COMMENT 'HA provider', + `update_count` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'state based incr-only counter for atomic ha_state updates', + `update_time` datetime COMMENT 'last ha_state update datetime', + `mgmt_server_id` bigint(20) unsigned DEFAULT NULL COMMENT 'management server id that is responsible for the HA for the resource', + PRIMARY KEY (`id`), + KEY `i_ha_config__enabled` (`enabled`), + KEY `i_ha_config__ha_state` (`ha_state`), + KEY `i_ha_config__mgmt_server_id` (`mgmt_server_id`), + UNIQUE KEY (`resource_id`, `resource_type`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +DELETE from `cloud`.`configuration` where name='outofbandmanagement.sync.interval'; + +-- Host HA specific HostJoinDao/VO changes: +DROP VIEW IF EXISTS `cloud`.`host_view`; +CREATE VIEW `cloud`.`host_view` AS + select + host.id, + host.uuid, + host.name, + host.status, + host.disconnected, + host.type, + host.private_ip_address, + host.version, + host.hypervisor_type, + host.hypervisor_version, + host.capabilities, + host.last_ping, + host.created, + host.removed, + host.resource_state, + host.mgmt_server_id, + host.cpu_sockets, + host.cpus, + host.speed, + host.ram, + cluster.id cluster_id, + cluster.uuid cluster_uuid, + cluster.name cluster_name, + cluster.cluster_type, + data_center.id data_center_id, + data_center.uuid data_center_uuid, + data_center.name data_center_name, + data_center.networktype data_center_type, + host_pod_ref.id pod_id, + host_pod_ref.uuid pod_uuid, + host_pod_ref.name pod_name, + host_tags.tag, + guest_os_category.id guest_os_category_id, + guest_os_category.uuid guest_os_category_uuid, + guest_os_category.name guest_os_category_name, + mem_caps.used_capacity memory_used_capacity, + 
mem_caps.reserved_capacity memory_reserved_capacity, + cpu_caps.used_capacity cpu_used_capacity, + cpu_caps.reserved_capacity cpu_reserved_capacity, + async_job.id job_id, + async_job.uuid job_uuid, + async_job.job_status job_status, + async_job.account_id job_account_id, + oobm.enabled AS `oobm_enabled`, + oobm.power_state AS `oobm_power_state`, + ha_config.enabled AS `ha_enabled`, + ha_config.ha_state AS `ha_state`, + ha_config.provider AS `ha_provider` + from + `cloud`.`host` + left join + `cloud`.`cluster` ON host.cluster_id = cluster.id + left join + `cloud`.`data_center` ON host.data_center_id = data_center.id + left join + `cloud`.`host_pod_ref` ON host.pod_id = host_pod_ref.id + left join + `cloud`.`host_details` ON host.id = host_details.host_id + and host_details.name = 'guest.os.category.id' + left join + `cloud`.`guest_os_category` ON guest_os_category.id = CONVERT( host_details.value , UNSIGNED) + left join + `cloud`.`host_tags` ON host_tags.host_id = host.id + left join + `cloud`.`op_host_capacity` mem_caps ON host.id = mem_caps.host_id + and mem_caps.capacity_type = 0 + left join + `cloud`.`op_host_capacity` cpu_caps ON host.id = cpu_caps.host_id + and cpu_caps.capacity_type = 1 + left join + `cloud`.`async_job` ON async_job.instance_id = host.id + and async_job.instance_type = 'Host' + and async_job.job_status = 0 + left join + `cloud`.`oobm` ON oobm.host_id = host.id + left join + `cloud`.`ha_config` ON ha_config.resource_id=host.id + and ha_config.resource_type='Host'; diff --git a/setup/db/templates.simulator.sql b/setup/db/templates.simulator.sql index 25e91bd9c5f..b26a7b191d4 100755 --- a/setup/db/templates.simulator.sql +++ b/setup/db/templates.simulator.sql @@ -19,4 +19,7 @@ INSERT INTO `cloud`.`vm_template` (id, uuid, unique_name, name, public, created, type, hvm, bits, account_id, url, checksum, enable_password, display_text, format, guest_os_id, featured, cross_zones, hypervisor_type, state) VALUES (100, UUID(), 'simulator-domR', 'SystemVM 
Template (simulator)', 0, now(), 'SYSTEM', 0, 64, 1, 'http://nfs1.lab.vmops.com/templates/routing/debian/latest/systemvm.vhd.bz2', '', 0, 'SystemVM Template (simulator)', 'VHD', 15, 0, 1, 'Simulator','Active'); INSERT INTO `cloud`.`vm_template` (id, uuid, unique_name, name, public, created, type, hvm, bits, account_id, url, checksum, enable_password, display_text, format, guest_os_id, featured, cross_zones, hypervisor_type,state) - VALUES (111, UUID(), 'simulator-Centos', 'CentOS 5.3(64-bit) no GUI (Simulator)', 1, now(), 'BUILTIN', 0, 64, 1, 'http://nfs1.lab.vmops.com/templates/centos53-x86_64/latest/f59f18fb-ae94-4f97-afd2-f84755767aca.vhd.bz2', '', 0, 'CentOS 5.3(64-bit) no GUI (Simulator)', 'VHD', 11, 1, 1, 'Simulator','Active'); + VALUES (111, UUID(), 'simulator-Centos', 'CentOS 5.3(64-bit) no GUI (Simulator)', 1, now(), 'BUILTIN', 0, 64, 1, 'http://nfs1.lab.vmops.com/templates/centos53-x86_64/latest/f59f18fb-ae94-4f97-afd2-f84755767aca.vhd.bz2', '', 0, 'CentOS 5.3(64-bit) no GUI (Simulator)', 'VHD', 142, 1, 1, 'Simulator','Active'); + +INSERT INTO `cloud`.`template_store_ref` VALUES (1,1,111,NOW(),NOW(),NULL,100,0,'Image',0,'DOWNLOADED',NULL,NULL,'template/tmpl/1/111/','http://fake.cloud/111.vhd.bz2','Ready',0,0,0,0,NULL,NULL,NULL); +INSERT INTO `cloud`.`template_store_ref` VALUES (2,1,100,NOW(),NOW(),NULL,100,0,'Image',0,'DOWNLOADED',NULL,NULL,'template/tmpl/1/100/','http://fake.cloud/111.vhd.bz2','Ready',0,0,0,0,NULL,NULL,NULL); diff --git a/setup/dev/advanced.cfg b/setup/dev/advanced.cfg index 84175fd787f..cd4a0a96ae9 100644 --- a/setup/dev/advanced.cfg +++ b/setup/dev/advanced.cfg @@ -107,6 +107,11 @@ "username": "root", "url": "http://sim/c1/h0", "password": "password" + }, + { + "username": "root", + "url": "http://sim/c1/h1", + "password": "password" } ], "clustertype": "CloudManaged", @@ -114,6 +119,10 @@ { "url": "nfs://10.147.28.6:/export/home/sandbox/primary2", "name": "PS2" + }, + { + "url": "nfs://10.147.28.6:/export/home/sandbox/primary3", + 
"name": "PS2" } ] } @@ -213,10 +222,6 @@ { "name": "ping.timeout", "value": "1.5" - }, - { - "name": "outofbandmanagement.sync.interval", - "value": "2000" } ], "mgtSvr": [ diff --git a/test/integration/component/test_host_ha.py b/test/integration/component/test_host_ha.py index 6361564e816..2af5ea93eb7 100644 --- a/test/integration/component/test_host_ha.py +++ b/test/integration/component/test_host_ha.py @@ -84,7 +84,7 @@ class TestHostHA(cloudstackTestCase): "sleep": 60, "timeout": 10, } - + def tearDown(self): try: @@ -95,20 +95,20 @@ class TestHostHA(cloudstackTestCase): raise Exception("Warning: Exception during cleanup : %s" % e) return - + + def createVMs(self, hostId, number, local): - self.template = get_template( self.apiclient, self.zone.id, self.services["ostype"] ) - + if self.template == FAILED: assert False, "get_template() failed to return template with description %s" % self.services["ostype"] - + self.logger.debug("Using template %s " % self.template.id) - + if local: self.service_offering = ServiceOffering.create( self.apiclient, @@ -119,10 +119,9 @@ class TestHostHA(cloudstackTestCase): self.apiclient, self.services["service_offering"] ) - - + self.logger.debug("Using service offering %s " % self.service_offering.id) - + vms = [] for i in range(0, number): self.services["vm"]["zoneid"] = self.zone.id @@ -139,7 +138,7 @@ class TestHostHA(cloudstackTestCase): self.cleanup.append(vm) self.logger.debug("VM create = {}".format(vm.id)) return vm - + def noOfVMsOnHost(self, hostId): listVms = VirtualMachine.list( self.apiclient, @@ -150,12 +149,12 @@ class TestHostHA(cloudstackTestCase): for vm in listVms: self.logger.debug('VirtualMachine on Hyp 1 = {}'.format(vm.id)) vmnos = vmnos + 1 - + return vmnos - + def checkHostDown(self, fromHostIp, testHostIp): try: - ssh = SshClient(fromHostIp, 22, "root", "password") + ssh = SshClient(fromHostIp, 22, "root", "password") res = ssh.execute("ping -c 1 %s" % testHostIp) result = str(res) if result.count("100% 
packet loss") == 1: @@ -165,10 +164,10 @@ class TestHostHA(cloudstackTestCase): except Exception as e: self.logger.debug("Got exception %s" % e) return False, 1 - + def checkHostUp(self, fromHostIp, testHostIp): try: - ssh = SshClient(fromHostIp, 22, "root", "password") + ssh = SshClient(fromHostIp, 22, "root", "password") res = ssh.execute("ping -c 1 %s" % testHostIp) result = str(res) if result.count(" 0% packet loss") == 1: @@ -178,8 +177,8 @@ class TestHostHA(cloudstackTestCase): except Exception as e: self.logger.debug("Got exception %s" % e) return False, 1 - - + + def isOnlyNFSStorageAvailable(self): if self.zone.localstorageenabled: return False @@ -196,13 +195,13 @@ class TestHostHA(cloudstackTestCase): for storage_pool in storage_pools: if storage_pool.type == u'NetworkFilesystem': return True - + return False - + def isOnlyLocalStorageAvailable(self): if not(self.zone.localstorageenabled): return False - + storage_pools = StoragePool.list( self.apiclient, zoneid=self.zone.id, @@ -216,13 +215,13 @@ class TestHostHA(cloudstackTestCase): for storage_pool in storage_pools: if storage_pool.type == u'NetworkFilesystem': return False - + return True - + def isLocalAndNFSStorageAvailable(self): if not(self.zone.localstorageenabled): return False - + storage_pools = StoragePool.list( self.apiclient, zoneid=self.zone.id, @@ -236,10 +235,10 @@ class TestHostHA(cloudstackTestCase): for storage_pool in storage_pools: if storage_pool.type == u'NetworkFilesystem': return True - + return False - - + + def checkHostStateInCloudstack(self, state, hostId): try: listHost = Host.list( @@ -254,7 +253,7 @@ class TestHostHA(cloudstackTestCase): True, "Check if listHost returns a valid response" ) - + self.assertEqual( len(listHost), 1, @@ -268,19 +267,30 @@ class TestHostHA(cloudstackTestCase): except Exception as e: self.logger.debug("Got exception %s" % e) return False, 1 - - + + def disconnectHostfromNetwork(self, hostIp, timeout): srcFile = 
os.path.dirname(os.path.realpath(__file__)) + "/test_host_ha.sh" if not(os.path.isfile(srcFile)): self.logger.debug("File %s not found" % srcFile) raise unittest.SkipTest("Script file %s required for HA not found" % srcFile); - + ssh = SshClient(hostIp, 22, "root", "password") ssh.scp(srcFile, "/root/test_host_ha.sh") - ssh.execute("nohup sh /root/test_host_ha.sh %s > /dev/null 2>&1 &\n" % timeout) + ssh.execute("nohup sh /root/test_host_ha.sh -t %s -d all > /dev/null 2>&1 &\n" % timeout) return - + + def stopAgentOnHost(self, hostIp, timeout): + srcFile = os.path.dirname(os.path.realpath(__file__)) + "/test_host_ha.sh" + if not(os.path.isfile(srcFile)): + self.logger.debug("File %s not found" % srcFile) + raise unittest.SkipTest("Script file %s required for HA not found" % srcFile); + + ssh = SshClient(hostIp, 22, "root", "password") + ssh.scp(srcFile, "/root/test_host_ha.sh") + ssh.execute("nohup sh /root/test_host_ha.sh -t %s -d agent > /dev/null 2>&1 &\n" % timeout) + return + @attr( tags=[ @@ -292,11 +302,13 @@ class TestHostHA(cloudstackTestCase): "sg"], required_hardware="true") def test_01_host_ha_with_nfs_storagepool_with_vm(self): - + raise unittest.SkipTest("Skipping this test as this is for NFS store only."); + return + if not(self.isOnlyNFSStorageAvailable()): raise unittest.SkipTest("Skipping this test as this is for NFS store only."); return - + listHost = Host.list( self.apiclient, type='Routing', @@ -305,61 +317,61 @@ class TestHostHA(cloudstackTestCase): ) for host in listHost: self.logger.debug('Hypervisor = {}'.format(host.id)) - - + + if len(listHost) != 2: self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost)); raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost)); return - + no_of_vms = self.noOfVMsOnHost(listHost[0].id) - + no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id) - + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) - - + + if no_of_vms < 
5: self.logger.debug("test_01: Create VMs as there are not enough vms to check host ha") no_vm_req = 5 - no_of_vms if (no_vm_req > 0): self.logger.debug("Creating vms = {}".format(no_vm_req)) self.vmlist = self.createVMs(listHost[0].id, no_vm_req, False) - + ha_host = listHost[1] other_host = listHost[0] if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id): ha_host = listHost[0] other_host = listHost[1] - + self.disconnectHostfromNetwork(ha_host.ipaddress, 400) - + hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress) - if not(hostDown): + if not(hostDown): raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress)) - + hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Down", ha_host.id) #the test could have failed here but we will try our best to get host back in consistent state - + no_of_vms = self.noOfVMsOnHost(ha_host.id) no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id) self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) # hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress) - if not(hostUp): + if not(hostUp): self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress)) - - + + hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id) - - if not(hostDownInCloudstack): + + if not(hostDownInCloudstack): raise self.fail("Host is not down %s, in cloudstack so failing test " % (ha_host.ipaddress)) - if not(hostUpInCloudstack): + if not(hostUpInCloudstack): raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress)) - + return - + @attr( tags=[ "advanced", @@ -370,11 +382,12 @@ class TestHostHA(cloudstackTestCase): "sg"], required_hardware="true") def test_02_host_ha_with_local_storage_and_nfs(self): - + raise unittest.SkipTest("Skipping this test as this is for NFS store only."); 
+ return if not(self.isLocalAndNFSStorageAvailable()): raise unittest.SkipTest("Skipping this test as this is for Local storage and NFS storage only."); return - + listHost = Host.list( self.apiclient, type='Routing', @@ -383,62 +396,62 @@ class TestHostHA(cloudstackTestCase): ) for host in listHost: self.logger.debug('Hypervisor = {}'.format(host.id)) - - + + if len(listHost) != 2: self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost)); raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost)); return - + no_of_vms = self.noOfVMsOnHost(listHost[0].id) - + no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id) - + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) - - + + if no_of_vms < 5: self.logger.debug("test_02: Create VMs as there are not enough vms to check host ha") no_vm_req = 5 - no_of_vms if (no_vm_req > 0): self.logger.debug("Creating vms = {}".format(no_vm_req)) self.vmlist = self.createVMs(listHost[0].id, no_vm_req, True) - + ha_host = listHost[1] other_host = listHost[0] if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id): ha_host = listHost[0] other_host = listHost[1] - + self.disconnectHostfromNetwork(ha_host.ipaddress, 400) - + hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress) - if not(hostDown): + if not(hostDown): raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress)) - + hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Down", ha_host.id) #the test could have failed here but we will try our best to get host back in consistent state - + no_of_vms = self.noOfVMsOnHost(ha_host.id) no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id) self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) # hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress) - if not(hostUp): + if not(hostUp): 
self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress)) - - + + hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id) - - if not(hostDownInCloudstack): + + if not(hostDownInCloudstack): raise self.fail("Host is not down %s, in cloudstack so failing test " % (ha_host.ipaddress)) - if not(hostUpInCloudstack): + if not(hostUpInCloudstack): raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress)) - + return - - - + + + @attr( tags=[ "advanced", @@ -449,11 +462,13 @@ class TestHostHA(cloudstackTestCase): "sg"], required_hardware="true") def test_03_host_ha_with_only_local_storage(self): - + raise unittest.SkipTest("Skipping this test as this is for NFS store only."); + return + if not(self.isOnlyLocalStorageAvailable()): raise unittest.SkipTest("Skipping this test as this is for Local storage only."); return - + listHost = Host.list( self.apiclient, type='Routing', @@ -462,55 +477,125 @@ class TestHostHA(cloudstackTestCase): ) for host in listHost: self.logger.debug('Hypervisor = {}'.format(host.id)) - - + + if len(listHost) != 2: self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost)); raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost)); return - + no_of_vms = self.noOfVMsOnHost(listHost[0].id) - + no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id) - + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) - + if no_of_vms < 5: self.logger.debug("test_03: Create VMs as there are not enough vms to check host ha") no_vm_req = 5 - no_of_vms if (no_vm_req > 0): self.logger.debug("Creating vms = {}".format(no_vm_req)) self.vmlist = self.createVMs(listHost[0].id, no_vm_req, True) - + ha_host = listHost[1] other_host = listHost[0] if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id): ha_host = listHost[0] other_host = listHost[1] - + 
self.disconnectHostfromNetwork(ha_host.ipaddress, 400) - + hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress) - if not(hostDown): + if not(hostDown): raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress)) - + hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Alert", ha_host.id) #the test could have failed here but we will try our best to get host back in consistent state - + no_of_vms = self.noOfVMsOnHost(ha_host.id) no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id) self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) # hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress) - if not(hostUp): + if not(hostUp): self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress)) - - + + hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id) - - if not(hostDownInCloudstack): + + if not(hostDownInCloudstack): raise self.fail("Host is not in alert %s, in cloudstack so failing test " % (ha_host.ipaddress)) - if not(hostUpInCloudstack): + if not(hostUpInCloudstack): raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress)) - - return \ No newline at end of file + + return + + + @attr( + tags=[ + "advanced", + "advancedns", + "smoke", + "basic", + "eip", + "sg"], + required_hardware="true") + def test_04_host_ha_vmactivity_check(self): + + if not(self.isOnlyNFSStorageAvailable()): + raise unittest.SkipTest("Skipping this test as this is for NFS store only."); + return + + listHost = Host.list( + self.apiclient, + type='Routing', + zoneid=self.zone.id, + podid=self.pod.id, + ) + for host in listHost: + self.logger.debug('Hypervisor = {}'.format(host.id)) + + + if len(listHost) != 2: + self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost)); + raise unittest.SkipTest("Host HA 
can be tested with two host only %s, found" % len(listHost)); + return + + no_of_vms = self.noOfVMsOnHost(listHost[0].id) + + no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id) + + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) + + + if no_of_vms < 5: + self.logger.debug("test_01: Create VMs as there are not enough vms to check host ha") + no_vm_req = 5 - no_of_vms + if (no_vm_req > 0): + self.logger.debug("Creating vms = {}".format(no_vm_req)) + self.vmlist = self.createVMs(listHost[0].id, no_vm_req, False) + + ha_host = listHost[1] + other_host = listHost[0] + if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id): + ha_host = listHost[0] + other_host = listHost[1] + + self.stopAgentOnHost(ha_host.ipaddress, 150) + + hostDisconnectedInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Disconnected", ha_host.id) + #the test could have failed here but we will try our best to get host back in consistent state + + no_of_vms = self.noOfVMsOnHost(ha_host.id) + no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id) + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) + # + + hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id) + + if not(hostDisconnectedInCloudstack): + raise self.fail("Host is not disconnected %s, in cloudstack so failing test " % (ha_host.ipaddress)) + if not(hostUpInCloudstack): + raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress)) + + return diff --git a/test/integration/component/test_host_ha.sh b/test/integration/component/test_host_ha.sh index 85aadb1b688..b27038840c9 100755 --- a/test/integration/component/test_host_ha.sh +++ b/test/integration/component/test_host_ha.sh @@ -1,40 +1,100 @@ #!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. -#bring down all eth interfaces +set -x -usage() { echo "Usage: $0 "; exit 1; } +usage() { echo "Usage: $0 -d -t "; exit 1; } -case $1 in +Interval= +Down= +while getopts 'd:t:' OPTION +do + case $OPTION in + d) + Down="$OPTARG" + ;; + t) + Interval="$OPTARG" + ;; + *) + usage + ;; + esac +done + + +if [ -z $Interval ]; then + usage +fi + + +if [ "$Down" != 'all' ]; then + if [ "$Down" != 'agent' ]; then + usage + fi +fi + +case $Interval in ''|*[!0-9]*) echo "The parameter should be an integer"; exit ;; *) echo $1 ;; esac -if [ -z $1 ]; then - usage -elif [ $1 -lt 1 ]; then +if [ $Interval -lt 1 ]; then echo "Down time should be at least 1 second" exit 1 -elif [ $1 -gt 5000 ]; then +elif [ $Interval -gt 5000 ]; then echo "Down time should be less than 5000 second" exit 1 fi -for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth` + +for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep "^eth.$"` do - ifconfig $i down + ifconfig $i down done service cloudstack-agent stop update-rc.d -f cloudstack-agent remove -sleep $1 +sleep 1 -for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth` +if [ "$Down" = 'agent' ]; then + for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep "^eth.$"` + do + ifconfig $i up + done +fi + +counter=$Interval +while [ 
$counter -gt 0 ] do - ifconfig $i up + sleep 1 + counter=$(( $counter - 1 )) done +if [ "$Down" = 'all' ]; then + for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth` + do + ifconfig $i up + done +fi + update-rc.d -f cloudstack-agent defaults -service cloudstack-agent start \ No newline at end of file +service cloudstack-agent start diff --git a/test/integration/smoke/test_ha_for_host.py b/test/integration/smoke/test_ha_for_host.py new file mode 100644 index 00000000000..efc4f1f1b41 --- /dev/null +++ b/test/integration/smoke/test_ha_for_host.py @@ -0,0 +1,247 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +from marvin.cloudstackTestCase import * +from marvin.cloudstackAPI import * +from marvin.lib.utils import * +from marvin.lib.common import * +from nose.plugins.attrib import attr +import cmd +from cmd import Cmd + + +class TestHaForHost(cloudstackTestCase): + """ Test cases for configuring HA for Host + """ + + def setUp(self): + testClient = super(TestHaForHost, self).getClsTestClient() + + self.apiclient = testClient.getApiClient() + self.dbclient = testClient.getDbConnection() + self.services = testClient.getParsedTestDataConfig() + + self.zone = get_zone(self.apiclient, testClient.getZoneForTests()) + self.host = None + self.server = None + + self.cleanup = [] + + def tearDown(self): + try: + self.dbclient.execute("delete from ha_config where resource_type='Host'") + cleanup_resources(self.apiclient, self.cleanup) + except Exception as e: + raise Exception("Warning: Exception during cleanup : %s" % e) + + + def getHost(self, hostId=None): + if self.host and hostId is None: + return self.host + + response = list_hosts( + self.apiclient, + zoneid=self.zone.id, + type='Routing', + id=hostId + ) + if len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No hosts found, skipping HA for Host test") + + + def getHaProvider(self, host): + cmd = listHostHAProviders.listHostHAProvidersCmd() + cmd.hypervisor = host.hypervisor + response = self.apiclient.listHostHAProviders(cmd) + return response[0].haprovider + + + def configureHaProvider(self): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.hostid = self.getHost().id + cmd.provider = self.getHaProvider(self.getHost()) + return self.apiclient.configureHAForHost(cmd) + + + def getHaForHostEnableCmd(self): + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + + def getHaForHostDisableCmd(self): + cmd = disableHAForHost.disableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + + def getListHostHAResources(self): + cmd = 
listHostHAResources.listHostHAResourcesCmd() + cmd.hostid = self.getHost().id + return cmd + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_enable_ha_for_host(self): + """ + This test enables HA for a host + """ + + self.configureHaProvider() + cmd = self.getHaForHostEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_enable_ha_for_host_invalid(self): + """ + This is a negative test for enable HA for a host + """ + + self.configureHaProvider() + cmd = self.getHaForHostEnableCmd() + cmd.hostid = -1 + + try: + response = self.apiclient.enableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_disable_ha_for_host(self): + """ + This test disables HA for a host + """ + + self.configureHaProvider() + cmd = self.getHaForHostDisableCmd() + + response = self.apiclient.disableHAForHost(cmd) + + self.assertTrue(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + response = self.getHost(cmd.hostid) + + self.assertEqual(response.hostha.hastate, "Disabled") + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_disable_ha_for_host_invalid(self): + """ + This is a negative test for disable HA for a host + """ + + self.configureHaProvider() + cmd = self.getHaForHostDisableCmd() + cmd.hostid = -1 + + try: + response = self.apiclient.disableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + 
required_hardware="false") + def test_list_ha_for_host(self): + """ + Test that verifies the listHAForHost API + """ + self.configureHaProvider() + db_count = self.dbclient.execute("SELECT count(*) FROM cloud.ha_config") + + cmd = self.getListHostHAResources() + del cmd.hostid + response = self.apiclient.listHostHAResources(cmd) + + self.assertEqual(db_count[0][0], len(response)) + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_list_ha_for_host_valid(self): + """ + Valid test for listing a specific host HA resources + """ + + self.configureHaProvider() + cmd = self.getListHostHAResources() + response = self.apiclient.listHostHAResources(cmd) + self.assertEqual(response[0].hostid, cmd.hostid) + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_list_ha_for_host_invalid(self): + """ + Test that listHostHAResources is returning exception when called with invalid data + """ + + self.configureHaProvider() + cmd = self.getListHostHAResources() + cmd.hostid = "someinvalidvalue" + + try: + response = self.apiclient.listHostHAResources(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") diff --git a/test/integration/smoke/test_ha_kvm.py b/test/integration/smoke/test_ha_kvm.py new file mode 100644 index 00000000000..3dc3515f7fb --- /dev/null +++ b/test/integration/smoke/test_ha_kvm.py @@ -0,0 +1,701 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import marvin +from marvin.cloudstackTestCase import * +from marvin.cloudstackAPI import * +from marvin.lib.utils import * +from marvin.lib.base import * +from marvin.lib.common import * +from nose.plugins.attrib import attr + +import random + +from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer + +import random +import socket +import sys +import thread +import time + + +class TestHAKVM(cloudstackTestCase): + """ Test cases for host HA using KVM host(s) + """ + + def setUp(self): + self.apiclient = self.testClient.getApiClient() + self.hypervisor = self.testClient.getHypervisorInfo() + self.dbclient = self.testClient.getDbConnection() + self.services = self.testClient.getParsedTestDataConfig() + self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ + self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ + self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20) + + # Cleanup any existing configs + self.dbclient.execute("delete from ha_config where resource_type='Host'") + self.host = self.getHost() + + # use random port for ipmisim + s = socket.socket() + s.bind(('', 0)) + self.serverPort = s.getsockname()[1] + s.close() + + self.cleanup = [] + + def getFakeMsId(self): + return self.fakeMsId + + def getFakeMsRunId(self): + return self.fakeMsId * 1000 + + def tearDown(self): + try: + self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from mshost where runid=%s" % 
self.getFakeMsRunId()) + self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from data_center_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from ha_config where resource_type='Host'") + self.dbclient.execute("delete from oobm where port=%d" % self.getIpmiServerPort()) + self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from cluster_details where name='outOfBandManagementEnabled'") + self.dbclient.execute("delete from data_center_details where name='outOfBandManagementEnabled'") + cleanup_resources(self.apiclient, self.cleanup) + except Exception as e: + raise Exception("Warning: Exception during cleanup : %s" % e) + + def getHostHaEnableCmd(self): + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def check_host_transition_to_available(self): + t_end = time.time() + 90 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate == "Available": + return + else: + continue + self.fail(self) + + def getHost(self): + + response = list_hosts( + self.apiclient, + type='Routing', + resourcestate='Enabled' + ) + if response and len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No KVM hosts found, skipping host-ha test") + + def getHost(self, hostId=None): + + response = list_hosts( + self.apiclient, + type='Routing', + id=hostId + ) + if response and len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No KVM hosts found, skipping host-ha test") + + def getHostHaConfigCmd(self, provider='kvmhaprovider'): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.provider = provider + cmd.hostid = self.getHost().id + return cmd + + def getHostHaEnableCmd(self): + cmd = 
enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def getHostHaDisableCmd(self): + cmd = disableHAForHost.disableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def configureAndEnableHostHa(self, initialize=True): + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + response = self.apiclient.enableHAForHost(self.getHostHaEnableCmd()) + self.assertEqual(response.haenable, True) + if initialize: + self.configureKVMHAProviderState(True, True, True, False) + + def configureAndDisableHostHa(self, hostId): + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaDisableCmd() + cmd.hostid = hostId + response = self.apiclient.disableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + def enableHostHa(self, hostId): + cmd = self.getHostHaEnableCmd() + cmd.hostid = hostId + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + return response + + def configureKVMHAProviderState(self, health, activity, recover, fence): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.hostid = self.getHost().id + cmd.health = health + cmd.activity = activity + cmd.recover = recover + cmd.fence = fence + response = self.apiclient.configureKVMHAProviderState(cmd) + self.assertEqual(response.success, 'true') + + def checkSyncToState(self, state, interval=5000): + def checkForStateSync(expectedState): + response = self.getHost(hostId=self.getHost().id).hostha + return response.hastate == expectedState, None + + sync_interval = 1 + int(interval) / 1000 + res, _ = wait_until(sync_interval, 10, checkForStateSync, state) + if not res: + self.fail("Failed to get host.hastate synced to expected state:" + state) + response = self.getHost(hostId=self.getHost().id).hostha + self.assertEqual(response.hastate, state) + + @attr(tags=["devcloud", "advanced", 
"advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_configure_invalid_provider(self): + """ + Tests host-ha configuration with invalid driver + """ + cmd = self.getHostHaConfigCmd() + cmd.provider = 'randomDriverThatDoesNotExist' + try: + response = self.apiclient.configureHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_configure_default_driver(self): + """ + Tests host-ha configuration with valid data + """ + cmd = self.getHostHaConfigCmd() + response = self.apiclient.configureHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haprovider, cmd.provider.lower()) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_enable_feature_invalid(self): + """ + Tests ha feature enable command with invalid options + """ + cmd = self.getHostHaEnableCmd() + cmd.hostid = -1 + try: + response = self.apiclient.enableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = enableHAForCluster.enableHAForClusterCmd() + response = self.apiclient.enableHAForCluster(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = enableHAForZone.enableHAForZoneCmd() + response = self.apiclient.enableHAForZone(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_disable_feature_invalid(self): + """ + Tests ha feature disable command with invalid options + """ + cmd = self.getHostHaDisableCmd() + cmd.hostid = -1 + try: + response = self.apiclient.disableHAForHost(cmd) + except Exception: + pass 
+ else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = disableHAForCluster.disableHAForClusterCmd() + response = self.apiclient.disableHAForCluster(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = disableHAForZone.disableHAForZoneCmd() + response = self.apiclient.disableHAForZone(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_enable_feature_valid(self): + """ + Tests host-ha enable feature with valid options + """ + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_disable_feature_valid(self): + """ + Tests host-ha disable feature with valid options + """ + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaDisableCmd() + response = self.apiclient.disableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + response = self.getHost(hostId=cmd.hostid).hostha + self.assertEqual(response.hastate, 'Disabled') + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_configure_ha_provider_invalid(self): + """ + Tests configure HA Provider with invalid provider options + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + host = 
self.getHost(response.hostid) + + # Setup wrong configuration for the host + conf_ha_cmd = configureHAForHost.configureHAForHostCmd() + if host.hypervisor.lower() in "simulator": + conf_ha_cmd.provider = "kvmhaprovider" + if host.hypervisor.lower() in "kvm": + conf_ha_cmd.provider = "simulatorhaprovider" + + conf_ha_cmd.hostid = cmd.hostid + + # Call the configure HA provider API with not supported provider for HA + try: + self.apiclient.configureHAForHost(conf_ha_cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_configure_ha_provider_valid(self): + """ + Tests configure HA Provider with valid provider options + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + host = self.getHost(response.hostid) + + # Setup configuration for the host + conf_ha_cmd = configureHAForHost.configureHAForHostCmd() + if host.hypervisor.lower() in "kvm": + conf_ha_cmd.provider = "kvmhaprovider" + if host.hypervisor.lower() in "simulator": + conf_ha_cmd.provider = "simulatorhaprovider" + + conf_ha_cmd.hostid = cmd.hostid + + # Call the configure HA provider API with not supported provider for HA + response = self.apiclient.configureHAForHost(conf_ha_cmd) + + # Check the response contains the set provider and hostID + self.assertEqual(response.haprovider, conf_ha_cmd.provider) + self.assertEqual(response.hostid, conf_ha_cmd.hostid) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_disable_oobm_ha_state_ineligible(self): + """ + Tests that when HA is enabled for a host, if oobm is disabled HA State should turn into Ineligible + """ + + # Enable ha 
for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + # Disable OOBM + self.apiclient.configureOutOfBandManagement(self.getOobmConfigCmd()) + oobm_cmd = self.getOobmDisableCmd() + oobm_cmd.hostid = cmd.hostid + response = self.apiclient.disableOutOfBandManagementForHost(oobm_cmd) + self.assertEqual(response.hostid, oobm_cmd.hostid) + self.assertEqual(response.enabled, False) + + response = self.getHost(hostId=cmd.hostid).outofbandmanagement + self.assertEqual(response.powerstate, 'Disabled') + + # Verify HA State is Ineligeble + response = self.getHost(hostId=cmd.hostid).hostha + self.assertEqual(response.hastate, "Ineligible") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_configure_default_driver(self): + """ + Tests host-ha configuration with valid data + """ + cmd = self.getHostHaConfigCmd() + response = self.apiclient.configureHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haprovider, cmd.provider.lower()) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_enable_ha_when_host_powerstate_on(self): + """ + Tests that when HA is enabled for a host, if oobm state is on HA State should turn into Available + """ + + self.configureAndStartIpmiServer() + + self.assertIssueCommandState('ON', 'On') + + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + # Verify HA State is Available + self.check_host_transition_to_available() + + response = self.getHost() + if response.hostha.hastate is not "Available": + 
print response + + self.assertEqual(response.hostha.hastate, "Available") + + self.stopIpmiServer() + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_enable_feature_without_setting_provider(self): + """ + Tests Enable HA without setting the provider, Exception is thrown + """ + host = self.get_non_configured_ha_host() + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + + try: + self.apiclient.enableHAForHost(cmd) + except Exception as e: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="treu") + def test_hostha_enable_ha_when_host_disabled(self): + """ + Tests Enable HA when host is disconnected, should be Ineligible + """ + # Enable HA + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + cmd.hostid = self.host.id + enable = self.apiclient.enableHAForHost(cmd) + self.assertEqual(enable.hostid, cmd.hostid) + self.assertEqual(enable.haenable, True) + + # Disable Host + self.disableHost(self.host.id) + + # Check HA State + try: + response = self.getHost(self.host.id) + self.assertEqual(response.hostha.hastate, "Ineligible") + except Exception as e: + self.enableHost(self.host.id) + self.fail(e) + + # Enable Host + self.enableHost(self.host.id) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_enable_ha_when_host_inMaintenance(self): + """ + Tests Enable HA when host is in Maintenance mode, should be Ineligible + """ + + host = self.getHost() + + # Enable HA + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + enable = self.apiclient.enableHAForHost(cmd) + self.assertEqual(enable.hostid, cmd.hostid) + self.assertEqual(enable.haenable, True) + + # Prepare for maintenance Host + 
self.setHostToMaintanance(host.id) + + # Check HA State + try: + response = self.getHost(host.id) + self.assertEqual(response.hostha.hastate, "Ineligible") + except Exception as e: + self.cancelMaintenance(host.id) + self.fail(e) + + # Enable Host + self.cancelMaintenance(host.id) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_enable_ha_when_host_disconected(self): + """ + Tests Enable HA when host is disconnected, should be Ineligible + """ + host = self.getHost() + + # Enable HA + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + enable = self.apiclient.enableHAForHost(cmd) + self.assertEqual(enable.hostid, cmd.hostid) + self.assertEqual(enable.haenable, True) + + # Make Host Disconnected + self.killAgent() + + # Check HA State + try: + time.sleep(1) + response = self.getHost(self.host.id) + self.assertEqual(response.hostha.hastate, "Ineligible") + except Exception as e: + self.startAgent() + self.fail(e) + + # Enable Host + self.startAgent() + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_remove_ha_provider_not_possible(self): + """ + Tests HA Provider should be possible to be removed when HA is enabled + """ + + host = self.getHost() + + # Enable HA + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + enable = self.apiclient.enableHAForHost(cmd) + self.assertEqual(enable.hostid, cmd.hostid) + self.assertEqual(enable.haenable, True) + + try: + self.apiclient.configureHAForHost(self.getHostHaConfigCmd('')) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + def configureAndStartIpmiServer(self, power_state=None): + """ + Setup ipmisim and enable out-of-band management for host + """ + self.configureAndEnableOobm() + self.startIpmiServer() + 
if power_state: + bmc = IpmiServerContext().bmc + bmc.powerstate = power_state + + def assertIssueCommandState(self, command, expected): + """ + Asserts power action result for a given power command + """ + if command != 'STATUS': + self.issuePowerActionCmd(command) + response = self.issuePowerActionCmd('STATUS') + self.assertEqual(response.powerstate, expected) + + def configureAndEnableOobm(self): + self.apiclient.configureOutOfBandManagement(self.getOobmConfigCmd()) + response = self.apiclient.enableOutOfBandManagementForHost(self.getOobmEnableCmd()) + self.assertEqual(response.enabled, True) + + def startIpmiServer(self): + def startIpmiServer(tname, server): + self.debug("Starting ipmisim server") + try: + server.serve_forever() + except Exception: pass + IpmiServerContext('reset') + ThreadedIpmiServer.allow_reuse_address = False + server = ThreadedIpmiServer(('0.0.0.0', self.getIpmiServerPort()), IpmiServer) + thread.start_new_thread(startIpmiServer, ("ipmi-server", server,)) + self.server = server + + def stopIpmiServer(self): + if self.server: + self.server.shutdown() + self.server.server_close() + + def getOobmIssueActionCmd(self): + cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd() + cmd.hostid = self.getHost().id + cmd.action = 'STATUS' + return cmd + + def issuePowerActionCmd(self, action, timeout=None): + cmd = self.getOobmIssueActionCmd() + cmd.action = action + if timeout: + cmd.timeout = timeout + + try: + return self.apiclient.issueOutOfBandManagementPowerAction(cmd) + except Exception as e: + if "packet session id 0x0 does not match active session" in str(e): + raise self.skipTest("Known ipmitool issue hit, skipping test") + raise e + + def getOobmEnableCmd(self): + cmd = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def getOobmDisableCmd(self): + cmd = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd() + cmd.hostid = 
self.getHost().id + return cmd + + def getIpmiServerPort(self): + return self.serverPort + + def getOobmConfigCmd(self): + cmd = configureOutOfBandManagement.configureOutOfBandManagementCmd() + cmd.driver = 'ipmitool' # The default available driver + cmd.address = self.getIpmiServerIp() + cmd.port = self.getIpmiServerPort() + cmd.username = 'admin' + cmd.password = 'password' + cmd.hostid = self.getHost().id + return cmd + + def getIpmiServerIp(self): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect((self.mgtSvrDetails["mgtSvrIp"], self.mgtSvrDetails["port"])) + return s.getsockname()[0] + + def get_non_configured_ha_host(self): + + response = list_hosts( + self.apiclient, + type='Routing' + ) + + for host in response: + if host.haprovider is None: + return host + else: + cloudstackTestCase.skipTest(self, "There is no non configured hosts. Skipping test.") + + raise self.skipTest("No KVM hosts found, skipping host-ha test") + + def getHAState(self, id): + cmd = listHostHAResources.listHostHAResourcesCmd() + cmd.hostid = id + response = self.apiclient.listHostHAResources(cmd) + + return response[0] + + def startAgent(self): + host = self.getHost() + SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], + passwd=self.hostConfig["password"]).execute \ + ("service cloudstack-agent start") + + def disableHost(self, id): + + cmd = updateHost.updateHostCmd() + cmd.id = id + cmd.allocationstate = "Disable" + + response = self.apiclient.updateHost(cmd) + + self.assertEqual(response.resourcestate, "Disabled") + + def enableHost(self, id): + cmd = updateHost.updateHostCmd() + cmd.id = id + cmd.allocationstate = "Enable" + + response = self.apiclient.updateHost(cmd) + + self.assertEqual(response.resourcestate, "Enabled") + + def setHostToMaintanance(self, id): + cmd = prepareHostForMaintenance.prepareHostForMaintenanceCmd() + cmd.id = id + + response = self.apiclient.prepareHostForMaintenance(cmd) + + 
self.assertEqual(response.resourcestate, "PrepareForMaintenance") + + def cancelMaintenance(self, id): + cmd = cancelHostMaintenance.cancelHostMaintenanceCmd() + cmd.id = id + + response = self.apiclient.cancelHostMaintenance(cmd) + + self.assertEqual(response.resourcestate, "Enabled") + + def killAgent(self): + host = self.getHost() + SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute\ + ("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')") diff --git a/test/integration/smoke/test_ha_kvm_agent.py b/test/integration/smoke/test_ha_kvm_agent.py new file mode 100644 index 00000000000..3efde0a97ec --- /dev/null +++ b/test/integration/smoke/test_ha_kvm_agent.py @@ -0,0 +1,535 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 


from marvin.cloudstackTestCase import *
from marvin.lib.utils import *
from marvin.lib.base import *
from marvin.lib.common import *
from nose.plugins.attrib import attr

from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer

import random
import socket
import thread
import time


class TestHaKVMAgent(cloudstackTestCase):
    """ Test cases for host HA on KVM hosts driven by stopping, killing and
        restarting the cloudstack-agent service over SSH
    """

    # Seconds to pause between two polls of the host state and between two
    # SSH retries. Kept short on purpose: some HA states checked below
    # (Suspect, Checking) are presumably short-lived -- TODO confirm against
    # the HA manager poll interval before raising this value.
    POLL_INTERVAL = 0.5

    def setUp(self):
        """
            Prepares API/DB clients, picks the host under test, reserves a free
            port for ipmisim, tunes the kvm.ha.* cluster settings and creates the
            service offering used by deploy_vm()
        """
        testClient = super(TestHaKVMAgent, self).getClsTestClient()

        self.apiClient = testClient.getApiClient()
        self.dbclient = testClient.getDbConnection()
        self.services = testClient.getParsedTestDataConfig()

        self.zone = get_zone(self.apiClient, testClient.getZoneForTests())
        self.host = self.getHost()
        self.cluster_id = self.host.clusterid
        self.server = None

        self.hypervisor = self.testClient.getHypervisorInfo()
        self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__
        self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__
        self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20)

        # Cleanup any existing configs
        self.dbclient.execute("delete from ha_config where resource_type='Host'")

        # use random port for ipmisim: bind to port 0, read back the port the
        # OS picked and release the socket again
        s = socket.socket()
        s.bind(('', 0))
        self.serverPort = s.getsockname()[1]
        s.close()

        # Set Cluster-level setting in order to run tests faster
        for name, value in (
            ("kvm.ha.activity.check.failure.ratio", "0.7"),
            ("kvm.ha.activity.check.interval", "10"),
            ("kvm.ha.activity.check.max.attempts", "5"),
            ("kvm.ha.activity.check.timeout", "60"),
            ("kvm.ha.degraded.max.period", "30"),
            ("kvm.ha.fence.timeout", "60"),
            ("kvm.ha.health.check.timeout", "10"),
            ("kvm.ha.recover.failure.threshold", "1"),
            ("kvm.ha.recover.timeout", "120"),
            ("kvm.ha.recover.wait.period", "60"),
        ):
            self.update_configuration(name, value)

        self.service_offering = ServiceOffering.create(
            self.apiClient,
            self.services["service_offerings"]
        )

        self.template = get_template(
            self.apiClient,
            self.zone.id,
            self.services["ostype"]
        )

        self.cleanup = [self.service_offering]

    def tearDown(self):
        """
            Removes the DB rows written by the tests, cleans up created resources
            and always shuts the ipmisim server down, even when the cleanup fails
        """
        try:
            try:
                for statement in (
                    "delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId(),
                    "delete from mshost where runid=%s" % self.getFakeMsRunId(),
                    "delete from cluster_details where name='resourceHAEnabled'",
                    "delete from data_center_details where name='resourceHAEnabled'",
                    "delete from ha_config where resource_type='Host'",
                    "delete from oobm where port=%d" % self.getIpmiServerPort(),
                    "delete from cluster_details where name='outOfBandManagementEnabled'",
                    "delete from data_center_details where name='outOfBandManagementEnabled'",
                ):
                    self.dbclient.execute(statement)
                cleanup_resources(self.apiClient, self.cleanup)
            finally:
                # Runs even if the statements above raised, so the listening
                # socket of the ipmisim server is not leaked into later tests
                if self.server:
                    self.server.shutdown()
                    self.server.server_close()
        except Exception as e:
            raise Exception("Warning: Exception during cleanup : %s" % e)

    def getFakeMsId(self):
        """ Returns the random fake management server id chosen in setUp """
        return self.fakeMsId

    def getFakeMsRunId(self):
        """ Returns the fake management server run id (fake id * 1000) """
        return self.fakeMsId * 1000

    def getHostHaConfigCmd(self, provider='kvmhaprovider'):
        """ Builds a configureHAForHost command for the host under test """
        cmd = configureHAForHost.configureHAForHostCmd()
        cmd.provider = provider
        cmd.hostid = self.host.id
        return cmd

    def getHostHaEnableCmd(self):
        """ Builds an enableHAForHost command for the host under test """
        cmd = enableHAForHost.enableHAForHostCmd()
        cmd.hostid = self.host.id
        return cmd

    def getHost(self, hostId=None):
        """
            Lists the Routing hosts of the test zone (optionally filtered by id),
            stores the first one in self.host and returns it.
            Skips the test when the listing is empty or None
        """
        response = list_hosts(
            self.apiClient,
            zoneid=self.zone.id,
            type='Routing',
            id=hostId
        )
        # Truthiness check instead of len(): also covers a None listing
        if response:
            self.host = response[0]
            return self.host
        # skipTest() raises on its own, no explicit raise is needed
        self.skipTest("No hosts found, skipping out-of-band management test")

    def getIpmiServerIp(self):
        """
            Returns the local address of a UDP socket connected towards the
            management server, used as the address of the ipmisim server
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect((self.mgtSvrDetails["mgtSvrIp"], self.mgtSvrDetails["port"]))
        return s.getsockname()[0]

    def getIpmiServerPort(self):
        """ Returns the port reserved for ipmisim in setUp """
        return self.serverPort

    def getOobmConfigCmd(self):
        """ Builds a configureOutOfBandManagement command pointing at ipmisim """
        cmd = configureOutOfBandManagement.configureOutOfBandManagementCmd()
        cmd.driver = 'ipmitool'  # The default available driver
        cmd.address = self.getIpmiServerIp()
        cmd.port = self.getIpmiServerPort()
        cmd.username = 'admin'
        cmd.password = 'password'
        cmd.hostid = self.host.id
        return cmd

    def getOobmEnableCmd(self):
        """ Builds an enableOutOfBandManagementForHost command """
        cmd = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd()
        cmd.hostid = self.host.id
        return cmd

    def getOobmDisableCmd(self):
        """ Builds a disableOutOfBandManagementForHost command """
        cmd = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd()
        cmd.hostid = self.host.id
        return cmd

    def getOobmIssueActionCmd(self):
        """ Builds an OOBM power action command preset to the STATUS action """
        cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd()
        cmd.hostid = self.host.id
        cmd.action = 'STATUS'
        return cmd

    def issue_power_action_cmd(self, action, timeout=None):
        """
            Issues the given OOBM power action and returns the API response.
            Skips the test on the known ipmitool session-id error, re-raises
            every other exception unchanged
        """
        cmd = self.getOobmIssueActionCmd()
        cmd.action = action
        if timeout:
            cmd.timeout = timeout

        try:
            return self.apiClient.issueOutOfBandManagementPowerAction(cmd)
        except Exception as e:
            if "packet session id 0x0 does not match active session" in str(e):
                self.skipTest("Known ipmitool issue hit, skipping test")
            # Bare raise keeps the original traceback
            raise

    def configure_and_enable_oobm(self):
        """ Configures and enables out-of-band management for the host """
        self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd())
        response = self.apiClient.enableOutOfBandManagementForHost(self.getOobmEnableCmd())
        self.assertEqual(response.enabled, True)

    def start_ipmi_server(self):
        """
            Starts an ipmisim server on the reserved port in a background thread
            and remembers it in self.server so tearDown can stop it
        """
        def startIpmiServer(tname, server):
            self.debug("Starting ipmisim server")
            try:
                server.serve_forever()
            except Exception as e:
                # Best effort: the thread must never take the test run down,
                # but the reason is logged instead of silently dropped
                self.debug("ipmisim server stopped with: %s" % e)
        IpmiServerContext('reset')
        ThreadedIpmiServer.allow_reuse_address = False
        server = ThreadedIpmiServer(('0.0.0.0', self.getIpmiServerPort()), IpmiServer)
        thread.start_new_thread(startIpmiServer, ("ipmi-server", server,))
        self.server = server

    def checkSyncToState(self, state, interval):
        """
            Waits via wait_until() for the host OOBM power state to equal `state`
            and fails the test otherwise. `interval` is given in milliseconds
        """
        def checkForStateSync(expectedState):
            response = self.getHost(hostId=self.host.id).outofbandmanagement
            return response.powerstate == expectedState, None

        # Explicit floor division: same result as the Python 2 int "/"
        sync_interval = 1 + int(interval) // 1000
        res, _ = wait_until(sync_interval, 10, checkForStateSync, state)
        if not res:
            self.fail("Failed to get host.powerstate synced to expected state:" + state)
        response = self.getHost(hostId=self.host.id).outofbandmanagement
        self.assertEqual(response.powerstate, state)

    def get_host_in_available_state(self):
        """
            Configures and enables OOBM, powers the host on, configures and
            enables host HA and asserts that the HA state becomes Available
        """
        self.configure_and_start_ipmi_server()
        self.assert_issue_command_state('ON', 'On')
        self.configureAndEnableHostHa()

        self.check_host_transition_to_available()

        response = self.getHost()
        # Value comparison; "is not" would compare object identity
        if response.hostha.hastate != "Available":
            print(response)

        self.assertEqual(response.hostha.hastate, "Available")

    def configureAndEnableHostHa(self):
        """ Configures the HA provider for the host and enables host HA """
        self.apiClient.configureHAForHost(self.getHostHaConfigCmd())

        response = self.apiClient.enableHAForHost(self.getHostHaEnableCmd())
        self.assertEqual(response.haenable, True)

    def configure_and_start_ipmi_server(self, power_state=None):
        """
            Setup ipmisim and enable out-of-band management for host
        """
        self.configure_and_enable_oobm()
        self.start_ipmi_server()
        if power_state:
            bmc = IpmiServerContext().bmc
            bmc.powerstate = power_state

    def assert_issue_command_state(self, command, expected):
        """
            Asserts power action result for a given power command
        """
        if command != 'STATUS':
            self.issue_power_action_cmd(command)
        response = self.issue_power_action_cmd('STATUS')
        self.assertEqual(response.powerstate, expected)

    def _ssh_execute(self, command):
        """ Runs `command` on the host under test over SSH """
        SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"],
                  passwd=self.hostConfig["password"]).execute(command)

    def _ssh_execute_with_retry(self, command, timeout=90):
        """
            Runs `command` over SSH, retrying until it succeeds or `timeout`
            seconds have passed, in which case the test fails
        """
        t_end = time.time() + timeout
        while time.time() < t_end:
            try:
                self._ssh_execute(command)
                return
            except Exception:
                print("Cannot ssh into: " + self.host.ipaddress)
                # Give the host some time before the next attempt
                time.sleep(self.POLL_INTERVAL)
        self.fail("Could not execute '%s' on host %s within %d seconds"
                  % (command, self.host.ipaddress, timeout))

    def _wait_for_host(self, predicate, timeout, description):
        """
            Polls getHost() until predicate(host) is true; fails the test with a
            descriptive message after `timeout` seconds
        """
        t_end = time.time() + timeout
        while time.time() < t_end:
            if predicate(self.getHost()):
                return
            time.sleep(self.POLL_INTERVAL)
        self.fail("Timed out after %d seconds waiting for %s" % (timeout, description))

    def _wait_for_ha_state(self, expected, timeout=120):
        """ Waits until the host HA state equals `expected` """
        # Exact match; the former `in "<state>"` was a substring test that an
        # empty or partial state string would also satisfy
        self._wait_for_host(lambda host: host.hostha.hastate == expected,
                            timeout, "host HA state '%s'" % expected)

    def kill_agent(self):
        """ Kills the cloudstack-agent processes on the host, retrying SSH for 90 seconds """
        self._ssh_execute_with_retry("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')")

    def set_host_to_alert(self):
        """ Sets the host status to Alert directly in the database """
        self.dbclient.execute("update host set host.status = 'Alert' where host.uuid = '%s'" % self.host.id)

    def check_host_transitioned_to_degraded(self):
        """ Waits up to 120 seconds for the HA state Degraded """
        self._wait_for_ha_state("Degraded")

    def wait_util_host_is_fencing(self):
        """ Waits up to 120 seconds for the HA state Fencing """
        self._wait_for_ha_state("Fencing")

    def check_host_transitioned_to_suspect(self):
        """ Waits up to 120 seconds for the HA state Suspect """
        self._wait_for_ha_state("Suspect")

    def check_host_transitioned_to_checking(self):
        """ Waits up to 120 seconds for the HA state Checking """
        self._wait_for_ha_state("Checking")

    def wait_util_host_is_fenced(self):
        """ Waits up to 120 seconds for the HA state Fenced """
        self._wait_for_ha_state("Fenced")

    def wait_util_host_is_up(self):
        """ Waits up to 120 seconds for the host state Up """
        self._wait_for_host(lambda host: host.state == "Up", 120, "host state 'Up'")

    def stop_agent(self):
        """ Stops the cloudstack-agent service on the host (single SSH attempt) """
        self._ssh_execute("service cloudstack-agent stop")

    def start_agent(self):
        """ Restarts the agent and waits for the HA state Available """
        self.ssh_and_restart_agent()
        self.check_host_transition_to_available()

    def ssh_and_restart_agent(self):
        """ Restarts the cloudstack-agent service, retrying SSH for 90 seconds """
        self._ssh_execute_with_retry("service cloudstack-agent restart")

    def check_host_transition_to_available(self):
        """ Waits up to 90 seconds for the HA state Available """
        self._wait_for_ha_state("Available", timeout=90)

    def wait_util_host_is_recovered(self):
        """ Waits up to 180 seconds for the HA state Recovered """
        self._wait_for_ha_state("Recovered", timeout=180)

    def reset_host(self):
        """ Reboots the host over SSH (single attempt) """
        self._ssh_execute("reboot")

    def deploy_vm(self):
        """ Deploys a VM on the host under test and registers it for cleanup """
        vm = VirtualMachine.create(
            self.apiClient,
            services=self.services["virtual_machine"],
            serviceofferingid=self.service_offering.id,
            templateid=self.template.id,
            zoneid=self.zone.id,
            hostid=self.host.id,
            method="POST"
        )

        self.cleanup.append(vm)

    def update_configuration(self, name, value):
        """ Updates the configuration `name` to `value` for the cluster of the host """
        update_configuration_cmd = updateConfiguration.updateConfigurationCmd()
        update_configuration_cmd.name = name
        update_configuration_cmd.value = value
        update_configuration_cmd.clusterid = self.cluster_id

        self.apiClient.updateConfiguration(update_configuration_cmd)

    @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
    def test_ha_stop_agent_host_is_degraded(self):
        """
            Tests HA state turns Degraded when agent is stopped
        """
        self.deploy_vm()

        # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
        self.get_host_in_available_state()

        # SSH into the KVM Host and stops the agent service
        self.stop_agent()

        # Checks if the host would turn into Degraded in the next 120 seconds
        try:
            self.check_host_transitioned_to_degraded()
        except Exception as e:
            self.start_agent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # Enable Host
        self.start_agent()

    #@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
    def test_ha_recovering_start_agent_host_is_available(self):
        """
            Tests HA state turns Recovered when agent is stopped and host is reset
        """
        # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
        # Then kills the agent and waits until the state is Degraded

        self.deploy_vm()
        # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
        self.get_host_in_available_state()

        # SSH into the KVM Host and kills the agent processes
        self.kill_agent()

        # Checks if the host would turn into Degraded in the next 120 seconds
        try:
            self.check_host_transitioned_to_degraded()
        except Exception as e:
            self.start_agent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # Reset host so a shut down could be emulated. During the bootup host should transition into recovered state
        self.reset_host()

        # Waits until Degraded host turns into Recovered for 180 seconds,
        # if it fails it tries to revert host back to Available
        try:
            self.wait_util_host_is_recovered()
        except Exception as e:
            self.start_agent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # SSH into the KVM Host and executes service cloudstack-agent restart of the agent
        self.start_agent()

    #@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
    def test_ha_fencing_host(self):
        """
            Tests HA state turns Recovered when agent is stopped and host is reset,
            then configure incorrect OOBM configuration, so that Recover command would fail
            and host would transition into Fenced state.
        """
        self.deploy_vm()

        # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
        self.get_host_in_available_state()

        # SSH into the KVM Host and kills the agent processes
        self.kill_agent()

        # Checks if the host would turn into Degraded in the next 120 seconds
        try:
            self.check_host_transitioned_to_degraded()
        except Exception as e:
            self.start_agent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # Change OOBM Configuration to invalid so it would fail the recover operations.
        cmd = self.getOobmConfigCmd()
        cmd.address = "1.1.1.1"
        self.apiClient.configureOutOfBandManagement(cmd)

        # Reset host so a shut down could be emulated. During the bootup host should transition into recovered state
        self.reset_host()
        self.kill_agent()

        # Waits until Recovering host turns into Fencing for 120 seconds,
        # if it fails it tries to revert host back to Up
        try:
            self.wait_util_host_is_fencing()
        except Exception as e:
            self.ssh_and_restart_agent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # Configure correct OOBM configuration so that the Fencing operation would succeed
        self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd())

        # Waits until Fencing host turns into Fenced for 120 seconds,
        # if it fails it tries to revert host back to Up
        try:
            self.wait_util_host_is_fenced()
        except Exception as e:
            self.ssh_and_restart_agent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # SSH into the KVM Host and executes service cloudstack-agent restart of the agent
        self.ssh_and_restart_agent()

        # Waits until state is Up so that cleanup would be successful
        self.wait_util_host_is_up()

    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
    def test_ha_kill_agent_host_is_degraded(self):
        """
            Tests HA state turns Suspect/Checking when some activity/health checks fail
            Configures HA, Logs into to a host and restarts the service
            Then it confirms the ha state jumps through Suspect -> Checking -> Available
        """
        # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
        self.get_host_in_available_state()

        # SSH into the KVM Host and restarts the agent service
        self.ssh_and_restart_agent()

        # Checks if the host would turn into Suspect in the next 120 seconds
        try:
            self.check_host_transitioned_to_suspect()
        except Exception as e:
            self.start_agent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # Checks if the host would turn into Checking in the next 120 seconds
        try:
            self.check_host_transitioned_to_checking()
        except Exception as e:
            self.start_agent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # Finally the host has to return to Available
        self.check_host_transition_to_available()


# Patch metadata and license header of the next file in the diff, carried over
# unchanged from the end of the replaced span:
# diff --git a/test/integration/smoke/test_hostha_simulator.py b/test/integration/smoke/test_hostha_simulator.py
# new file mode 100644
# index 00000000000..2315c38e12b
# --- /dev/null
# +++ b/test/integration/smoke/test_hostha_simulator.py
# @@ -0,0 +1,651 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
+ +import marvin +from marvin.cloudstackTestCase import * +from marvin.cloudstackAPI import * +from marvin.lib.utils import * +from marvin.lib.base import * +from marvin.lib.common import * +from nose.plugins.attrib import attr + +import random + +from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer + +import random +import socket +import sys +import thread +import time + + +class TestHostHA(cloudstackTestCase): + """ Test cases for host HA using Simulator host(s) + """ + + def setUp(self): + self.apiclient = self.testClient.getApiClient() + self.hypervisor = self.testClient.getHypervisorInfo() + self.dbclient = self.testClient.getDbConnection() + self.services = self.testClient.getParsedTestDataConfig() + self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ + self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20) + + # Cleanup any existing configs + self.dbclient.execute("delete from ha_config where resource_type='Host'") + self.host = None + + # use random port for ipmisim + s = socket.socket() + s.bind(('', 0)) + self.serverPort = s.getsockname()[1] + s.close() + + self.cleanup = [] + + def tearDown(self): + try: + self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from data_center_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from ha_config where resource_type='Host'") + self.dbclient.execute("update host set resource_state='Enabled' where type='Routing' and resource_state='Maintenance'") + cleanup_resources(self.apiclient, self.cleanup) + except Exception as e: + raise Exception("Warning: Exception during cleanup : %s" % e) + + def getFakeMsId(self): + return self.fakeMsId + + def getFakeMsRunId(self): + return self.fakeMsId * 1000 + + def 
getHost(self, hostId=None): + if self.host and hostId is None: + return self.host + + response = list_hosts( + self.apiclient, + type='Routing', + hypervisor='Simulator', + resourcestate='Enabled', + id=hostId + ) + if response and len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No simulator hosts found, skipping host-ha test") + + def getHostHaConfigCmd(self, provider='simulatorhaprovider'): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.provider = provider + cmd.hostid = self.getHost().id + return cmd + + def getHostHaEnableCmd(self): + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def getHostHaDisableCmd(self): + cmd = disableHAForHost.disableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def configureAndEnableHostHa(self, initialize=True): + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + response = self.apiclient.enableHAForHost(self.getHostHaEnableCmd()) + self.assertEqual(response.haenable, True) + if initialize: + self.configureSimulatorHAProviderState(True, True, True, False) + + def configureAndDisableHostHa(self, hostId): + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaDisableCmd() + cmd.hostid = hostId + response = self.apiclient.disableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + def enableHostHa(self, hostId): + cmd = self.getHostHaEnableCmd() + cmd.hostid = hostId + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + def configureSimulatorHAProviderState(self, health, activity, recover, fence): + cmd = configureSimulatorHAProviderState.configureSimulatorHAProviderStateCmd() + cmd.hostid = self.getHost().id + cmd.health = health + cmd.activity = activity + cmd.recover = recover + cmd.fence = fence + response = 
self.apiclient.configureSimulatorHAProviderState(cmd) + self.assertEqual(response.success, 'true') + + def getSimulatorHAStateTransitions(self, hostId): + cmd = listSimulatorHAStateTransitions.listSimulatorHAStateTransitionsCmd() + cmd.hostid = hostId + return self.apiclient.listSimulatorHAStateTransitions(cmd) + + def checkSyncToState(self, state, interval=5000): + def checkForStateSync(expectedState): + response = self.getHost(hostId=self.getHost().id).hostha + return response.hastate == expectedState, None + + sync_interval = 1 + int(interval) / 1000 + res, _ = wait_until(sync_interval, 10, checkForStateSync, state) + if not res: + self.fail("Failed to get host.hastate synced to expected state:" + state) + response = self.getHost(hostId=self.getHost().id).hostha + self.assertEqual(response.hastate, state) + + def get_non_configured_ha_host(self): + response = list_hosts( + self.apiclient, + type='Routing' + ) + for host in response: + if host.haprovider is None: + return host + else: + return None + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_enable_feature_without_setting_provider(self): + """ + Tests Enable HA without setting the provider, Exception is thrown + """ + host = self.get_non_configured_ha_host() + + if host is None: + cloudstackTestCase.skipTest(self, "There is no non configured hosts. 
Skipping test.") + + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + + try: + response = self.apiclient.enableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_list_providers(self): + """ + Tests default ha providers list + """ + cmd = listHostHAProviders.listHostHAProvidersCmd() + + cmd.hypervisor = 'Simulator' + response = self.apiclient.listHostHAProviders(cmd)[0] + self.assertEqual(response.haprovider, 'SimulatorHAProvider') + + cmd.hypervisor = 'KVM' + response = self.apiclient.listHostHAProviders(cmd)[0] + self.assertEqual(response.haprovider, 'KVMHAProvider') + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_configure_invalid_provider(self): + """ + Tests host-ha configuration with invalid driver + """ + cmd = self.getHostHaConfigCmd() + cmd.provider = 'randomDriverThatDoesNotExist' + try: + response = self.apiclient.configureHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_configure_default_driver(self): + """ + Tests host-ha configuration with valid data + """ + cmd = self.getHostHaConfigCmd() + response = self.apiclient.configureHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haprovider, cmd.provider.lower()) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_enable_feature_invalid(self): + """ + Tests ha feature enable command with invalid options + """ + cmd = self.getHostHaEnableCmd() + cmd.hostid = -1 + try: + response = self.apiclient.enableHAForHost(cmd) + except Exception: + pass + else: + 
self.fail("Expected an exception to be thrown, failing") + try: + cmd = enableHAForCluster.enableHAForClusterCmd() + response = self.apiclient.enableHAForCluster(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + try: + cmd = enableHAForZone.enableHAForZoneCmd() + response = self.apiclient.enableHAForZone(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_disable_feature_invalid(self): + """ + Tests ha feature disable command with invalid options + """ + cmd = self.getHostHaDisableCmd() + cmd.hostid = -1 + try: + response = self.apiclient.disableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + try: + cmd = disableHAForCluster.disableHAForClusterCmd() + response = self.apiclient.disableHAForCluster(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = disableHAForZone.disableHAForZoneCmd() + response = self.apiclient.disableHAForZone(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_enable_feature_valid(self): + """ + Tests host-ha enable feature with valid options + """ + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_disable_feature_valid(self): + """ + Tests host-ha disable feature with valid options + """ + 
self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaDisableCmd() + response = self.apiclient.disableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + response = self.getHost(hostId=cmd.hostid).hostha + self.assertEqual(response.hastate, 'Disabled') + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_enabledisable_across_clusterzones(self): + """ + Tests ha enable/disable feature at cluster and zone level + Zone > Cluster > Host + """ + self.configureAndEnableHostHa() + + host = self.getHost() + self.checkSyncToState('Available') + response = self.getHost(hostId=host.id).hostha + self.assertTrue(response.hastate == 'Available') + + # Disable at host level + cmd = disableHAForHost.disableHAForHostCmd() + cmd.hostid = host.id + response = self.apiclient.disableHAForHost(cmd) + + # Disable at cluster level + cmd = disableHAForCluster.disableHAForClusterCmd() + cmd.clusterid = host.clusterid + response = self.apiclient.disableHAForCluster(cmd) + + # Disable at zone level + cmd = disableHAForZone.disableHAForZoneCmd() + cmd.zoneid = host.zoneid + response = self.apiclient.disableHAForZone(cmd) + + # HA state check + response = self.getHost(hostId=host.id).hostha + self.assertTrue(response.hastate == 'Disabled') + + # Check ha-state check and sync + self.dbclient.execute("update ha_config set ha_state='Available' where enabled='1' and resource_type='Host'") + self.checkSyncToState('Disabled') + + # Enable at zone level + cmd = enableHAForZone.enableHAForZoneCmd() + cmd.zoneid = host.zoneid + response = self.apiclient.enableHAForZone(cmd) + + # Enable at cluster level + cmd = enableHAForCluster.enableHAForClusterCmd() + cmd.clusterid = host.clusterid + response = self.apiclient.enableHAForCluster(cmd) + + # Enable at host level + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = host.id + response = 
self.apiclient.enableHAForHost(cmd) + + # Check state sync + self.checkSyncToState('Available') + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_multiple_mgmt_server_ownership(self): + """ + Tests ha resource ownership expiry across multi-mgmt server + """ + self.configureAndEnableHostHa() + + cloudstackVersion = Configurations.listCapabilities(self.apiclient).cloudstackversion + + currentMsHosts = [] + mshosts = self.dbclient.execute( + "select msid from mshost where version='%s' and removed is NULL and state='Up'" % (cloudstackVersion)) + if len(mshosts) > 0: + currentMsHosts = map(lambda row: row[0], mshosts) + + # Inject fake ms host + self.dbclient.execute( + "insert into mshost (msid,runid,name,state,version,service_ip,service_port,last_update) values (%s,%s,'ha-marvin-fakebox', 'Down', '%s', '127.0.0.1', '22', NOW())" % ( + self.getFakeMsId(), self.getFakeMsRunId(), cloudstackVersion)) + + # Pass ownership to the fake ms id + self.dbclient.execute( + "update ha_config set mgmt_server_id=%d where resource_type='Host' and enabled=1 and provider='simulatorhaprovider'" % self.getFakeMsId()) + + pingInterval = float(list_configurations( + self.apiclient, + name='ping.interval' + )[0].value) + + pingTimeout = float(list_configurations( + self.apiclient, + name='ping.timeout' + )[0].value) + + def removeFakeMgmtServer(fakeMsRunId): + rows = self.dbclient.execute("select * from mshost_peer where peer_runid=%s" % fakeMsRunId) + if len(rows) > 0: + self.debug("Mgmt server is now trying to contact the fake mgmt server") + self.dbclient.execute("update mshost set removed=now() where runid=%s" % fakeMsRunId) + self.dbclient.execute("update mshost_peer set peer_state='Down' where peer_runid=%s" % fakeMsRunId) + return True, None + return False, None + + def checkHaOwnershipExpiry(fakeMsId): + rows = self.dbclient.execute( + "select mgmt_server_id from ha_config where resource_type='Host' and enabled=1 
and provider='simulatorhaprovider'") + if len(rows) > 0 and rows[0][0] != fakeMsId: + self.debug("HA resource ownership expired as node was detected to be gone") + return True, None + return False, None + + retry_interval = 1 + (pingInterval * pingTimeout / 10) + + res, _ = wait_until(retry_interval, 10, removeFakeMgmtServer, self.getFakeMsRunId()) + if not res: + self.fail("Management server failed to turn down or remove fake mgmt server") + + res, _ = wait_until(retry_interval, 100, checkHaOwnershipExpiry, self.getFakeMsId()) + if not res: + self.fail("Management server failed to expire ownership of fenced peer") + + self.debug("Testing ha background sync should claim new ownership") + self.checkSyncToState('Available') + + result = self.dbclient.execute( + "select mgmt_server_id from ha_config where resource_type='Host' and enabled=1 and provider='simulatorhaprovider'") + newOwnerId = result[0][0] + self.assertTrue(newOwnerId in currentMsHosts) + + def checkFSMTransition(self, transition, event, haState, prevHaState, hasActiviyCounter, hasRecoveryCounter): + self.assertEqual(transition.event, event) + self.assertEqual(transition.hastate, haState) + self.assertEqual(transition.prevhastate, prevHaState) + if hasActiviyCounter: + self.assertTrue(transition.activitycounter > 0) + else: + self.assertEqual(transition.activitycounter, 0) + if hasRecoveryCounter: + self.assertTrue(transition.recoverycounter > 0) + else: + self.assertEqual(transition.recoverycounter, 0) + + def findFSMTransitionToState(self, state, host): + transitions = self.getSimulatorHAStateTransitions(host.id) + if not transitions: + return False, (None, None, None) + previousTransition = None + stateTransition = None + nextTransition = None + for transition in transitions: + if stateTransition: + nextTransition = transition + break + if transition.hastate == state: + stateTransition = transition + if not stateTransition: + previousTransition = transition + if stateTransition: + return True, 
(previousTransition, stateTransition, nextTransition,) + return False, (previousTransition, stateTransition, nextTransition,) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_verify_fsm_available(self): + """ + Tests ha FSM transitions for valid healthy host + Simulates health check passing + """ + + host = self.getHost() + self.configureSimulatorHAProviderState(True, True, True, False) + self.configureAndEnableHostHa(False) + + res, (_, T, _) = wait_until(2, 20, self.findFSMTransitionToState, 'available', host) + if not res: + self.fail("FSM did not transition to available state") + + self.checkFSMTransition(T, 'enabled', 'available', 'disabled', False, False) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_verify_fsm_degraded(self): + """ + Tests ha FSM transitions leading to degraded state + Simulates health check failures with activity checks passing + FSM transitions should happen indefinitely between: + Available->Suspect<->Checking->Degraded->Available + """ + host = self.getHost() + self.configureSimulatorHAProviderState(False, True, True, False) + self.configureAndEnableHostHa(False) + + # Initial health check failure + res, (_, T, _) = wait_until(2, 20, self.findFSMTransitionToState, 'suspect', host) + if not res: + self.fail("FSM did not transition to suspect state") + + self.checkFSMTransition(T, 'healthcheckfailed', 'suspect', 'available', False, False) + + # Check transition to Degraded + res, (prevT, T, nextT) = wait_until(2, 20, self.findFSMTransitionToState, 'degraded', host) + if not res: + self.fail("FSM did not transition to degraded state") + + if prevT: + self.checkFSMTransition(prevT, 'performactivitycheck', 'checking', 'suspect', True, False) + self.checkFSMTransition(T, 'activitycheckfailureunderthresholdratio', 'degraded', 'checking', True, False) + if nextT: + self.checkFSMTransition(nextT, 
'periodicrecheckresourceactivity', 'suspect', 'degraded', False, False) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_verify_fsm_recovering(self): + """ + Tests ha FSM transitions leading to recovering + Simulates both health and activity check failures + FSM transitions should happen indefinitely between: + Available->Suspect<->Checking->Recovering->Recovered<-retry-loop->->Fencing + """ + host = self.getHost() + self.configureSimulatorHAProviderState(False, False, True, False) + self.configureAndEnableHostHa(False) + + # Initial health check failure + res, (_, T, _) = wait_until(2, 30, self.findFSMTransitionToState, 'suspect', host) + if not res: + self.fail("FSM did not transition to suspect state") + + self.checkFSMTransition(T, 'healthcheckfailed', 'suspect', 'available', False, False) + + # Check transition to recovering + res, (prevT, T, nextT) = wait_until(2, 60, self.findFSMTransitionToState, 'recovering', host) + if not res: + self.fail("FSM did not transition to recovering state") + + if prevT: + self.checkFSMTransition(prevT, 'performactivitycheck', 'checking', 'suspect', True, False) + self.checkFSMTransition(T, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True, False) + if nextT: + self.checkFSMTransition(nextT, 'recovered', 'recovered', 'recovering', False, True) + + # Check transition to fencing due to recovery attempts exceeded + res, (prevT, T, nextT) = wait_until(2, 60, self.findFSMTransitionToState, 'fencing', host) + if not res: + self.fail("FSM did not transition to fencing state") + + if prevT: + self.checkFSMTransition(prevT, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True, + True) + self.checkFSMTransition(T, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def 
test_ha_verify_fsm_fenced(self): + """ + Tests ha FSM transitions for failures leading to fenced state + FSM transitions should happen indefinitely between: + Available->Suspect<->Checking->Recovering<-fail recovery->->Fencing->Fenced + """ + host = self.getHost() + self.configureSimulatorHAProviderState(False, False, False, True) + self.configureAndEnableHostHa(False) + + # Check for transition to fenced + res, (prevT, T, _) = wait_until(2, 30, self.findFSMTransitionToState, 'fenced', host) + if not res: + self.fail("FSM did not transition to fenced state") + + self.checkFSMTransition(prevT, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True) + self.checkFSMTransition(T, 'fenced', 'fenced', 'fencing', False, False) + + # TODO: add test case for HA vm reboot checks + + # Simulate manual recovery of host and cancel maintenance mode + self.configureSimulatorHAProviderState(True, True, True, False) + cancelCmd = cancelHostMaintenance.cancelHostMaintenanceCmd() + cancelCmd.id = host.id + self.apiclient.cancelHostMaintenance(cancelCmd) + + # Check for transition to available after manual recovery + res, (prevT, T, _) = wait_until(2, 20, self.findFSMTransitionToState, 'available', host) + if not res: + self.fail("FSM did not transition to available state") + + self.checkFSMTransition(prevT, 'healthcheckpassed', 'ineligible', 'fenced', False, False) + self.checkFSMTransition(T, 'eligible', 'available', 'ineligible', False, False) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_configure_ha_provider_invalid(self): + """ + Tests configure HA Provider with invalid provider options + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + host = 
self.getHost(response.hostid) + + # Setup wrong configuration for the host + conf_ha_cmd = configureHAForHost.configureHAForHostCmd() + if host.hypervisor.lower() in "simulator": + conf_ha_cmd.provider = "kvmhaprovider" + if host.hypervisor.lower() in "kvm": + conf_ha_cmd.provider = "simulatorhaprovider" + + conf_ha_cmd.hostid = cmd.hostid + + # Call the configure HA provider API with not supported provider for HA + try: + self.apiclient.configureHAForHost(conf_ha_cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_configure_ha_provider_valid(self): + """ + Tests configure HA Provider with valid provider options + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + host = self.getHost(response.hostid) + + + # Setup wrong configuration for the host + conf_ha_cmd = configureHAForHost.configureHAForHostCmd() + if host.hypervisor.lower() in "kvm": + conf_ha_cmd.provider = "kvmhaprovider" + if host.hypervisor.lower() in "simulator": + conf_ha_cmd.provider = "simulatorhaprovider" + + conf_ha_cmd.hostid = cmd.hostid + + # Call the configure HA provider API with not supported provider for HA + response = self.apiclient.configureHAForHost(conf_ha_cmd) + + # Check the response contains the set provider and hostID + self.assertEqual(response.haprovider, conf_ha_cmd.provider) + self.assertEqual(response.hostid, conf_ha_cmd.hostid) diff --git a/test/integration/smoke/test_outofbandmanagement.py b/test/integration/smoke/test_outofbandmanagement.py index 05a6d005163..adf19502ec2 100644 --- a/test/integration/smoke/test_outofbandmanagement.py +++ b/test/integration/smoke/test_outofbandmanagement.py @@ 
-193,8 +193,10 @@ class TestOutOfBandManagement(cloudstackTestCase): cmd.driver = 'randomDriverThatDoesNotExist' try: response = self.apiclient.configureOutOfBandManagement(cmd) + except Exception: + pass + else: self.fail("Expected an exception to be thrown, failing") - except Exception: pass @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") @@ -221,20 +223,26 @@ class TestOutOfBandManagement(cloudstackTestCase): cmd.hostid = -1 try: response = self.apiclient.enableOutOfBandManagementForHost(cmd) + except Exception: + pass + else: self.fail("Expected an exception to be thrown, failing") - except Exception: pass try: cmd = enableOutOfBandManagementForCluster.enableOutOfBandManagementForClusterCmd() response = self.apiclient.enableOutOfBandManagementForCluster(cmd) + except Exception: + pass + else: self.fail("Expected an exception to be thrown, failing") - except Exception: pass try: cmd = enableOutOfBandManagementForZone.enableOutOfBandManagementForZoneCmd() response = self.apiclient.enableOutOfBandManagementForZone(cmd) + except Exception: + pass + else: self.fail("Expected an exception to be thrown, failing") - except Exception: pass @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") @@ -247,20 +255,26 @@ class TestOutOfBandManagement(cloudstackTestCase): cmd.hostid = -1 try: response = self.apiclient.disableOutOfBandManagementForHost(cmd) + except Exception: + pass + else: self.fail("Expected an exception to be thrown, failing") - except Exception: pass try: cmd = disableOutOfBandManagementForCluster.disableOutOfBandManagementForClusterCmd() response = self.apiclient.disableOutOfBandManagementForCluster(cmd) + except Exception: + pass + else: self.fail("Expected an exception to be thrown, failing") - except Exception: pass try: cmd = disableOutOfBandManagementForZone.disableOutOfBandManagementForZoneCmd() response = 
self.apiclient.disableOutOfBandManagementForZone(cmd) + except Exception: + pass + else: self.fail("Expected an exception to be thrown, failing") - except Exception: pass @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") @@ -323,8 +337,10 @@ class TestOutOfBandManagement(cloudstackTestCase): try: self.issuePowerActionCmd('STATUS') - self.fail("Exception was expected, oobm is disabled at zone level") - except Exception: pass + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") # Enable at zone level cmd = enableOutOfBandManagementForZone.enableOutOfBandManagementForZoneCmd() @@ -333,18 +349,16 @@ class TestOutOfBandManagement(cloudstackTestCase): try: self.issuePowerActionCmd('STATUS') - self.fail("Exception was expected, oobm is disabled at cluster level") - except Exception: pass + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") # Check background thread syncs state to Disabled response = self.getHost(hostId=host.id).outofbandmanagement self.assertEqual(response.powerstate, 'Disabled') self.dbclient.execute("update oobm set power_state='On' where port=%d" % self.getIpmiServerPort()) - interval = list_configurations( - self.apiclient, - name='outofbandmanagement.sync.interval' - )[0].value - self.checkSyncToState('Disabled', interval) + self.checkSyncToState('Disabled', 2) # Enable at cluster level cmd = enableOutOfBandManagementForCluster.enableOutOfBandManagementForClusterCmd() @@ -353,8 +367,10 @@ class TestOutOfBandManagement(cloudstackTestCase): try: self.issuePowerActionCmd('STATUS') - self.fail("Exception was expected, oobm is disabled at host level") - except Exception: pass + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") # Enable at host level cmd = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd() @@ -446,26 +462,22 @@ class 
TestOutOfBandManagement(cloudstackTestCase): Tests out-of-band management background powerstate sync """ self.debug("Testing oobm background sync") - interval = list_configurations( - self.apiclient, - name='outofbandmanagement.sync.interval' - )[0].value self.configureAndEnableOobm() self.startIpmiServer() bmc = IpmiServerContext().bmc bmc.powerstate = 'on' - self.checkSyncToState('On', interval) + self.checkSyncToState('On', 2) bmc.powerstate = 'off' - self.checkSyncToState('Off', interval) + self.checkSyncToState('Off', 2) self.server.shutdown() self.server.server_close() # Check for unknown state (ipmi server not reachable) - self.checkSyncToState('Unknown', interval) + self.checkSyncToState('Unknown', 2) @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") @@ -527,16 +539,12 @@ class TestOutOfBandManagement(cloudstackTestCase): self.fail("Management server failed to expire ownership of fenced peer") self.debug("Testing oobm background sync should claim new ownership") - interval = list_configurations( - self.apiclient, - name='outofbandmanagement.sync.interval' - )[0].value self.startIpmiServer() bmc = IpmiServerContext().bmc bmc.powerstate = 'on' - self.checkSyncToState('On', interval) + self.checkSyncToState('On', 2) result = self.dbclient.execute("select mgmt_server_id from oobm where port=%d" % (self.getIpmiServerPort())) newOwnerId = result[0][0] @@ -577,8 +585,10 @@ class TestOutOfBandManagement(cloudstackTestCase): try: response = self.issuePowerActionCmd('STATUS') + except Exception: + pass + else: self.fail("Expected an exception to be thrown, failing") - except Exception: pass alerts = Alert.list(self.apiclient, keyword="auth-error", listall=True) diff --git a/tools/marvin/marvin/deployDataCenter.py b/tools/marvin/marvin/deployDataCenter.py index 85057d2f9f3..f39014f5973 100644 --- a/tools/marvin/marvin/deployDataCenter.py +++ b/tools/marvin/marvin/deployDataCenter.py @@ -36,6 +36,7 @@ from 
marvin.config.test_data import test_data from sys import exit import os import pickle +import threading from time import sleep, strftime, localtime from optparse import OptionParser @@ -172,7 +173,7 @@ class DeployDataCenters(object): vmwareDc.zoneid = zoneId self.addVmWareDataCenter(vmwareDc) - for cluster in clusters: + def createCluster(cluster): clustercmd = addCluster.addClusterCmd() clustercmd.clustername = cluster.clustername clustercmd.clustertype = cluster.clustertype @@ -197,6 +198,16 @@ class DeployDataCenters(object): zoneId, podId, clusterId) + + threads = [] + for cluster in clusters: + t = threading.Thread(target=createCluster, args=(cluster,)) + t.start() + threads.append(t) + + for t in threads: + t.join(3600) + except Exception as e: print "Exception Occurred %s" % GetDetailExceptionInfo(e) self.__tcRunLogger.exception("====Cluster %s Creation Failed" diff --git a/tools/marvin/marvin/lib/utils.py b/tools/marvin/marvin/lib/utils.py index 8f14333251c..f68abc77675 100644 --- a/tools/marvin/marvin/lib/utils.py +++ b/tools/marvin/marvin/lib/utils.py @@ -505,7 +505,7 @@ def verifyRouterState(apiclient, routerid, allowedstates): return [FAIL, "Redundant state of the router should be in %s but is %s" % (allowedstates, routers[0].redundantstate)] return [PASS, None] - + def wait_until(retry_interval=2, no_of_times=2, callback=None, *callback_args): """ Utility method to try out the callback method at most no_of_times with a interval of retry_interval, @@ -513,7 +513,7 @@ def wait_until(retry_interval=2, no_of_times=2, callback=None, *callback_args): if callback is None: raise ("Bad value for callback method !") - + wait_result = False for i in range(0,no_of_times): time.sleep(retry_interval) @@ -524,4 +524,3 @@ def wait_until(retry_interval=2, no_of_times=2, callback=None, *callback_args): break return wait_result, return_val - diff --git a/ui/css/cloudstack3.css b/ui/css/cloudstack3.css index 2df41a6ac22..c99cddd650d 100644 --- a/ui/css/cloudstack3.css 
+++ b/ui/css/cloudstack3.css @@ -12674,6 +12674,38 @@ div.ui-dialog div.autoscaler div.field-group div.form-container form div.form-it background-position: -137px -614px; } +.blankHAForHost .icon { + background-position: -266px -31px; +} + +.blankHAForHost:hover .icon { + background-position: -266px -31px; +} + +.configureHAForHost .icon { + background-position: -270px -148px; +} + +.configureHAForHost:hover .icon { + background-position: -270px -728px; +} + +.enableHA .icon { + background-position: -265px -93px; +} + +.enableHA:hover .icon { + background-position: -265px -673px; +} + +.disableHA .icon { + background-position: -265px -120px; +} + +.disableHA:hover .icon { + background-position: -265px -700px; +} + .blankOutOfBandManagement .icon { background-position: -266px -31px; } diff --git a/ui/dictionary.jsp b/ui/dictionary.jsp index 57dbffa8ae8..a6eec167401 100644 --- a/ui/dictionary.jsp +++ b/ui/dictionary.jsp @@ -655,6 +655,12 @@ dictionary = { 'label.guest.start.ip': '', 'label.guest.type': '', 'label.ha.enabled': '', +'label.ha.configure': '', +'label.ha.disable': '', +'label.ha.enable': '', +'label.ha.provider': '', +'label.ha.state': '', +'label.ha': '', 'label.help': '', 'label.hide.ingress.rule': '', 'label.hints': '', @@ -992,6 +998,7 @@ dictionary = { 'label.outofbandmanagement.disable': '', 'label.outofbandmanagement.enable': '', 'label.outofbandmanagement.password': '', +'label.outofbandmanagement.reenterpassword': '', 'label.outofbandmanagement.port': '', 'label.outofbandmanagement.timeout': '', 'label.outofbandmanagement.username': '', diff --git a/ui/images/sprites.png b/ui/images/sprites.png index 0ddafaff27e..5566aa6b081 100755 Binary files a/ui/images/sprites.png and b/ui/images/sprites.png differ diff --git a/ui/scripts/system.js b/ui/scripts/system.js index c4f4260f501..6f1f82f1a24 100644 --- a/ui/scripts/system.js +++ b/ui/scripts/system.js @@ -8163,6 +8163,80 @@ notification: { poll: pollAsyncJobResult } + }, + enableHA: { + label: 
'label.ha.enable', + action: function (args) { + var data = { + zoneid: args.context.physicalResources[0].id + }; + $.ajax({ + url: createURL("enableHAForZone"), + data: data, + success: function (json) { + var jid = json.enablehaforzoneresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return zoneActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.enable'; + }, + notification: function (args) { + return 'label.ha.enable'; + } + }, + notification: { + poll: pollAsyncJobResult + } + }, + disableHA: { + label: 'label.ha.disable', + action: function (args) { + var data = { + zoneid: args.context.physicalResources[0].id + }; + $.ajax({ + url: createURL("disableHAForZone"), + data: data, + success: function (json) { + var jid = json.disablehaforzoneresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return zoneActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.disable'; + }, + notification: function (args) { + return 'label.ha.disable'; + } + }, + notification: { + poll: pollAsyncJobResult + } } }, tabs: { @@ -14886,8 +14960,81 @@ notification: { poll: pollAsyncJobResult } + }, + enableHA: { + label: 'label.ha.enable', + action: function (args) { + var data = { + clusterid: args.context.clusters[0].id + }; + $.ajax({ + url: createURL("enableHAForCluster"), + data: data, + success: function (json) { + var jid = json.enablehaforclusterresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return clusterActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + messages: { + confirm: 
function (args) { + return 'label.ha.enable'; + }, + notification: function (args) { + return 'label.ha.enable'; + } + }, + notification: { + poll: pollAsyncJobResult + } + }, + disableHA: { + label: 'label.ha.disable', + action: function (args) { + var data = { + clusterid: args.context.clusters[0].id + }; + $.ajax({ + url: createURL("disableHAForCluster"), + data: data, + success: function (json) { + var jid = json.disablehaforclusterresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return clusterActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.disable'; + }, + notification: function (args) { + return 'label.ha.disable'; + } + }, + notification: { + poll: pollAsyncJobResult + } } - }, tabs: { @@ -16210,6 +16357,168 @@ } }, + blankHAForHost: { + label: '', + action: function (args) { + } + }, + + configureHAForHost: { + label: 'label.ha.configure', + messages: { + confirm: function (args) { + return 'label.ha.configure'; + }, + notification: function (args) { + return 'label.ha.configure'; + } + }, + createForm: { + title: 'label.ha.configure', + fields: { + provider: { + label: 'label.ha.provider', + validation: { + required: true + }, + select: function (args) { + $.ajax({ + url: createURL('listHostHAProviders'), + data: {'hypervisor': args.context.hosts[0].hypervisor}, + dataType: 'json', + success: function (json) { + var response = json.listhosthaprovidersresponse; + var items = []; + items.push({ + id: '', + description: _l('') + }); + if (response.haprovider) { + $.each(response.haprovider, function (idx, item) { + items.push({ + id: item.haprovider, + description: item.haprovider + }); + }); + } + args.response.success({ + data: items + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + } + } + } + }, + action: function 
(args) { + var data = args.data; + data.hostid = args.context.hosts[0].id; + $.ajax({ + url: createURL('configureHAForHost'), + data: data, + dataType: 'json', + success: function (json) { + var jid = json.configurehaforhostresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return hostActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + notification: { + poll: pollAsyncJobResult + } + }, + + enableHA: { + label: 'label.ha.enable', + action: function (args) { + var data = { + hostid: args.context.hosts[0].id, + }; + $.ajax({ + url: createURL("enableHAForHost"), + data: data, + success: function (json) { + var jid = json.enablehaforhostresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return hostActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.enable'; + }, + notification: function (args) { + return 'label.ha.enable'; + } + }, + notification: { + poll: pollAsyncJobResult + } + }, + + disableHA: { + label: 'label.ha.disable', + action: function (args) { + var data = { + hostid: args.context.hosts[0].id, + }; + $.ajax({ + url: createURL("disableHAForHost"), + data: data, + success: function (json) { + var jid = json.disablehaforhostresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return hostActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.disable'; + }, + notification: function (args) { + return 'label.ha.disable'; + } + }, + notification: { + poll: pollAsyncJobResult + } + }, + blankOutOfBandManagement: { label: '', action: function (args) { @@ -16494,10 
+16803,21 @@ required: false }, }, + reenterpassword: { + label: 'label.outofbandmanagement.reenterpassword', + isPassword: true, + validation: { + required: false + } + }, } }, action: function (args) { var data = args.data; + if (data.password != data.reenterpassword) { + args.response.error("Passwords do not match"); + return; + } data.hostid = args.context.hosts[0].id; $.ajax({ url: createURL('changeOutOfBandManagementPassword'), @@ -16534,6 +16854,9 @@ if (host.outofbandmanagement == null || !host.outofbandmanagement.enabled) { hiddenTabs.push("outofbandmanagement"); } + if (host.hostha == null || (host.hypervisor != 'KVM' && host.hypervisor != 'Simulator')) { + hiddenTabs.push("ha"); + } return hiddenTabs; }, tabs: { @@ -16582,6 +16905,12 @@ hypervisorversion: { label: 'label.hypervisor.version' }, + hastate: { + label: 'label.ha.state' + }, + haprovider: { + label: 'label.ha.provider' + }, hosttags: { label: 'label.host.tags', isEditable: true, @@ -16687,6 +17016,11 @@ if (item && item.outofbandmanagement) { item.powerstate = item.outofbandmanagement.powerstate; } + if (item && item.hostha) { + item.hastate = item.hostha.hastate; + item.haprovider = item.hostha.haprovider; + item.haenabled = item.hostha.haenable; + } $.ajax({ url: createURL("listDedicatedHosts&hostid=" + args.context.hosts[0].id), @@ -16718,6 +17052,39 @@ } }, + ha: { + title: 'label.ha', + fields: { + haenable: { + label: 'label.ha.enabled', + converter: cloudStack.converters.toBooleanText + }, + hastate: { + label: 'label.ha.state' + }, + haprovider: { + label: 'label.ha.provider' + }, + }, + dataProvider: function (args) { + $.ajax({ + url: createURL("listHosts&id=" + args.context.hosts[0].id), + dataType: "json", + async: true, + success: function (json) { + var host = json.listhostsresponse.host[0]; + var hostha = {}; + if (host && host.hostha) { + hostha = host.hostha; + } + args.response.success({ + data: hostha + }); + } + }); + } + }, + outofbandmanagement: { title: 
'label.outofbandmanagement', fields: { @@ -21023,6 +21390,12 @@ allowedActions.push("disableOutOfBandManagement"); } + if (jsonObj.hasOwnProperty('resourcedetails') && jsonObj['resourcedetails'].hasOwnProperty('resourceHAEnabled') && jsonObj['resourcedetails']['resourceHAEnabled'] == 'false') { + allowedActions.push("enableHA"); + } else { + allowedActions.push("disableHA"); + } + return allowedActions; } @@ -21114,6 +21487,12 @@ allowedActions.push("disableOutOfBandManagement"); } + if (jsonObj.hasOwnProperty('resourcedetails') && jsonObj['resourcedetails'].hasOwnProperty('resourceHAEnabled') && jsonObj['resourcedetails']['resourceHAEnabled'] == 'false') { + allowedActions.push("enableHA"); + } else { + allowedActions.push("disableHA"); + } + return allowedActions; } @@ -21150,6 +21529,14 @@ allowedActions.push("remove"); } + allowedActions.push("blankHAForHost"); + allowedActions.push("configureHAForHost"); + if (jsonObj.hasOwnProperty("hostha") && jsonObj.hostha.haenable) { + allowedActions.push("disableHA"); + } else { + allowedActions.push("enableHA"); + } + allowedActions.push("blankOutOfBandManagement"); allowedActions.push("configureOutOfBandManagement"); if (jsonObj.hasOwnProperty("outofbandmanagement") && jsonObj.outofbandmanagement.enabled) { diff --git a/utils/src/com/cloud/utils/exception/CSExceptionErrorCode.java b/utils/src/com/cloud/utils/exception/CSExceptionErrorCode.java index 2837ccbfbfc..5a5b5d11bf8 100755 --- a/utils/src/com/cloud/utils/exception/CSExceptionErrorCode.java +++ b/utils/src/com/cloud/utils/exception/CSExceptionErrorCode.java @@ -68,6 +68,7 @@ public class CSExceptionErrorCode { ExceptionErrorCodeMap.put("com.cloud.exception.VirtualMachineMigrationException", 4395); ExceptionErrorCodeMap.put("com.cloud.async.AsyncCommandQueued", 4540); ExceptionErrorCodeMap.put("com.cloud.exception.RequestLimitException", 4545); + ExceptionErrorCodeMap.put("com.cloud.exception.OperationTimedoutException", 4550); // Have a special error code for 
ServerApiException when it is // thrown in a standalone manner when failing to detect any of the above