mirror of https://github.com/apache/cloudstack.git
FR3: Host-HA backported changes from master (#50)
- Improves job scheduling using state/event-driven logic
- Reduces database and CPU load by consolidating all background threads into one
- Improves Simulator and KVM host-HA integration tests
- Triggers VM HA on successful host recovery (IPMI reboot)
- Improves internal data structures and checks around the HA counter
- Adds new FSM events to retry fencing and recovery
- Fixes the KVM activity script to aggressively check against the last update time

Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
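For orientation, below is a minimal, self-contained Java sketch (not the production classes) of the retry transitions this change wires into the host-HA finite state machine. The enum subset and the transition set are abridged from the HAConfig hunks further down; the map-based FSM itself is illustrative only — the real code uses com.cloud.utils.fsm.StateMachine2.

public class HostHaFsmSketch {
    enum HAState { Disabled, Ineligible, Recovering, Recovered, Fencing, Fenced }
    enum Event { Disabled, Ineligible, RetryRecovery, Recovered, RecoveryOperationThresholdExceeded, RetryFencing, Fenced }

    private final java.util.Map<HAState, java.util.Map<Event, HAState>> transitions = new java.util.HashMap<>();

    private void addTransition(final HAState from, final Event event, final HAState to) {
        transitions.computeIfAbsent(from, s -> new java.util.HashMap<>()).put(event, to);
    }

    public HostHaFsmSketch() {
        // Recovery may now be retried in place, or escalate to fencing when the threshold is exceeded.
        addTransition(HAState.Recovering, Event.RetryRecovery, HAState.Recovering);
        addTransition(HAState.Recovering, Event.Recovered, HAState.Recovered);
        addTransition(HAState.Recovering, Event.RecoveryOperationThresholdExceeded, HAState.Fencing);
        // Fencing may likewise be retried until the host is actually fenced.
        addTransition(HAState.Fencing, Event.RetryFencing, HAState.Fencing);
        addTransition(HAState.Fencing, Event.Fenced, HAState.Fenced);
    }

    // Returns the next state, or the current state if no transition is defined.
    public HAState next(final HAState current, final Event event) {
        return transitions.getOrDefault(current, java.util.Collections.<Event, HAState>emptyMap()).getOrDefault(event, current);
    }
}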
This commit is contained in:
parent 1f52cd4245
commit 7df52405b0
@@ -108,8 +108,4 @@ public class PrepareForMaintenanceCmd extends BaseAsyncCmd {
            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to prepare host for maintenance");
        }
    }
-
-    public void setHostId(final Long hostId) {
-        id = hostId;
-    }
}
@@ -47,8 +47,10 @@ public interface HAConfig extends StateObject<HAConfig.HAState>, InternalIdentit
        ActivityCheckFailureUnderThresholdRatio,
        PowerCycle,
        Recovered,
+       RetryRecovery,
        RecoveryWaitPeriodTimeout,
        RecoveryOperationThresholdExceeded,
+       RetryFencing,
        Fenced;

        public Long getServerId() {
@@ -123,6 +125,7 @@ public interface HAConfig extends StateObject<HAConfig.HAState>, InternalIdentit

        FSM.addTransition(Recovering, Event.Disabled, Disabled);
        FSM.addTransition(Recovering, Event.Ineligible, Ineligible);
+       FSM.addTransition(Recovering, Event.RetryRecovery, Recovering);
        FSM.addTransition(Recovering, Event.Recovered, Recovered);
        FSM.addTransition(Recovering, Event.RecoveryOperationThresholdExceeded, Fencing);

@@ -132,6 +135,7 @@ public interface HAConfig extends StateObject<HAConfig.HAState>, InternalIdentit

        FSM.addTransition(Fencing, Event.Disabled, Disabled);
        FSM.addTransition(Fencing, Event.Ineligible, Ineligible);
+       FSM.addTransition(Fencing, Event.RetryFencing, Fencing);
        FSM.addTransition(Fencing, Event.Fenced, Fenced);

        FSM.addTransition(Fenced, Event.Disabled, Disabled);
@@ -72,6 +72,9 @@ public class SimulatorHAProvider extends HAAbstractHostProvider implements HAPro

    @Override
    public boolean isEligible(final Host host) {
+       if (host == null) {
+           return false;
+       }
        final SimulatorHAState haState = hostHAStateMap.get(host.getId());
        return !isInMaintenanceMode(host) && !isDisabled(host) && haState != null
                && Hypervisor.HypervisorType.Simulator.equals(host.getHypervisorType());
@@ -130,15 +133,8 @@ public class SimulatorHAProvider extends HAAbstractHostProvider implements HAPro
        }
    }

-   @Override
-   public boolean preStateTransitionEvent(final HAConfig.HAState oldState, final HAConfig.Event event,
-           final HAConfig.HAState newState, final HAConfig vo, final boolean status, final Object opaque) {
-       return false;
-   }
-
-   @Override
-   public boolean postStateTransitionEvent(final StateMachine2.Transition<HAConfig.HAState, HAConfig.Event> transition,
-           final HAConfig vo, final boolean status, final Object opaque) {
+   private boolean addStateTransition(final HAConfig vo, final boolean status,
+           final HAConfig.HAState oldState, final HAConfig.HAState newState, final HAConfig.Event event) {
        if (vo.getResourceType() != HAResource.ResourceType.Host) {
            return false;
        }
@@ -147,6 +143,18 @@ public class SimulatorHAProvider extends HAAbstractHostProvider implements HAPro
            return false;
        }
        final HAResourceCounter counter = haManager.getHACounter(vo.getResourceId(), vo.getResourceType());
-       return haState.addStateTransition(transition.getToState(), transition.getCurrentState(), transition.getEvent(), counter);
+       return haState.addStateTransition(newState, oldState, event, counter);
    }

+   @Override
+   public boolean preStateTransitionEvent(final HAConfig.HAState oldState, final HAConfig.Event event,
+           final HAConfig.HAState newState, final HAConfig vo, final boolean status, final Object opaque) {
+       return addStateTransition(vo, status, oldState, newState, event);
+   }
+
+   @Override
+   public boolean postStateTransitionEvent(final StateMachine2.Transition<HAConfig.HAState, HAConfig.Event> transition,
+           final HAConfig vo, final boolean status, final Object opaque) {
+       return addStateTransition(vo, status, transition.getCurrentState(), transition.getToState(), transition.getEvent());
+   }
}
@@ -40,7 +40,7 @@
        <dependency>
            <groupId>br.com.autonomiccs</groupId>
            <artifactId>apache-cloudstack-java-client</artifactId>
-           <version>1.0.4</version>
+           <version>1.0.5</version>
        </dependency>
    </dependencies>
</project>
@@ -116,7 +116,8 @@ else
    lastUpdateTime=${arrTime[1]}
    echo "$SuspectTime:$latestUpdateTime:$MSTime" > $acFile

-   if [[ $lastSuspectTime -ne $SuspectTime ]]; then
-       suspectTimeDiff=$(expr $SuspectTime - $lastSuspectTime)
-       if [[ $suspectTimeDiff -lt 0 ]]; then
+   if [[ $latestUpdateTime -gt $SuspectTime ]]; then
        echo "=====> ALIVE <====="
    else
@@ -2150,7 +2150,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
        }

        try {
-           SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot(connection, "service cloudstack-agent restart");
+           SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot(connection, "service cloudstack-agent restart || systemctl restart cloudstack-agent");
            s_logger.debug(result.toString());
        } catch (SshException e) {
            return false;
@@ -73,4 +73,4 @@ public interface HAManager extends HAConfigManager {
    boolean isHAEligible(final HAResource resource);
    Boolean isVMAliveOnHost(final Host host);
    Status getHostStatus(final Host host);
}
}
@@ -17,31 +17,20 @@

package org.apache.cloudstack.ha;

import com.cloud.cluster.ClusterManagerListener;
import com.cloud.cluster.ManagementServerHost;
import com.cloud.dc.ClusterDetailsDao;
import com.cloud.dc.ClusterDetailsVO;
import com.cloud.dc.DataCenter;
import com.cloud.dc.DataCenterDetailVO;
import com.cloud.dc.dao.DataCenterDetailsDao;
import com.cloud.domain.Domain;
import com.cloud.event.ActionEvent;
import com.cloud.event.ActionEventUtils;
import com.cloud.event.EventTypes;
import com.cloud.host.Host;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.org.Cluster;
import com.cloud.utils.component.ComponentContext;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.component.PluggableService;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.TransactionCallback;
import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.fsm.NoTransitionException;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import javax.inject.Inject;
import javax.naming.ConfigurationException;

import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.command.admin.ha.ConfigureHAForHostCmd;

@@ -70,20 +59,35 @@ import org.apache.cloudstack.poll.BackgroundPollTask;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.log4j.Logger;

import javax.inject.Inject;
import javax.naming.ConfigurationException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import com.cloud.cluster.ClusterManagerListener;
import com.cloud.cluster.ManagementServerHost;
import com.cloud.dc.ClusterDetailsDao;
import com.cloud.dc.ClusterDetailsVO;
import com.cloud.dc.DataCenter;
import com.cloud.dc.DataCenterDetailVO;
import com.cloud.dc.dao.DataCenterDetailsDao;
import com.cloud.domain.Domain;
import com.cloud.event.ActionEvent;
import com.cloud.event.ActionEventUtils;
import com.cloud.event.EventTypes;
import com.cloud.host.Host;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.org.Cluster;
import com.cloud.utils.component.ComponentContext;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.component.PluggableService;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.TransactionCallback;
import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.fsm.NoTransitionException;
import com.cloud.utils.fsm.StateListener;
import com.cloud.utils.fsm.StateMachine2;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;

-public final class HAManagerImpl extends ManagerBase implements HAManager, ClusterManagerListener, PluggableService, Configurable {
+public final class HAManagerImpl extends ManagerBase implements HAManager, ClusterManagerListener, PluggableService, Configurable, StateListener<HAConfig.HAState, HAConfig.Event, HAConfig> {
    public static final Logger LOG = Logger.getLogger(HAManagerImpl.class);

    @Inject
@@ -151,7 +155,9 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust
        if (result) {
            final String message = String.format("Transitioned host HA state from:%s to:%s due to event:%s for the host id:%d",
                    currentHAState, nextState, event, haConfig.getResourceId());
-           LOG.debug(message);
+           if (LOG.isTraceEnabled()) {
+               LOG.trace(message);
+           }
            if (nextState == HAConfig.HAState.Recovering || nextState == HAConfig.HAState.Fencing || nextState == HAConfig.HAState.Fenced) {
                ActionEventUtils.onActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(),
                        Domain.ROOT_DOMAIN, EventTypes.EVENT_HA_STATE_TRANSITION, message);
@@ -306,7 +312,7 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust
                LOG.debug("HA: Agent is available/suspect/checking Up " + host.getId());
            }
            return Status.Down;
-       } else if (haConfig.getState() == HAConfig.HAState.Degraded || haConfig.getState() == HAConfig.HAState.Recovering || haConfig.getState() == HAConfig.HAState.Recovered || haConfig.getState() == HAConfig.HAState.Fencing) {
+       } else if (haConfig.getState() == HAConfig.HAState.Degraded || haConfig.getState() == HAConfig.HAState.Recovering || haConfig.getState() == HAConfig.HAState.Fencing) {
            if (LOG.isDebugEnabled()){
                LOG.debug("HA: Agent is disconnected " + host.getId());
            }
@@ -454,23 +460,90 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust
        return cmdList;
    }

-   //////////////////////////////////////////////////////////////////
-   //////////////// Clustered Manager Listeners /////////////////////
-   //////////////////////////////////////////////////////////////////
+   //////////////////////////////////////////////////////
+   //////////////// Event Listeners /////////////////////
+   //////////////////////////////////////////////////////

    @Override
    public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {

    }

    @Override
    public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) {

    }

    @Override
    public void onManagementNodeIsolated() {
    }

+   private boolean processHAStateChange(final HAConfig haConfig, final HAConfig.HAState newState, final boolean status) {
+       if (!status || !checkHAOwnership(haConfig)) {
+           return false;
+       }
+
+       final HAResource resource = validateAndFindHAResource(haConfig);
+       if (resource == null) {
+           return false;
+       }
+
+       final HAProvider<HAResource> haProvider = validateAndFindHAProvider(haConfig, resource);
+       if (haProvider == null) {
+           return false;
+       }
+
+       final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
+
+       // Perform activity checks
+       if (newState == HAConfig.HAState.Checking) {
+           final ActivityCheckTask job = ComponentContext.inject(new ActivityCheckTask(resource, haProvider, haConfig,
+                   HAProviderConfig.ActivityCheckTimeout, activityCheckExecutor, counter.getSuspectTimeStamp()));
+           activityCheckExecutor.submit(job);
+       }
+
+       // Attempt recovery
+       if (newState == HAConfig.HAState.Recovering) {
+           if (counter.getRecoveryCounter() >= (Long) (haProvider.getConfigValue(HAProviderConfig.MaxRecoveryAttempts, resource))) {
+               return false;
+           }
+           final RecoveryTask task = ComponentContext.inject(new RecoveryTask(resource, haProvider, haConfig,
+                   HAProviderConfig.RecoveryTimeout, recoveryExecutor));
+           final Future<Boolean> recoveryFuture = recoveryExecutor.submit(task);
+           counter.setRecoveryFuture(recoveryFuture);
+       }
+
+       // Fencing
+       if (newState == HAConfig.HAState.Fencing) {
+           final FenceTask task = ComponentContext.inject(new FenceTask(resource, haProvider, haConfig,
+                   HAProviderConfig.FenceTimeout, fenceExecutor));
+           final Future<Boolean> fenceFuture = fenceExecutor.submit(task);
+           counter.setFenceFuture(fenceFuture);
+       }
+       return true;
+   }
+
+   @Override
+   public boolean preStateTransitionEvent(final HAConfig.HAState oldState, final HAConfig.Event event, final HAConfig.HAState newState, final HAConfig haConfig, final boolean status, final Object opaque) {
+       if (oldState != newState || newState == HAConfig.HAState.Suspect || newState == HAConfig.HAState.Checking) {
+           return false;
+       }
+       if (LOG.isTraceEnabled()) {
+           LOG.trace("HA state pre-transition:: new state=" + newState + ", old state=" + oldState + ", for resource id=" + haConfig.getResourceId() + ", status=" + status + ", ha config state=" + haConfig.getState());
+       }
+       if (status && haConfig.getState() != newState) {
+           LOG.warn("HA state pre-transition:: HA state is not equal to transition state, HA state=" + haConfig.getState() + ", new state=" + newState);
+       }
+       return processHAStateChange(haConfig, newState, status);
+   }
+
+   @Override
+   public boolean postStateTransitionEvent(final StateMachine2.Transition<HAConfig.HAState, HAConfig.Event> transition, final HAConfig haConfig, final boolean status, final Object opaque) {
+       if (LOG.isTraceEnabled()) {
+           LOG.trace("HA state post-transition:: new state=" + transition.getToState() + ", old state=" + transition.getCurrentState() + ", for resource id=" + haConfig.getResourceId() + ", status=" + status + ", ha config state=" + haConfig.getState());
+       }
+       if (status && haConfig.getState() != transition.getToState()) {
+           LOG.warn("HA state post-transition:: HA state is not equal to transition state, HA state=" + haConfig.getState() + ", new state=" + transition.getToState());
+       }
+       return processHAStateChange(haConfig, transition.getToState(), status);
+   }

    ///////////////////////////////////////////////////
@@ -522,10 +595,8 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust
                0L, TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<Runnable>(fenceOperationQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy());

-       pollManager.submitTask(new HealthCheckPollTask());
-       pollManager.submitTask(new ActivityCheckPollTask());
-       pollManager.submitTask(new RecoveryPollTask());
-       pollManager.submitTask(new FencingPollTask());
+       pollManager.submitTask(new HAManagerBgPollTask());
+       HAConfig.HAState.getStateMachine().registerListener(this);

        LOG.debug("HA manager has been configured");
        return true;
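The two added lines capture the scheduling shift described in the commit message: instead of four poll threads scanning for work, the manager registers itself as a listener on the HA state machine and lets successful transitions schedule work directly (see processHAStateChange above). A condensed, hypothetical sketch of that dispatch, with names simplified:

final class TransitionDispatcherSketch {
    // Called after a successful state transition; the matching task is
    // submitted immediately instead of waiting for a poll cycle to notice.
    boolean onTransition(final String newState) {
        switch (newState) {
            case "Checking":   return submit("activity-check"); // formerly ActivityCheckPollTask's job
            case "Recovering": return submit("recovery");       // formerly RecoveryPollTask's job
            case "Fencing":    return submit("fence");          // formerly FencingPollTask's job
            default:           return false; // health checks stay on the single background poll task
        }
    }

    private boolean submit(final String taskName) {
        System.out.println("submitting " + taskName + " task");
        return true;
    }
}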
@@ -558,7 +629,7 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust
    //////////////// Poll Tasks /////////////////////
    /////////////////////////////////////////////////

-   private final class HealthCheckPollTask extends ManagedContextRunnable implements BackgroundPollTask {
+   private final class HAManagerBgPollTask extends ManagedContextRunnable implements BackgroundPollTask {
        @Override
        protected void runInContext() {
            try {
@@ -581,6 +652,19 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust
                        continue;
                    }

+                   switch (haConfig.getState()) {
+                       case Available:
+                       case Suspect:
+                       case Degraded:
+                       case Fenced:
+                           final HealthCheckTask task = ComponentContext.inject(new HealthCheckTask(resource, haProvider, haConfig,
+                                   HAProviderConfig.HealthCheckTimeout, healthCheckExecutor));
+                           healthCheckExecutor.submit(task);
+                           break;
+                       default:
+                           break;
+                   }
+
                    final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());

                    if (haConfig.getState() == HAConfig.HAState.Suspect) {
@@ -595,17 +679,25 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust
                        }
                    }

-                   switch (haConfig.getState()) {
-                       case Available:
-                       case Suspect:
-                       case Degraded:
-                       case Fenced:
-                           final HealthCheckTask task = ComponentContext.inject(new HealthCheckTask(resource, haProvider, haConfig,
-                                   HAProviderConfig.HealthCheckTimeout, healthCheckExecutor));
-                           healthCheckExecutor.submit(task);
-                           break;
-                       default:
-                           break;
-                   }
+                   if (haConfig.getState() == HAConfig.HAState.Recovering) {
+                       if (counter.getRecoveryCounter() >= (Long) (haProvider.getConfigValue(HAProviderConfig.MaxRecoveryAttempts, resource))) {
+                           transitionHAState(HAConfig.Event.RecoveryOperationThresholdExceeded, haConfig);
+                       } else {
+                           transitionHAState(HAConfig.Event.RetryRecovery, haConfig);
+                       }
+                   }
+
+                   if (haConfig.getState() == HAConfig.HAState.Recovered) {
+                       counter.markRecoveryStarted();
+                       if (counter.canExitRecovery((Long) (haProvider.getConfigValue(HAProviderConfig.RecoveryWaitTimeout, resource)))) {
+                           if (transitionHAState(HAConfig.Event.RecoveryWaitPeriodTimeout, haConfig)) {
+                               counter.markRecoveryCompleted();
+                           }
+                       }
+                   }
+
+                   if (haConfig.getState() == HAConfig.HAState.Fencing && counter.canAttemptFencing()) {
+                       transitionHAState(HAConfig.Event.RetryFencing, haConfig);
+                   }
                }
            } catch (Throwable t) {
@@ -617,151 +709,5 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust
        public Long getDelay() {
            return null;
        }

    }

-   private final class ActivityCheckPollTask extends ManagedContextRunnable implements BackgroundPollTask {
-       @Override
-       protected void runInContext() {
-           try {
-               if (LOG.isTraceEnabled()) {
-                   LOG.trace("HA activity check task is running...");
-               }
-               final List<HAConfig> haConfigList = new ArrayList<HAConfig>(haConfigDao.listAll());
-               for (final HAConfig haConfig : haConfigList) {
-                   if (!checkHAOwnership(haConfig)) {
-                       continue;
-                   }
-
-                   final HAResource resource = validateAndFindHAResource(haConfig);
-                   if (resource == null) {
-                       continue;
-                   }
-
-                   final HAProvider<HAResource> haProvider = validateAndFindHAProvider(haConfig, resource);
-                   if (haProvider == null) {
-                       continue;
-                   }
-
-                   if (haConfig.getState() == HAConfig.HAState.Checking) {
-                       final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
-                       final ActivityCheckTask job = ComponentContext.inject(new ActivityCheckTask(resource, haProvider, haConfig,
-                               HAProviderConfig.ActivityCheckTimeout, activityCheckExecutor, counter.getSuspectTimeStamp()));
-                       activityCheckExecutor.submit(job);
-                   }
-               }
-           } catch (Throwable t) {
-               LOG.error("Error trying to perform activity checks in HA manager", t);
-           }
-       }
-
-       @Override
-       public Long getDelay() {
-           return null;
-       }
-
-   }
-
-   private final class RecoveryPollTask extends ManagedContextRunnable implements BackgroundPollTask {
-       @Override
-       protected void runInContext() {
-           try {
-               if (LOG.isTraceEnabled()) {
-                   LOG.trace("HA recovery task is running...");
-               }
-               final List<HAConfig> haConfigList = new ArrayList<HAConfig>(haConfigDao.listAll());
-               for (final HAConfig haConfig : haConfigList) {
-                   if (!checkHAOwnership(haConfig)) {
-                       continue;
-                   }
-
-                   final HAResource resource = validateAndFindHAResource(haConfig);
-                   if (resource == null) {
-                       continue;
-                   }
-
-                   final HAProvider<HAResource> haProvider = validateAndFindHAProvider(haConfig, resource);
-                   if (haProvider == null) {
-                       continue;
-                   }
-
-                   final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
-                   if (haConfig.getState() == HAConfig.HAState.Recovering) {
-                       if (counter.canAttemptRecovery()) {
-                           if (counter.getRecoveryCounter() >= (Long)(haProvider.getConfigValue(HAProviderConfig.MaxRecoveryAttempts, resource))) {
-                               transitionHAState(HAConfig.Event.RecoveryOperationThresholdExceeded, haConfig);
-                               continue;
-                           }
-
-                           final RecoveryTask task = ComponentContext.inject(new RecoveryTask(resource, haProvider, haConfig,
-                                   HAProviderConfig.RecoveryTimeout, recoveryExecutor));
-                           final Future<Boolean> recoveryFuture = recoveryExecutor.submit(task);
-                           counter.setRecoveryFuture(recoveryFuture);
-                           counter.incrRecoveryCounter();
-                       }
-                   }
-                   if (haConfig.getState() == HAConfig.HAState.Recovered) {
-                       counter.markRecoveryStarted();
-                       if (counter.canExitRecovery((Long)(haProvider.getConfigValue(HAProviderConfig.RecoveryWaitTimeout, resource)))) {
-                           transitionHAState(HAConfig.Event.RecoveryWaitPeriodTimeout, haConfig);
-                           counter.markRecoveryCompleted();
-                       }
-                   }
-               }
-           } catch (Throwable t) {
-               LOG.error("Error trying to perform recovery operation in HA manager", t);
-           }
-       }
-
-       @Override
-       public Long getDelay() {
-           return null;
-       }
-
-   }
-
-   private final class FencingPollTask extends ManagedContextRunnable implements BackgroundPollTask {
-       @Override
-       protected void runInContext() {
-           try {
-               if (LOG.isTraceEnabled()) {
-                   LOG.trace("HA fencing task is running...");
-               }
-               final List<HAConfig> haConfigList = new ArrayList<HAConfig>(haConfigDao.listAll());
-               for (final HAConfig haConfig : haConfigList) {
-                   if (!checkHAOwnership(haConfig)) {
-                       continue;
-                   }
-
-                   final HAResource resource = validateAndFindHAResource(haConfig);
-                   if (resource == null) {
-                       continue;
-                   }
-
-                   final HAProvider<HAResource> haProvider = validateAndFindHAProvider(haConfig, resource);
-                   if (haProvider == null) {
-                       continue;
-                   }
-
-                   final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
-                   if (counter.lastFencingCompleted()) {
-                       if (haConfig.getState() == HAConfig.HAState.Fencing) {
-                           final FenceTask task = ComponentContext.inject(new FenceTask(resource, haProvider, haConfig,
-                                   HAProviderConfig.FenceTimeout, fenceExecutor));
-                           final Future<Boolean> fenceFuture = fenceExecutor.submit(task);
-                           counter.setFenceFuture(fenceFuture);
-                       }
-                   }
-               }
-           } catch (Throwable t) {
-               LOG.error("Error trying to perform fencing operation in HA manager", t);
-           }
-       }
-
-       @Override
-       public Long getDelay() {
-           return null;
-       }
-
-   }
}
@@ -41,7 +41,6 @@ public final class HAResourceCounter {
    }

    public synchronized void incrActivityCounter(final boolean isFailure) {
-       lastActivityCheckTimestamp = System.currentTimeMillis();
        activityCheckCounter.incrementAndGet();
        if (isFailure) {
            activityCheckFailureCounter.incrementAndGet();
@@ -71,8 +70,12 @@ public final class HAResourceCounter {
        return activityCheckFailureCounter.get() > (activityCheckCounter.get() * failureRatio);
    }

-   public boolean canPerformActivityCheck(final Long activityCheckInterval) {
-       return lastActivityCheckTimestamp == null || (System.currentTimeMillis() - lastActivityCheckTimestamp) > (activityCheckInterval * 1000);
+   public synchronized boolean canPerformActivityCheck(final Long activityCheckInterval) {
+       if (lastActivityCheckTimestamp == null || (System.currentTimeMillis() - lastActivityCheckTimestamp) > (activityCheckInterval * 1000)) {
+           lastActivityCheckTimestamp = System.currentTimeMillis();
+           return true;
+       }
+       return false;
    }

    public boolean canRecheckActivity(final Long maxDegradedPeriod) {
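A note on the change above: the old implementation read the timestamp without synchronization and never updated it here, so two concurrent callers could both conclude a check was due and schedule duplicate activity checks. Folding the timestamp update into a synchronized check-and-set lets exactly one caller claim each interval. A hypothetical, self-contained illustration of the pattern:

public final class IntervalGateSketch {
    private Long lastRunMillis; // guarded by the object monitor

    public synchronized boolean tryAcquire(final long intervalMillis) {
        final long now = System.currentTimeMillis();
        if (lastRunMillis == null || (now - lastRunMillis) > intervalMillis) {
            lastRunMillis = now; // claim this interval before releasing the lock
            return true;
        }
        return false; // another caller already claimed the interval
    }
}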
@@ -121,7 +124,7 @@ public final class HAResourceCounter {
        fenceFuture = future;
    }

-   public boolean lastFencingCompleted() {
+   public boolean canAttemptFencing() {
        return fenceFuture == null || fenceFuture.isDone();
    }

@@ -17,12 +17,11 @@

package org.apache.cloudstack.ha.provider;

import com.cloud.utils.component.Adapter;

import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAResource;
import org.joda.time.DateTime;

import org.apache.cloudstack.ha.HAResource;
import com.cloud.utils.component.Adapter;

public interface HAProvider<R extends HAResource> extends Adapter {

@@ -57,7 +56,9 @@ public interface HAProvider<R extends HAResource> extends Adapter {

    boolean fence(R r) throws HAFenceException;

-   void setFenced(R r);
+   void fenceSubResources(R r);
+
+   void enableMaintenance(R r);

    void sendAlert(R r, HAConfig.HAState nextState);

@@ -71,7 +71,7 @@ public abstract class HAAbstractHostProvider extends AdapterBase implements HAPr
    }

    @Override
-   public void setFenced(final Host r) {
+   public void fenceSubResources(final Host r) {
        if (r.getState() != Status.Down) {
            try {
                LOG.debug("Trying to disconnect the host without investigation and scheduling HA for the VMs on host id=" + r.getId());
@@ -80,11 +80,15 @@ public abstract class HAAbstractHostProvider extends AdapterBase implements HAPr
            } catch (Exception e) {
                LOG.error("Failed to disconnect host and schedule HA restart of VMs after fencing the host: ", e);
            }
-           try {
-               resourceManager.resourceStateTransitTo(r, ResourceState.Event.InternalEnterMaintenance, ManagementServerNode.getManagementServerId());
-           } catch (NoTransitionException e) {
-               LOG.error("Failed to put host in maintenance mode after host-ha fencing and scheduling VM-HA: ", e);
-           }
        }
    }

+   @Override
+   public void enableMaintenance(final Host r) {
+       try {
+           resourceManager.resourceStateTransitTo(r, ResourceState.Event.InternalEnterMaintenance, ManagementServerNode.getManagementServerId());
+       } catch (NoTransitionException e) {
+           LOG.error("Failed to put host in maintenance mode after host-ha fencing and scheduling VM-HA: ", e);
+       }
+   }
+
@@ -17,6 +17,10 @@

package org.apache.cloudstack.ha.task;

+import java.util.concurrent.ExecutorService;
+
+import javax.inject.Inject;
+
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.ha.HAResource;
@@ -25,11 +29,7 @@ import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HAProvider.HAProviderConfig;
import org.apache.log4j.Logger;

-import javax.inject.Inject;
-
import org.joda.time.DateTime;
-import java.util.concurrent.ExecutorService;

public class ActivityCheckTask extends BaseHATask {

@@ -38,22 +38,24 @@ public class ActivityCheckTask extends BaseHATask {
    @Inject
    private HAManager haManager;

-   private final long disconnectTime;
+   private long disconnectTime;
+   private long maxActivityChecks;
+   private double activityCheckFailureRatio;

    public ActivityCheckTask(final HAResource resource, final HAProvider<HAResource> haProvider, final HAConfig haConfig, final HAProvider.HAProviderConfig haProviderConfig,
            final ExecutorService executor, final long disconnectTime) {
        super(resource, haProvider, haConfig, haProviderConfig, executor);
        this.disconnectTime = disconnectTime;
+       this.maxActivityChecks = (Long)haProvider.getConfigValue(HAProviderConfig.MaxActivityChecks, resource);
+       this.activityCheckFailureRatio = (Double)haProvider.getConfigValue(HAProviderConfig.ActivityCheckFailureRatio, resource);
    }

    public boolean performAction() throws HACheckerException {
        return getHaProvider().hasActivity(getResource(), new DateTime(disconnectTime));
    }

-   public void processResult(boolean result, Throwable t) {
+   public synchronized void processResult(boolean result, Throwable t) {
        final HAConfig haConfig = getHaConfig();
-       final HAProvider<HAResource> haProvider = getHaProvider();
-       final HAResource resource = getResource();
        final HAResourceCounter counter = haManager.getHACounter(haConfig.getResourceId(), haConfig.getResourceType());

        if (t != null && t instanceof HACheckerException) {
@@ -64,18 +66,17 @@ public class ActivityCheckTask extends BaseHATask {

        counter.incrActivityCounter(!result);

-       long maxActivityChecks = (Long)haProvider.getConfigValue(HAProviderConfig.MaxActivityChecks, resource);
        if (counter.getActivityCheckCounter() < maxActivityChecks) {
            haManager.transitionHAState(HAConfig.Event.TooFewActivityCheckSamples, haConfig);
            return;
        }

-       double activityCheckFailureRatio = (Double)haProvider.getConfigValue(HAProviderConfig.ActivityCheckFailureRatio, resource);
        if (counter.hasActivityThresholdExceeded(activityCheckFailureRatio)) {
            haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureOverThresholdRatio, haConfig);
        } else {
-           haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureUnderThresholdRatio, haConfig);
-           counter.markResourceDegraded();
+           if (haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureUnderThresholdRatio, haConfig)) {
+               counter.markResourceDegraded();
+           }
        }
        counter.resetActivityCounter();
    }
@@ -17,6 +17,13 @@

package org.apache.cloudstack.ha.task;

+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.provider.HACheckerException;
@@ -24,13 +31,7 @@ import org.apache.cloudstack.ha.provider.HAFenceException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HARecoveryException;
import org.apache.log4j.Logger;

-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
+import org.joda.time.DateTime;

public abstract class BaseHATask implements Callable<Boolean> {
    public static final Logger LOG = Logger.getLogger(BaseHATask.class);
@@ -40,6 +41,7 @@ public abstract class BaseHATask implements Callable<Boolean> {
    private final HAConfig haConfig;
    private final ExecutorService executor;
    private Long timeout;
+   private DateTime created;

    public BaseHATask(final HAResource resource, final HAProvider<HAResource> haProvider, final HAConfig haConfig, final HAProvider.HAProviderConfig haProviderConfig,
            final ExecutorService executor) {
@@ -48,6 +50,7 @@ public abstract class BaseHATask implements Callable<Boolean> {
        this.haConfig = haConfig;
        this.executor = executor;
        this.timeout = (Long)haProvider.getConfigValue(haProviderConfig, resource);
+       this.created = new DateTime();
    }

    public HAProvider<HAResource> getHaProvider() {
@@ -74,6 +77,9 @@ public abstract class BaseHATask implements Callable<Boolean> {

    @Override
    public Boolean call() {
+       if (new DateTime().minusHours(1).isAfter(getCreated())) {
+           return false;
+       }
        final Future<Boolean> future = executor.submit(new Callable<Boolean>() {
            @Override
            public Boolean call() throws HACheckerException, HAFenceException, HARecoveryException {
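The guard added above drops tasks that waited in the executor queue for more than an hour before running, so a stale check against long-gone conditions is skipped rather than executed. A small Joda-Time illustration of the comparison it performs:

public class StalenessGuardDemo {
    public static void main(String[] args) {
        // A task created 90 minutes ago is older than the one-hour cutoff,
        // so the guard evaluates to true and call() would return false.
        final org.joda.time.DateTime created = new org.joda.time.DateTime().minusMinutes(90);
        final boolean stale = new org.joda.time.DateTime().minusHours(1).isAfter(created);
        System.out.println("stale=" + stale); // prints: stale=true
    }
}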
@@ -99,4 +105,7 @@ public abstract class BaseHATask implements Callable<Boolean> {
        return result;
    }

+   public DateTime getCreated() {
+       return created;
+   }
}
@@ -48,7 +48,8 @@ public class FenceTask extends BaseHATask {
        if (result) {
            counter.resetRecoveryCounter();
            haManager.transitionHAState(HAConfig.Event.Fenced, haConfig);
-           getHaProvider().setFenced(getResource());
+           getHaProvider().fenceSubResources(getResource());
+           getHaProvider().enableMaintenance(getResource());
        }
        getHaProvider().sendAlert(getResource(), HAConfig.HAState.Fencing);
    }
@@ -17,16 +17,18 @@

package org.apache.cloudstack.ha.task;

+import java.util.concurrent.ExecutorService;
+
+import javax.inject.Inject;
+
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.ha.HAResource;
+import org.apache.cloudstack.ha.HAResourceCounter;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HARecoveryException;
-
-import javax.inject.Inject;
-import java.util.concurrent.ExecutorService;

public class RecoveryTask extends BaseHATask {

    @Inject
@@ -43,8 +45,13 @@ public class RecoveryTask extends BaseHATask {

    public void processResult(boolean result, Throwable e) {
        final HAConfig haConfig = getHaConfig();
+       final HAResourceCounter counter = haManager.getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
+       counter.incrRecoveryCounter();
+       counter.resetActivityCounter();
+
        if (result) {
            haManager.transitionHAState(HAConfig.Event.Recovered, haConfig);
+           getHaProvider().fenceSubResources(getResource());
        }
        getHaProvider().sendAlert(getResource(), HAConfig.HAState.Recovering);
    }
@@ -263,7 +263,7 @@ public class OutOfBandManagementServiceImpl extends ManagerBase implements OutOf
    }

    public boolean isOutOfBandManagementEnabled(final Host host) {
-       return isOutOfBandManagementEnabledForZone(host.getDataCenterId())
+       return host != null && isOutOfBandManagementEnabledForZone(host.getDataCenterId())
                && isOutOfBandManagementEnabledForCluster(host.getClusterId())
                && isOutOfBandManagementEnabledForHost(host.getId());
    }
@ -1,247 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
|
||||
from marvin.cloudstackTestCase import *
|
||||
from marvin.cloudstackAPI import *
|
||||
from marvin.lib.utils import *
|
||||
from marvin.lib.common import *
|
||||
from nose.plugins.attrib import attr
|
||||
import cmd
|
||||
from cmd import Cmd
|
||||
|
||||
|
||||
class TestHaForHost(cloudstackTestCase):
|
||||
""" Test cases for configuring HA for Host
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
testClient = super(TestHaForHost, self).getClsTestClient()
|
||||
|
||||
self.apiclient = testClient.getApiClient()
|
||||
self.dbclient = testClient.getDbConnection()
|
||||
self.services = testClient.getParsedTestDataConfig()
|
||||
|
||||
self.zone = get_zone(self.apiclient, testClient.getZoneForTests())
|
||||
self.host = None
|
||||
self.server = None
|
||||
|
||||
self.cleanup = []
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
self.dbclient.execute("delete from ha_config where resource_type='Host'")
|
||||
cleanup_resources(self.apiclient, self.cleanup)
|
||||
except Exception as e:
|
||||
raise Exception("Warning: Exception during cleanup : %s" % e)
|
||||
|
||||
|
||||
def getHost(self, hostId=None):
|
||||
if self.host and hostId is None:
|
||||
return self.host
|
||||
|
||||
response = list_hosts(
|
||||
self.apiclient,
|
||||
zoneid=self.zone.id,
|
||||
type='Routing',
|
||||
id=hostId
|
||||
)
|
||||
if len(response) > 0:
|
||||
self.host = response[0]
|
||||
return self.host
|
||||
raise self.skipTest("No hosts found, skipping HA for Host test")
|
||||
|
||||
|
||||
def getHaProvider(self, host):
|
||||
cmd = listHostHAProviders.listHostHAProvidersCmd()
|
||||
cmd.hypervisor = host.hypervisor
|
||||
response = self.apiclient.listHostHAProviders(cmd)
|
||||
return response[0].haprovider
|
||||
|
||||
|
||||
def configureHaProvider(self):
|
||||
cmd = configureHAForHost.configureHAForHostCmd()
|
||||
cmd.hostid = self.getHost().id
|
||||
cmd.provider = self.getHaProvider(self.getHost())
|
||||
return self.apiclient.configureHAForHost(cmd)
|
||||
|
||||
|
||||
def getHaForHostEnableCmd(self):
|
||||
cmd = enableHAForHost.enableHAForHostCmd()
|
||||
cmd.hostid = self.getHost().id
|
||||
return cmd
|
||||
|
||||
|
||||
def getHaForHostDisableCmd(self):
|
||||
cmd = disableHAForHost.disableHAForHostCmd()
|
||||
cmd.hostid = self.getHost().id
|
||||
return cmd
|
||||
|
||||
|
||||
def getListHostHAResources(self):
|
||||
cmd = listHostHAResources.listHostHAResourcesCmd()
|
||||
cmd.hostid = self.getHost().id
|
||||
return cmd
|
||||
|
||||
|
||||
@attr(tags=["advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"sg"],
|
||||
required_hardware="false")
|
||||
def test_enable_ha_for_host(self):
|
||||
"""
|
||||
This test enables HA for a host
|
||||
"""
|
||||
|
||||
self.configureHaProvider()
|
||||
cmd = self.getHaForHostEnableCmd()
|
||||
response = self.apiclient.enableHAForHost(cmd)
|
||||
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haenable, True)
|
||||
|
||||
|
||||
@attr(tags=["advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"sg"],
|
||||
required_hardware="false")
|
||||
def test_enable_ha_for_host_invalid(self):
|
||||
"""
|
||||
This is a negative test for enable HA for a host
|
||||
"""
|
||||
|
||||
self.configureHaProvider()
|
||||
cmd = self.getHaForHostEnableCmd()
|
||||
cmd.hostid = -1
|
||||
|
||||
try:
|
||||
response = self.apiclient.enableHAForHost(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
|
||||
@attr(tags=["advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"sg"],
|
||||
required_hardware="false")
|
||||
def test_disable_ha_for_host(self):
|
||||
"""
|
||||
This test disables HA for a host
|
||||
"""
|
||||
|
||||
self.configureHaProvider()
|
||||
cmd = self.getHaForHostDisableCmd()
|
||||
|
||||
response = self.apiclient.disableHAForHost(cmd)
|
||||
|
||||
self.assertTrue(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haenable, False)
|
||||
|
||||
response = self.getHost(cmd.hostid)
|
||||
|
||||
self.assertEqual(response.hostha.hastate, "Disabled")
|
||||
|
||||
|
||||
@attr(tags=["advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"sg"],
|
||||
required_hardware="false")
|
||||
def test_disable_ha_for_host_invalid(self):
|
||||
"""
|
||||
This is a negative test for disable HA for a host
|
||||
"""
|
||||
|
||||
self.configureHaProvider()
|
||||
cmd = self.getHaForHostDisableCmd()
|
||||
cmd.hostid = -1
|
||||
|
||||
try:
|
||||
response = self.apiclient.disableHAForHost(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
|
||||
@attr(tags=["advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"sg"],
|
||||
required_hardware="false")
|
||||
def test_list_ha_for_host(self):
|
||||
"""
|
||||
Test that verifies the listHAForHost API
|
||||
"""
|
||||
self.configureHaProvider()
|
||||
db_count = self.dbclient.execute("SELECT count(*) FROM cloud.ha_config")
|
||||
|
||||
cmd = self.getListHostHAResources()
|
||||
del cmd.hostid
|
||||
response = self.apiclient.listHostHAResources(cmd)
|
||||
|
||||
self.assertEqual(db_count[0][0], len(response))
|
||||
|
||||
|
||||
@attr(tags=["advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"sg"],
|
||||
required_hardware="false")
|
||||
def test_list_ha_for_host_valid(self):
|
||||
"""
|
||||
Valid test for listing a specific host HA resources
|
||||
"""
|
||||
|
||||
self.configureHaProvider()
|
||||
cmd = self.getListHostHAResources()
|
||||
response = self.apiclient.listHostHAResources(cmd)
|
||||
self.assertEqual(response[0].hostid, cmd.hostid)
|
||||
|
||||
|
||||
@attr(tags=["advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"sg"],
|
||||
required_hardware="false")
|
||||
def test_list_ha_for_host_invalid(self):
|
||||
"""
|
||||
Test that listHostHAResources is returning exception when called with invalid data
|
||||
"""
|
||||
|
||||
self.configureHaProvider()
|
||||
cmd = self.getListHostHAResources()
|
||||
cmd.hostid = "someinvalidvalue"
|
||||
|
||||
try:
|
||||
response = self.apiclient.listHostHAResources(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
|
@ -1,535 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
|
||||
from marvin.cloudstackTestCase import *
|
||||
from marvin.lib.utils import *
|
||||
from marvin.lib.base import *
|
||||
from marvin.lib.common import *
|
||||
from nose.plugins.attrib import attr
|
||||
|
||||
from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer
|
||||
|
||||
import random
|
||||
import socket
|
||||
import thread
|
||||
|
||||
|
||||
class TestHaKVMAgent(cloudstackTestCase):
|
||||
""" Test cases for out of band management
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
testClient = super(TestHaKVMAgent, self).getClsTestClient()
|
||||
|
||||
self.apiClient = testClient.getApiClient()
|
||||
self.dbclient = testClient.getDbConnection()
|
||||
self.services = testClient.getParsedTestDataConfig()
|
||||
|
||||
self.zone = get_zone(self.apiClient, testClient.getZoneForTests())
|
||||
self.host = self.getHost()
|
||||
self.cluster_id = self.host.clusterid
|
||||
self.server = None
|
||||
|
||||
self.hypervisor = self.testClient.getHypervisorInfo()
|
||||
self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__
|
||||
self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__
|
||||
self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20)
|
||||
|
||||
# Cleanup any existing configs
|
||||
self.dbclient.execute("delete from ha_config where resource_type='Host'")
|
||||
|
||||
# use random port for ipmisim
|
||||
s = socket.socket()
|
||||
s.bind(('', 0))
|
||||
self.serverPort = s.getsockname()[1]
|
||||
s.close()
|
||||
|
||||
# Set Cluster-level setting in order to run tests faster
|
||||
self.update_configuration("kvm.ha.activity.check.failure.ratio", "0.7")
|
||||
self.update_configuration("kvm.ha.activity.check.interval", "10")
|
||||
self.update_configuration("kvm.ha.activity.check.max.attempts", "5")
|
||||
self.update_configuration("kvm.ha.activity.check.timeout", "60")
|
||||
self.update_configuration("kvm.ha.degraded.max.period", "30")
|
||||
self.update_configuration("kvm.ha.fence.timeout", "60")
|
||||
self.update_configuration("kvm.ha.health.check.timeout", "10")
|
||||
self.update_configuration("kvm.ha.recover.failure.threshold", "1")
|
||||
self.update_configuration("kvm.ha.recover.timeout", "120")
|
||||
self.update_configuration("kvm.ha.recover.wait.period", "60")
|
||||
|
||||
self.service_offering = ServiceOffering.create(
|
||||
self.apiClient,
|
||||
self.services["service_offerings"]
|
||||
)
|
||||
|
||||
self.template = get_template(
|
||||
self.apiClient,
|
||||
self.zone.id,
|
||||
self.services["ostype"]
|
||||
)
|
||||
|
||||
self.cleanup = [self.service_offering]
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId())
|
||||
self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId())
|
||||
self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'")
|
||||
self.dbclient.execute("delete from data_center_details where name='resourceHAEnabled'")
|
||||
self.dbclient.execute("delete from ha_config where resource_type='Host'")
|
||||
self.dbclient.execute("delete from oobm where port=%d" % self.getIpmiServerPort())
|
||||
self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId())
|
||||
self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId())
|
||||
self.dbclient.execute("delete from cluster_details where name='outOfBandManagementEnabled'")
|
||||
self.dbclient.execute("delete from data_center_details where name='outOfBandManagementEnabled'")
|
||||
cleanup_resources(self.apiClient, self.cleanup)
|
||||
if self.server:
|
||||
self.server.shutdown()
|
||||
self.server.server_close()
|
||||
except Exception as e:
|
||||
raise Exception("Warning: Exception during cleanup : %s" % e)
|
||||
|
||||
def getFakeMsId(self):
|
||||
return self.fakeMsId
|
||||
|
||||
def getFakeMsRunId(self):
|
||||
return self.fakeMsId * 1000
|
||||
|
||||
def getHostHaConfigCmd(self, provider='kvmhaprovider'):
|
||||
cmd = configureHAForHost.configureHAForHostCmd()
|
||||
cmd.provider = provider
|
||||
cmd.hostid = self.host.id
|
||||
return cmd
|
||||
|
||||
def getHostHaEnableCmd(self):
|
||||
cmd = enableHAForHost.enableHAForHostCmd()
|
||||
cmd.hostid = self.host.id
|
||||
return cmd
|
||||
|
||||
def getHost(self, hostId=None):
|
||||
response = list_hosts(
|
||||
self.apiClient,
|
||||
zoneid=self.zone.id,
|
||||
type='Routing',
|
||||
id=hostId
|
||||
)
|
||||
if len(response) > 0:
|
||||
self.host = response[0]
|
||||
return self.host
|
||||
raise self.skipTest("No hosts found, skipping out-of-band management test")
|
||||
|
||||
def getIpmiServerIp(self):
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
s.connect((self.mgtSvrDetails["mgtSvrIp"], self.mgtSvrDetails["port"]))
|
||||
return s.getsockname()[0]
|
||||
|
||||
def getIpmiServerPort(self):
|
||||
return self.serverPort
|
||||
|
||||
def getOobmConfigCmd(self):
|
||||
cmd = configureOutOfBandManagement.configureOutOfBandManagementCmd()
|
||||
cmd.driver = 'ipmitool' # The default available driver
|
||||
cmd.address = self.getIpmiServerIp()
|
||||
cmd.port = self.getIpmiServerPort()
|
||||
cmd.username = 'admin'
|
||||
cmd.password = 'password'
|
||||
cmd.hostid = self.host.id
|
||||
return cmd
|
||||
|
||||
def getOobmEnableCmd(self):
|
||||
cmd = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd()
|
||||
cmd.hostid = self.host.id
|
||||
return cmd
|
||||
|
||||
def getOobmDisableCmd(self):
|
||||
cmd = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd()
|
||||
cmd.hostid = self.host.id
|
||||
return cmd
|
||||
|
||||
def getOobmIssueActionCmd(self):
|
||||
cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd()
|
||||
cmd.hostid = self.host.id
|
||||
cmd.action = 'STATUS'
|
||||
return cmd
|
||||
|
||||
def issue_power_action_cmd(self, action, timeout=None):
|
||||
cmd = self.getOobmIssueActionCmd()
|
||||
cmd.action = action
|
||||
if timeout:
|
||||
cmd.timeout = timeout
|
||||
|
||||
try:
|
||||
return self.apiClient.issueOutOfBandManagementPowerAction(cmd)
|
||||
except Exception as e:
|
||||
if "packet session id 0x0 does not match active session" in str(e):
|
||||
raise self.skipTest("Known ipmitool issue hit, skipping test")
|
||||
raise e
|
||||
|
||||
def configure_and_enable_oobm(self):
|
||||
self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd())
|
||||
response = self.apiClient.enableOutOfBandManagementForHost(self.getOobmEnableCmd())
|
||||
self.assertEqual(response.enabled, True)
|
||||
|
||||
def start_ipmi_server(self):
|
||||
def startIpmiServer(tname, server):
|
||||
self.debug("Starting ipmisim server")
|
||||
try:
|
||||
server.serve_forever()
|
||||
except Exception: pass
|
||||
IpmiServerContext('reset')
|
||||
ThreadedIpmiServer.allow_reuse_address = False
|
||||
server = ThreadedIpmiServer(('0.0.0.0', self.getIpmiServerPort()), IpmiServer)
|
||||
thread.start_new_thread(startIpmiServer, ("ipmi-server", server,))
|
||||
self.server = server
|
||||
|
||||
def checkSyncToState(self, state, interval):
|
||||
def checkForStateSync(expectedState):
|
||||
response = self.getHost(hostId=self.host.id).outofbandmanagement
|
||||
return response.powerstate == expectedState, None
|
||||
|
||||
sync_interval = 1 + int(interval)/1000
|
||||
res, _ = wait_until(sync_interval, 10, checkForStateSync, state)
|
||||
if not res:
|
||||
self.fail("Failed to get host.powerstate synced to expected state:" + state)
|
||||
response = self.getHost(hostId=self.host.id).outofbandmanagement
|
||||
self.assertEqual(response.powerstate, state)
|
||||
|
||||
def get_host_in_available_state(self):
|
||||
|
||||
self.configure_and_start_ipmi_server()
|
||||
self.assert_issue_command_state('ON', 'On')
|
||||
self.configureAndEnableHostHa()
|
||||
|
||||
self.check_host_transition_to_available()
|
||||
|
||||
response = self.getHost()
|
||||
if response.hostha.hastate is not "Available":
|
||||
print response
|
||||
|
||||
self.assertEqual(response.hostha.hastate, "Available")
|
||||
|
||||
def configureAndEnableHostHa(self):
|
||||
self.apiClient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
|
||||
response = self.apiClient.enableHAForHost(self.getHostHaEnableCmd())
|
||||
self.assertEqual(response.haenable, True)
|
||||
|
||||
def configure_and_start_ipmi_server(self, power_state=None):
|
||||
"""
|
||||
Setup ipmisim and enable out-of-band management for host
|
||||
"""
|
||||
self.configure_and_enable_oobm()
|
||||
self.start_ipmi_server()
|
||||
if power_state:
|
||||
bmc = IpmiServerContext().bmc
|
||||
bmc.powerstate = power_state
|
||||
|
||||
def assert_issue_command_state(self, command, expected):
|
||||
"""
|
||||
Asserts power action result for a given power command
|
||||
"""
|
||||
if command != 'STATUS':
|
||||
self.issue_power_action_cmd(command)
|
||||
response = self.issue_power_action_cmd('STATUS')
|
||||
self.assertEqual(response.powerstate, expected)
|
||||
|
||||
def kill_agent(self):
|
||||
t_end = time.time() + 90
|
||||
while time.time() < t_end:
|
||||
try:
|
||||
SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"],
|
||||
passwd=self.hostConfig["password"]).execute \
|
||||
("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')")
|
||||
return
|
||||
except Exception:
|
||||
print("Cannot ssh into: " + self.host.ipaddress)
|
||||
self.fail(self)
|
||||
|
||||
def set_host_to_alert(self):
|
||||
self.dbclient.execute("update host set host.status = 'Alert' where host.uuid = '%s'" % self.host.id)
|
||||
|
||||
    def check_host_transitioned_to_degraded(self):
        t_end = time.time() + 120
        while time.time() < t_end:
            host = self.getHost()
            if host.hostha.hastate == "Degraded":
                return
            time.sleep(5)
        self.fail("Host HA state did not transition to Degraded")

    def wait_util_host_is_fencing(self):
        t_end = time.time() + 120
        while time.time() < t_end:
            host = self.getHost()
            if host.hostha.hastate == "Fencing":
                return
            time.sleep(5)
        self.fail("Host HA state did not transition to Fencing")

    def check_host_transitioned_to_suspect(self):
        t_end = time.time() + 120
        while time.time() < t_end:
            host = self.getHost()
            if host.hostha.hastate == "Suspect":
                return
            time.sleep(5)
        self.fail("Host HA state did not transition to Suspect")

    def check_host_transitioned_to_checking(self):
        t_end = time.time() + 120
        while time.time() < t_end:
            host = self.getHost()
            if host.hostha.hastate == "Checking":
                return
            time.sleep(5)
        self.fail("Host HA state did not transition to Checking")

    def wait_util_host_is_fenced(self):
        t_end = time.time() + 120
        while time.time() < t_end:
            host = self.getHost()
            if host.hostha.hastate == "Fenced":
                return
            time.sleep(5)
        self.fail("Host HA state did not transition to Fenced")

    def wait_util_host_is_up(self):
        t_end = time.time() + 120
        while time.time() < t_end:
            host = self.getHost()
            if host.state == "Up":
                return
            time.sleep(5)
        self.fail("Host state did not transition to Up")

    def stop_agent(self):
        SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute \
            ("service cloudstack-agent stop")

    def start_agent(self):
        self.ssh_and_restart_agent()
        self.check_host_transition_to_available()

    def ssh_and_restart_agent(self):
        t_end = time.time() + 90
        while time.time() < t_end:
            try:
                SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"],
                          passwd=self.hostConfig["password"]).execute \
                    ("service cloudstack-agent restart")
                return
            except Exception:
                print("Cannot ssh into: " + self.host.ipaddress)
        self.fail("Failed to ssh into host and restart the cloudstack-agent service")

    def check_host_transition_to_available(self):
        t_end = time.time() + 90
        while time.time() < t_end:
            host = self.getHost()
            if host.hostha.hastate == "Available":
                return
            time.sleep(5)
        self.fail("Host HA state did not transition to Available")

    def wait_util_host_is_recovered(self):
        t_end = time.time() + 180
        while time.time() < t_end:
            host = self.getHost()
            if host.hostha.hastate == "Recovered":
                return
            time.sleep(5)
        self.fail("Host HA state did not transition to Recovered")

    def reset_host(self):
        SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"],
                  passwd=self.hostConfig["password"]).execute \
            ("reboot")

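    # Deploys a VM pinned to the HA-enabled host, so the host carries a running workload during HA checks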
    def deploy_vm(self):
        vm = VirtualMachine.create(
            self.apiClient,
            services=self.services["virtual_machine"],
            serviceofferingid=self.service_offering.id,
            templateid=self.template.id,
            zoneid=self.zone.id,
            hostid=self.host.id,
            method="POST"
        )

        self.cleanup.append(vm)

    def update_configuration(self, name, value):
        update_configuration_cmd = updateConfiguration.updateConfigurationCmd()
        update_configuration_cmd.name = name
        update_configuration_cmd.value = value
        update_configuration_cmd.clusterid = self.cluster_id

        self.apiClient.updateConfiguration(update_configuration_cmd)


@attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_ha_stop_agent_host_is_degraded(self):
|
||||
"""
|
||||
Tests HA state turns Degraded when agent is stopped
|
||||
"""
|
||||
self.deploy_vm()
|
||||
|
||||
# Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
|
||||
self.get_host_in_available_state()
|
||||
|
||||
# SSH into the KVM Host and executes kill -9 of the agent
|
||||
self.stop_agent()
|
||||
|
||||
# Checks if the host would turn into Degraded in the next 120 seconds
|
||||
try:
|
||||
self.check_host_transitioned_to_degraded()
|
||||
except Exception as e:
|
||||
self.start_agent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Enable Host
|
||||
self.start_agent()
|
||||
|
||||
#@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_ha_recovering_start_agent_host_is_available(self):
|
||||
"""
|
||||
Tests HA state turns Recovered when agent is stopped and host is reset
|
||||
"""
|
||||
# Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
|
||||
# Then kills the agent and wait untill the state is Degraded
|
||||
|
||||
self.deploy_vm()
|
||||
# Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
|
||||
self.get_host_in_available_state()
|
||||
|
||||
# SSH into the KVM Host and executes kill -9 of the agent
|
||||
self.kill_agent()
|
||||
|
||||
# Checks if the host would turn into Degraded in the next 120 seconds
|
||||
try:
|
||||
self.check_host_transitioned_to_degraded()
|
||||
except Exception as e:
|
||||
self.start_agent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Reset host so a shut down could be emulated. During the bootup host should transition into recovered state
|
||||
self.reset_host()
|
||||
|
||||
# Waits until Degraded host turns into Recovered for 180 seconds,
|
||||
# if it fails it tries to revert host back to Available
|
||||
try:
|
||||
self.wait_util_host_is_recovered()
|
||||
except Exception as e:
|
||||
self.start_agent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# SSH into the KVM Host and executes service cloudstack-agent restart of the agent
|
||||
self.start_agent()
|
||||
|
||||
#@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_ha_fencing_host(self):
|
||||
"""
|
||||
Tests HA state turns Recovered when agent is stopped and host is reset,
|
||||
then configure incorrect OOBM configuration, so that Recover command would fail
|
||||
and host would transition into Fenced state.
|
||||
"""
|
||||
self.deploy_vm()
|
||||
|
||||
# Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
|
||||
self.get_host_in_available_state()
|
||||
|
||||
# SSH into the KVM Host and executes kill -9 of the agent
|
||||
self.kill_agent()
|
||||
|
||||
# Checks if the host would turn into Degraded in the next 120 seconds
|
||||
try:
|
||||
self.check_host_transitioned_to_degraded()
|
||||
except Exception as e:
|
||||
self.start_agent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Change OOBM Configuration to invalid so it would fail the recover operations.
|
||||
cmd = self.getOobmConfigCmd()
|
||||
cmd.address = "1.1.1.1"
|
||||
self.apiClient.configureOutOfBandManagement(cmd)
|
||||
|
||||
# Reset host so a shut down could be emulated. During the bootup host should transition into recovered state
|
||||
self.reset_host()
|
||||
self.kill_agent()
|
||||
|
||||
# Waits until Recovering host turns into Fencing for 180 seconds,
|
||||
# if it fails it tries to revert host back to Up
|
||||
try:
|
||||
self.wait_util_host_is_fencing()
|
||||
except Exception as e:
|
||||
self.ssh_and_restart_agent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Configure correct OOBM configuration so that the Fencing operation would succeed
|
||||
self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd())
|
||||
|
||||
# Waits until Fencing host turns into Fenced for 180 seconds,
|
||||
# if it fails it tries to revert host back to Up
|
||||
try:
|
||||
self.wait_util_host_is_fenced()
|
||||
except Exception as e:
|
||||
self.ssh_and_restart_agent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# SSH into the KVM Host and executes service cloudstack-agent restart of the agent
|
||||
self.ssh_and_restart_agent()
|
||||
|
||||
# Waits until state is Up so that cleanup would be successful
|
||||
self.wait_util_host_is_up()
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_ha_kill_agent_host_is_degraded(self):
|
||||
"""
|
||||
Tests HA state turns Suspect/Checking when some activity/health checks fail
|
||||
Configures HA, Logs into to a host and restarts the service
|
||||
Then it confirms the ha state jumps through Suspect -> Checking -> Available
|
||||
"""
|
||||
# Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
|
||||
self.get_host_in_available_state()
|
||||
|
||||
# SSH into the KVM Host and executes kill -9 of the agent
|
||||
self.ssh_and_restart_agent()
|
||||
|
||||
# Checks if the host would turn into Suspect in the next 120 seconds
|
||||
try:
|
||||
self.check_host_transitioned_to_suspect()
|
||||
except Exception as e:
|
||||
self.start_agent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Checks if the host would turn into Degraded in the next 120 seconds
|
||||
try:
|
||||
self.check_host_transitioned_to_checking()
|
||||
except Exception as e:
|
||||
self.start_agent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Enable Host
|
||||
self.check_host_transition_to_available()
|
||||
|
|
@@ -39,25 +39,62 @@ class TestHAKVM(cloudstackTestCase):
    """

    def setUp(self):
        self.testClient = super(TestHAKVM, self).getClsTestClient()
        self.apiclient = self.testClient.getApiClient()
        self.hypervisor = self.testClient.getHypervisorInfo()
        self.dbclient = self.testClient.getDbConnection()
        self.services = self.testClient.getParsedTestDataConfig()
        self.logger = logging.getLogger('TestHAKVM')

        # Get Zone specifics
        self.zone = get_zone(self.apiclient, self.testClient.getZoneForTests())
        self.hypervisor = self.testClient.getHypervisorInfo()
        self.host = self.getHost()
        self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__
        self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__
        self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20)
        self.cluster_id = self.host.clusterid

        # Cleanup any existing configs
        self.dbclient.execute("delete from ha_config where resource_type='Host'")
        self.host = self.getHost()

        # use random port for ipmisim
        self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20)
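        # bind to port 0 so the OS assigns a free ephemeral port for ipmisim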
        s = socket.socket()
        s.bind(('', 0))
        self.serverPort = s.getsockname()[1]
        s.close()

        self.cleanup = []
        # Set Cluster-level setting in order to run tests faster
        self.updateConfiguration("kvm.ha.activity.check.failure.ratio", "0.6")
        self.updateConfiguration("kvm.ha.activity.check.interval", "8")
        self.updateConfiguration("kvm.ha.activity.check.max.attempts", "5")
        self.updateConfiguration("kvm.ha.activity.check.timeout", "30")
        self.updateConfiguration("kvm.ha.degraded.max.period", "30")
        self.updateConfiguration("kvm.ha.fence.timeout", "30")
        self.updateConfiguration("kvm.ha.health.check.timeout", "30")
        self.updateConfiguration("kvm.ha.recover.failure.threshold", "2")
        self.updateConfiguration("kvm.ha.recover.timeout", "30")
        self.updateConfiguration("kvm.ha.recover.wait.period", "30")

        self.service_offering = ServiceOffering.create(
            self.apiclient,
            self.services["service_offerings"]["hasmall"]
        )

        self.template = get_template(
            self.apiclient,
            self.zone.id,
            self.services["ostype"]
        )

        self.configureAndDisableHostHa()
        self.cleanup = [self.service_offering]

    def updateConfiguration(self, name, value):
        cmd = updateConfiguration.updateConfigurationCmd()
        cmd.name = name
        cmd.value = value
        cmd.clusterid = self.cluster_id
        self.apiclient.updateConfiguration(cmd)

    def getFakeMsId(self):
        return self.fakeMsId

@@ -66,6 +103,8 @@ class TestHAKVM(cloudstackTestCase):
        return self.fakeMsId * 1000

    def tearDown(self):
        self.configureAndDisableHostHa()
        self.host = None
        try:
            self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId())
            self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId())

@@ -83,70 +122,43 @@ class TestHAKVM(cloudstackTestCase):

    def getHostHaEnableCmd(self):
        cmd = enableHAForHost.enableHAForHostCmd()
        cmd.hostid = self.getHost().id
        cmd.hostid = self.host.id
        return cmd

    def check_host_transition_to_available(self):
        t_end = time.time() + 90
        while time.time() < t_end:
            host = self.getHost()
            if host.hostha.hastate == "Available":
                return
            else:
                continue
        self.fail(self)

    def getHost(self):

        response = list_hosts(
            self.apiclient,
            type='Routing',
            resourcestate='Enabled'
        )
        if response and len(response) > 0:
            self.host = response[0]
            return self.host
        raise self.skipTest("No KVM hosts found, skipping host-ha test")

    def getHost(self, hostId=None):

        response = list_hosts(
            self.apiclient,
            type='Routing',
            hypervisor='kvm',
            id=hostId
        )
        # Check if more than one kvm hosts are available in order to successfully configure host-ha
        if response and len(response) > 0:
            self.host = response[0]
            return self.host
        raise self.skipTest("No KVM hosts found, skipping host-ha test")
        raise self.skipTest("Not enough KVM hosts found, skipping host-ha test")

    def getHostHaConfigCmd(self, provider='kvmhaprovider'):
        cmd = configureHAForHost.configureHAForHostCmd()
        cmd.provider = provider
        cmd.hostid = self.getHost().id
        return cmd

    def getHostHaEnableCmd(self):
        cmd = enableHAForHost.enableHAForHostCmd()
        cmd.hostid = self.getHost().id
        cmd.hostid = self.host.id
        return cmd

    def getHostHaDisableCmd(self):
        cmd = disableHAForHost.disableHAForHostCmd()
        cmd.hostid = self.getHost().id
        cmd.hostid = self.host.id
        return cmd

    def configureAndEnableHostHa(self, initialize=True):
    def configureAndEnableHostHa(self):
        #Adding sleep between configuring and enabling
        self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
        response = self.apiclient.enableHAForHost(self.getHostHaEnableCmd())
        self.assertEqual(response.haenable, True)
        if initialize:
            self.configureKVMHAProviderState(True, True, True, False)

    def configureAndDisableHostHa(self, hostId):
    def configureAndDisableHostHa(self):
        self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
        cmd = self.getHostHaDisableCmd()
        cmd.hostid = hostId
        cmd.hostid = self.host.id
        response = self.apiclient.disableHAForHost(cmd)
        self.assertEqual(response.hostid, cmd.hostid)
        self.assertEqual(response.haenable, False)

@@ -159,301 +171,95 @@ class TestHAKVM(cloudstackTestCase):
        self.assertEqual(response.haenable, True)
        return response

    def configureKVMHAProviderState(self, health, activity, recover, fence):
        cmd = configureHAForHost.configureHAForHostCmd()
        cmd.hostid = self.getHost().id
        cmd.health = health
        cmd.activity = activity
        cmd.recover = recover
        cmd.fence = fence
        response = self.apiclient.configureKVMHAProviderState(cmd)
        self.assertEqual(response.success, 'true')
    def disableAgent(self):
        SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute \
            ("systemctl disable cloudstack-agent || chkconfig cloudstack-agent off")

    def checkSyncToState(self, state, interval=5000):
        def checkForStateSync(expectedState):
            response = self.getHost(hostId=self.getHost().id).hostha
            return response.hastate == expectedState, None
    def resetHost(self):
        SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"],
                  passwd=self.hostConfig["password"]).execute \
            ("reboot")

        sync_interval = 1 + int(interval) / 1000
        res, _ = wait_until(sync_interval, 10, checkForStateSync, state)
    def enableAgent(self):
        SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute \
            ("systemctl enable cloudstack-agent || chkconfig cloudstack-agent on")

    def waitUntilHostInState(self, state="Available", interval=3):
        def checkForState(expectedState):
            response = self.getHost(self.host.id)
            return response.hostha.hastate == expectedState, None

        res, _ = wait_until(interval, 200, checkForState, state)
        if not res:
            self.fail("Failed to get host.hastate synced to expected state:" + state)
        response = self.getHost(hostId=self.getHost().id).hostha
        self.assertEqual(response.hastate, state)
            self.fail("Failed to see host ha state in :" + state)

@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_hostha_configure_invalid_provider(self):
|
||||
"""
|
||||
Tests host-ha configuration with invalid driver
|
||||
"""
|
||||
cmd = self.getHostHaConfigCmd()
|
||||
cmd.provider = 'randomDriverThatDoesNotExist'
|
||||
try:
|
||||
response = self.apiclient.configureHAForHost(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_hostha_configure_default_driver(self):
|
||||
"""
|
||||
Tests host-ha configuration with valid data
|
||||
"""
|
||||
cmd = self.getHostHaConfigCmd()
|
||||
response = self.apiclient.configureHAForHost(cmd)
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haprovider, cmd.provider.lower())
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_ha_enable_feature_invalid(self):
|
||||
"""
|
||||
Tests ha feature enable command with invalid options
|
||||
"""
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
cmd.hostid = -1
|
||||
try:
|
||||
response = self.apiclient.enableHAForHost(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
try:
|
||||
cmd = enableHAForCluster.enableHAForClusterCmd()
|
||||
response = self.apiclient.enableHAForCluster(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
try:
|
||||
cmd = enableHAForZone.enableHAForZoneCmd()
|
||||
response = self.apiclient.enableHAForZone(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_ha_disable_feature_invalid(self):
|
||||
"""
|
||||
Tests ha feature disable command with invalid options
|
||||
"""
|
||||
cmd = self.getHostHaDisableCmd()
|
||||
cmd.hostid = -1
|
||||
def deployVM(self):
|
||||
try:
|
||||
response = self.apiclient.disableHAForHost(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
try:
|
||||
cmd = disableHAForCluster.disableHAForClusterCmd()
|
||||
response = self.apiclient.disableHAForCluster(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
try:
|
||||
cmd = disableHAForZone.disableHAForZoneCmd()
|
||||
response = self.apiclient.disableHAForZone(cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_hostha_enable_feature_valid(self):
|
||||
"""
|
||||
Tests host-ha enable feature with valid options
|
||||
"""
|
||||
self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
response = self.apiclient.enableHAForHost(cmd)
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haenable, True)
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_hostha_disable_feature_valid(self):
|
||||
"""
|
||||
Tests host-ha disable feature with valid options
|
||||
"""
|
||||
self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
cmd = self.getHostHaDisableCmd()
|
||||
response = self.apiclient.disableHAForHost(cmd)
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haenable, False)
|
||||
|
||||
response = self.getHost(hostId=cmd.hostid).hostha
|
||||
self.assertEqual(response.hastate, 'Disabled')
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_configure_ha_provider_invalid(self):
|
||||
"""
|
||||
Tests configure HA Provider with invalid provider options
|
||||
"""
|
||||
|
||||
# Enable ha for host
|
||||
self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
response = self.apiclient.enableHAForHost(cmd)
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haenable, True)
|
||||
|
||||
host = self.getHost(response.hostid)
|
||||
|
||||
# Setup wrong configuration for the host
|
||||
conf_ha_cmd = configureHAForHost.configureHAForHostCmd()
|
||||
if host.hypervisor.lower() in "simulator":
|
||||
conf_ha_cmd.provider = "kvmhaprovider"
|
||||
if host.hypervisor.lower() in "kvm":
|
||||
conf_ha_cmd.provider = "simulatorhaprovider"
|
||||
|
||||
conf_ha_cmd.hostid = cmd.hostid
|
||||
|
||||
# Call the configure HA provider API with not supported provider for HA
|
||||
try:
|
||||
self.apiclient.configureHAForHost(conf_ha_cmd)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_configure_ha_provider_valid(self):
|
||||
"""
|
||||
Tests configure HA Provider with valid provider options
|
||||
"""
|
||||
|
||||
# Enable ha for host
|
||||
self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
response = self.apiclient.enableHAForHost(cmd)
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haenable, True)
|
||||
|
||||
host = self.getHost(response.hostid)
|
||||
|
||||
# Setup configuration for the host
|
||||
conf_ha_cmd = configureHAForHost.configureHAForHostCmd()
|
||||
if host.hypervisor.lower() in "kvm":
|
||||
conf_ha_cmd.provider = "kvmhaprovider"
|
||||
if host.hypervisor.lower() in "simulator":
|
||||
conf_ha_cmd.provider = "simulatorhaprovider"
|
||||
|
||||
conf_ha_cmd.hostid = cmd.hostid
|
||||
|
||||
# Call the configure HA provider API with not supported provider for HA
|
||||
response = self.apiclient.configureHAForHost(conf_ha_cmd)
|
||||
|
||||
# Check the response contains the set provider and hostID
|
||||
self.assertEqual(response.haprovider, conf_ha_cmd.provider)
|
||||
self.assertEqual(response.hostid, conf_ha_cmd.hostid)
|
||||
vm = VirtualMachine.create(
|
||||
self.apiclient,
|
||||
services=self.services["virtual_machine"],
|
||||
serviceofferingid=self.service_offering.id,
|
||||
templateid=self.template.id,
|
||||
zoneid=self.zone.id,
|
||||
hostid = self.host.id,
|
||||
method="POST"
|
||||
)
|
||||
self.cleanup.append(vm)
|
||||
except Exception as e:
|
||||
raise self.skipTest("Failed to deploy VM, skipping kvm host-ha test case")
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_disable_oobm_ha_state_ineligible(self):
|
||||
"""
|
||||
Tests that when HA is enabled for a host, if oobm is disabled HA State should turn into Ineligible
|
||||
"""
|
||||
self.logger.debug("Starting test_disable_oobm_ha_state_ineligible")
|
||||
|
||||
# Enable ha for host
|
||||
self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
response = self.apiclient.enableHAForHost(cmd)
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haenable, True)
|
||||
self.configureAndEnableHostHa()
|
||||
|
||||
# Disable OOBM
|
||||
self.apiclient.configureOutOfBandManagement(self.getOobmConfigCmd())
|
||||
oobm_cmd = self.getOobmDisableCmd()
|
||||
oobm_cmd.hostid = cmd.hostid
|
||||
oobm_cmd.hostid = self.host.id
|
||||
response = self.apiclient.disableOutOfBandManagementForHost(oobm_cmd)
|
||||
self.assertEqual(response.hostid, oobm_cmd.hostid)
|
||||
self.assertEqual(response.enabled, False)
|
||||
|
||||
response = self.getHost(hostId=cmd.hostid).outofbandmanagement
|
||||
response = self.getHost(hostId=self.host.id).outofbandmanagement
|
||||
self.assertEqual(response.powerstate, 'Disabled')
|
||||
|
||||
# Verify HA State is Ineligeble
|
||||
response = self.getHost(hostId=cmd.hostid).hostha
|
||||
self.assertEqual(response.hastate, "Ineligible")
|
||||
self.waitUntilHostInState("Ineligible")
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_hostha_configure_default_driver(self):
|
||||
"""
|
||||
Tests host-ha configuration with valid data
|
||||
"""
|
||||
self.logger.debug("Starting test_hostha_configure_default_driver")
|
||||
|
||||
cmd = self.getHostHaConfigCmd()
|
||||
response = self.apiclient.configureHAForHost(cmd)
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haprovider, cmd.provider.lower())
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_enable_ha_when_host_powerstate_on(self):
|
||||
"""
|
||||
Tests that when HA is enabled for a host, if oobm state is on HA State should turn into Available
|
||||
"""
|
||||
|
||||
self.configureAndStartIpmiServer()
|
||||
|
||||
self.assertIssueCommandState('ON', 'On')
|
||||
|
||||
self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
response = self.apiclient.enableHAForHost(cmd)
|
||||
self.assertEqual(response.hostid, cmd.hostid)
|
||||
self.assertEqual(response.haenable, True)
|
||||
|
||||
# Verify HA State is Available
|
||||
self.check_host_transition_to_available()
|
||||
|
||||
response = self.getHost()
|
||||
if response.hostha.hastate is not "Available":
|
||||
print response
|
||||
|
||||
self.assertEqual(response.hostha.hastate, "Available")
|
||||
|
||||
self.stopIpmiServer()
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_hostha_enable_feature_without_setting_provider(self):
|
||||
"""
|
||||
Tests Enable HA without setting the provider, Exception is thrown
|
||||
"""
|
||||
host = self.get_non_configured_ha_host()
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
cmd.hostid = host.id
|
||||
|
||||
try:
|
||||
self.apiclient.enableHAForHost(cmd)
|
||||
except Exception as e:
|
||||
pass
|
||||
else:
|
||||
self.fail("Expected an exception to be thrown, failing")
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="treu")
|
||||
def test_hostha_enable_ha_when_host_disabled(self):
|
||||
"""
|
||||
Tests Enable HA when host is disconnected, should be Ineligible
|
||||
"""
|
||||
self.logger.debug("Starting test_hostha_enable_ha_when_host_disabled")
|
||||
|
||||
# Enable HA
|
||||
self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
cmd.hostid = self.host.id
|
||||
enable = self.apiclient.enableHAForHost(cmd)
|
||||
self.assertEqual(enable.hostid, cmd.hostid)
|
||||
self.assertEqual(enable.haenable, True)
|
||||
self.configureAndEnableHostHa()
|
||||
|
||||
# Disable Host
|
||||
self.disableHost(self.host.id)
|
||||
|
||||
# Check HA State
|
||||
try:
|
||||
response = self.getHost(self.host.id)
|
||||
self.assertEqual(response.hostha.hastate, "Ineligible")
|
||||
self.waitUntilHostInState("Ineligible")
|
||||
except Exception as e:
|
||||
self.enableHost(self.host.id)
|
||||
self.fail(e)
|
||||
|
|
@@ -462,46 +268,39 @@ class TestHAKVM(cloudstackTestCase):
        self.enableHost(self.host.id)

    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
    def test_hostha_enable_ha_when_host_inMaintenance(self):
    def test_hostha_enable_ha_when_host_in_maintenance(self):
        """
            Tests Enable HA when host is in Maintenance mode, should be Ineligible
        """

        host = self.getHost()
        self.logger.debug("Starting test_hostha_enable_ha_when_host_in_maintenance")

        # Enable HA
        self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
        cmd = self.getHostHaEnableCmd()
        cmd.hostid = host.id
        enable = self.apiclient.enableHAForHost(cmd)
        self.assertEqual(enable.hostid, cmd.hostid)
        self.assertEqual(enable.haenable, True)
        self.configureAndEnableHostHa()

        # Prepare for maintenance Host
        self.setHostToMaintanance(host.id)
        self.setHostToMaintanance(self.host.id)

        # Check HA State
        try:
            response = self.getHost(host.id)
            self.assertEqual(response.hostha.hastate, "Ineligible")
            self.waitUntilHostInState("Ineligible")
        except Exception as e:
            self.cancelMaintenance(host.id)
            self.cancelMaintenance()
            self.fail(e)

        # Enable Host
        self.cancelMaintenance(host.id)
        self.cancelMaintenance()

    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
    def test_hostha_enable_ha_when_host_disconnected(self):
        """
            Tests Enable HA when host is disconnected, should be Ineligible
        """
        host = self.getHost()
        self.logger.debug("Starting test_hostha_enable_ha_when_host_disconnected")

        # Enable HA
        self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
        cmd = self.getHostHaEnableCmd()
        cmd.hostid = host.id
        cmd.hostid = self.host.id
        enable = self.apiclient.enableHAForHost(cmd)
        self.assertEqual(enable.hostid, cmd.hostid)
        self.assertEqual(enable.haenable, True)

@@ -511,9 +310,7 @@ class TestHAKVM(cloudstackTestCase):

        # Check HA State
        try:
            time.sleep(1)
            response = self.getHost(self.host.id)
            self.assertEqual(response.hostha.hastate, "Ineligible")
            self.waitUntilHostInState("Ineligible")
        except Exception as e:
            self.startAgent()
            self.fail(e)

@@ -526,13 +323,13 @@ class TestHAKVM(cloudstackTestCase):
"""
|
||||
Tests HA Provider should be possible to be removed when HA is enabled
|
||||
"""
|
||||
self.logger.debug("Starting test_remove_ha_provider_not_possible")
|
||||
|
||||
host = self.getHost()
|
||||
|
||||
# Enable HA
|
||||
self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
|
||||
cmd = self.getHostHaEnableCmd()
|
||||
cmd.hostid = host.id
|
||||
cmd.hostid = self.host.id
|
||||
enable = self.apiclient.enableHAForHost(cmd)
|
||||
self.assertEqual(enable.hostid, cmd.hostid)
|
||||
self.assertEqual(enable.haenable, True)
|
||||
|
|
@@ -544,6 +341,136 @@ class TestHAKVM(cloudstackTestCase):
        else:
            self.fail("Expected an exception to be thrown, failing")

    @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
    def test_hostha_kvm_host_degraded(self):
        """
            Tests degraded HA state when agent is stopped/killed
        """

        self.configureAndStartIpmiServer()
        self.assertIssueCommandState('ON', 'On')
        self.configureAndEnableHostHa()

        self.deployVM()

        # Start with the available state
        self.waitUntilHostInState("Available")

        # SSH into the KVM Host and stop the cloudstack-agent service
        self.stopAgent()

        # Check if host would go into Suspect state
        try:
            self.waitUntilHostInState("Suspect")
        except Exception as e:
            self.startAgent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        # Checks if the host would turn into Degraded
        try:
            self.waitUntilHostInState("Degraded")
        except Exception as e:
            self.startAgent()
            raise Exception("Warning: Exception during test execution : %s" % e)

        self.startAgent()
        self.waitUntilHostInState("Available")


@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_hostha_kvm_host_recovering(self):
|
||||
"""
|
||||
Tests recovery and fencing HA state transitions
|
||||
"""
|
||||
|
||||
self.configureAndStartIpmiServer()
|
||||
self.assertIssueCommandState('ON', 'On')
|
||||
self.configureAndEnableHostHa()
|
||||
|
||||
self.deployVM()
|
||||
|
||||
# Start with the available state
|
||||
self.waitUntilHostInState("Available")
|
||||
|
||||
# Kill host by triggering a fault
|
||||
self.killAgent()
|
||||
self.disableAgent()
|
||||
self.resetHost()
|
||||
|
||||
# Check if host would go into Suspect state
|
||||
try:
|
||||
self.waitUntilHostInState("Suspect")
|
||||
except Exception as e:
|
||||
self.startAgent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Checks if the host would turn into Recovered
|
||||
try:
|
||||
self.waitUntilHostInState("Recovered")
|
||||
except Exception as e:
|
||||
self.startAgent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
self.enableAgent()
|
||||
self.startAgent()
|
||||
self.waitUntilHostInState("Available")
|
||||
|
||||
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
|
||||
def test_hostha_kvm_host_fencing(self):
|
||||
"""
|
||||
Tests fencing/fenced HA state when host crashes
|
||||
"""
|
||||
self.logger.debug("Starting test_ha_kvm_host_fencing")
|
||||
|
||||
|
||||
self.configureAndStartIpmiServer()
|
||||
self.assertIssueCommandState('ON', 'On')
|
||||
self.configureAndEnableHostHa()
|
||||
|
||||
self.deployVM()
|
||||
|
||||
# Start with the available state
|
||||
self.waitUntilHostInState("Available")
|
||||
|
||||
# Fail oobm commands
|
||||
cmd = self.getOobmConfigCmd()
|
||||
cmd.address = "1.1.1.1"
|
||||
self.apiclient.configureOutOfBandManagement(cmd)
|
||||
|
||||
# Kill host by triggering a fault
|
||||
self.killAgent()
|
||||
self.disableAgent()
|
||||
self.resetHost()
|
||||
|
||||
# Check if host would go into Suspect state
|
||||
try:
|
||||
self.waitUntilHostInState("Suspect")
|
||||
except Exception as e:
|
||||
self.startAgent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Checks if the host would turn into Fencing
|
||||
try:
|
||||
self.waitUntilHostInState("Fencing")
|
||||
except Exception as e:
|
||||
self.startAgent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
# Allow oobm commands to work now
|
||||
self.configureAndEnableOobm()
|
||||
|
||||
# Checks if the host would turn into Fenced
|
||||
try:
|
||||
self.waitUntilHostInState("Fenced")
|
||||
except Exception as e:
|
||||
self.startAgent()
|
||||
raise Exception("Warning: Exception during test execution : %s" % e)
|
||||
|
||||
self.enableAgent()
|
||||
self.startAgent()
|
||||
self.cancelMaintenance()
|
||||
self.waitUntilHostInState("Available")
|
||||
|
||||
def configureAndStartIpmiServer(self, power_state=None):
|
||||
"""
|
||||
Setup ipmisim and enable out-of-band management for host
|
||||
|
|
@@ -587,7 +514,7 @@ class TestHAKVM(cloudstackTestCase):

    def getOobmIssueActionCmd(self):
        cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd()
        cmd.hostid = self.getHost().id
        cmd.hostid = self.host.id
        cmd.action = 'STATUS'
        return cmd


@@ -606,12 +533,12 @@ class TestHAKVM(cloudstackTestCase):

    def getOobmEnableCmd(self):
        cmd = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd()
        cmd.hostid = self.getHost().id
        cmd.hostid = self.host.id
        return cmd

    def getOobmDisableCmd(self):
        cmd = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd()
        cmd.hostid = self.getHost().id
        cmd.hostid = self.host.id
        return cmd

    def getIpmiServerPort(self):

@@ -624,7 +551,7 @@ class TestHAKVM(cloudstackTestCase):
        cmd.port = self.getIpmiServerPort()
        cmd.username = 'admin'
        cmd.password = 'password'
        cmd.hostid = self.getHost().id
        cmd.hostid = self.host.id
        return cmd

    def getIpmiServerIp(self):

@@ -655,28 +582,31 @@ class TestHAKVM(cloudstackTestCase):
        return response[0]

    def startAgent(self):
        host = self.getHost()
        SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"],
        SshClient(host=self.host.ipaddress, port=22, user=self.hostConfig["username"],
                  passwd=self.hostConfig["password"]).execute \
            ("service cloudstack-agent start")
            ("systemctl start cloudstack-agent || service cloudstack-agent start")

    def stopAgent(self):
        SshClient(host=self.host.ipaddress, port=22, user=self.hostConfig["username"],
                  passwd=self.hostConfig["password"]).execute \
            ("systemctl stop cloudstack-agent || service cloudstack-agent stop")

    def killAgent(self):
        SshClient(host=self.host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute \
            ("kill -9 $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')")

    def disableHost(self, id):

        cmd = updateHost.updateHostCmd()
        cmd.id = id
        cmd.allocationstate = "Disable"

        response = self.apiclient.updateHost(cmd)

        self.assertEqual(response.resourcestate, "Disabled")

    def enableHost(self, id):
        cmd = updateHost.updateHostCmd()
        cmd.id = id
        cmd.allocationstate = "Enable"

        response = self.apiclient.updateHost(cmd)

        self.assertEqual(response.resourcestate, "Enabled")

    def setHostToMaintanance(self, id):

@@ -687,15 +617,9 @@ class TestHAKVM(cloudstackTestCase):

        self.assertEqual(response.resourcestate, "PrepareForMaintenance")

    def cancelMaintenance(self, id):
    def cancelMaintenance(self):
        cmd = cancelHostMaintenance.cancelHostMaintenanceCmd()
        cmd.id = id

        cmd.id = self.host.id
        response = self.apiclient.cancelHostMaintenance(cmd)

        self.assertEqual(response.resourcestate, "Enabled")

    def killAgent(self):
        host = self.getHost()
        SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute \
            ("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')")

@@ -23,8 +23,6 @@ from marvin.lib.base import *
from marvin.lib.common import *
from nose.plugins.attrib import attr

import random

from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer

import random

@@ -35,7 +33,7 @@ import time


class TestHostHA(cloudstackTestCase):
    """ Test cases for host HA using Simulator host(s)
    """ Test host-ha business logic using Simulator
    """

    def setUp(self):

@@ -45,10 +43,10 @@ class TestHostHA(cloudstackTestCase):
        self.services = self.testClient.getParsedTestDataConfig()
        self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__
        self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20)
        self.host = None

        # Cleanup any existing configs
        self.dbclient.execute("delete from ha_config where resource_type='Host'")
        self.host = None

        # use random port for ipmisim
        s = socket.socket()

@@ -56,10 +54,17 @@ class TestHostHA(cloudstackTestCase):
        self.serverPort = s.getsockname()[1]
        s.close()

        # Get a host to run tests against
        self.host = self.getHost()

        self.cleanup = []


    def tearDown(self):
        try:
            host = self.getHost()
            self.configureAndDisableHostHa(host.id)
            self.host = None
            self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId())
            self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId())
            self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'")

@@ -70,12 +75,15 @@ class TestHostHA(cloudstackTestCase):
        except Exception as e:
            raise Exception("Warning: Exception during cleanup : %s" % e)


    def getFakeMsId(self):
        return self.fakeMsId


    def getFakeMsRunId(self):
        return self.fakeMsId * 1000


    def getHost(self, hostId=None):
        if self.host and hostId is None:
            return self.host

@@ -87,10 +95,13 @@ class TestHostHA(cloudstackTestCase):
            resourcestate='Enabled',
            id=hostId
        )

        if response and len(response) > 0:
            random.shuffle(response)
            self.host = response[0]
            return self.host
        raise self.skipTest("No simulator hosts found, skipping host-ha test")
        raise self.skipTest("No suitable hosts found, skipping host-ha test")


    def getHostHaConfigCmd(self, provider='simulatorhaprovider'):
        cmd = configureHAForHost.configureHAForHostCmd()

@@ -98,16 +109,25 @@ class TestHostHA(cloudstackTestCase):
        cmd.hostid = self.getHost().id
        return cmd


    def getHostHaEnableCmd(self):
        cmd = enableHAForHost.enableHAForHostCmd()
        cmd.hostid = self.getHost().id
        return cmd


    def getHostHaDisableCmd(self):
        cmd = disableHAForHost.disableHAForHostCmd()
        cmd.hostid = self.getHost().id
        return cmd


    def getListHostHAResources(self):
        cmd = listHostHAResources.listHostHAResourcesCmd()
        cmd.hostid = self.getHost().id
        return cmd


    def configureAndEnableHostHa(self, initialize=True):
        self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
        response = self.apiclient.enableHAForHost(self.getHostHaEnableCmd())

@@ -115,6 +135,7 @@ class TestHostHA(cloudstackTestCase):
        if initialize:
            self.configureSimulatorHAProviderState(True, True, True, False)


    def configureAndDisableHostHa(self, hostId):
        self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
        cmd = self.getHostHaDisableCmd()

@@ -123,6 +144,7 @@ class TestHostHA(cloudstackTestCase):
        self.assertEqual(response.hostid, cmd.hostid)
        self.assertEqual(response.haenable, False)


    def enableHostHa(self, hostId):
        cmd = self.getHostHaEnableCmd()
        cmd.hostid = hostId

@@ -130,6 +152,7 @@ class TestHostHA(cloudstackTestCase):
        self.assertEqual(response.hostid, cmd.hostid)
        self.assertEqual(response.haenable, True)


    def configureSimulatorHAProviderState(self, health, activity, recover, fence):
        cmd = configureSimulatorHAProviderState.configureSimulatorHAProviderStateCmd()
        cmd.hostid = self.getHost().id

@@ -140,24 +163,27 @@ class TestHostHA(cloudstackTestCase):
        response = self.apiclient.configureSimulatorHAProviderState(cmd)
        self.assertEqual(response.success, 'true')


    def getSimulatorHAStateTransitions(self, hostId):
        cmd = listSimulatorHAStateTransitions.listSimulatorHAStateTransitionsCmd()
        cmd.hostid = hostId
        return self.apiclient.listSimulatorHAStateTransitions(cmd)


    def checkSyncToState(self, state, interval=5000):
        def checkForStateSync(expectedState):
            response = self.getHost(hostId=self.getHost().id).hostha
            return response.hastate == expectedState, None

        sync_interval = 1 + int(interval) / 1000
        res, _ = wait_until(sync_interval, 10, checkForStateSync, state)
        res, _ = wait_until(sync_interval, 100, checkForStateSync, state)
        if not res:
            self.fail("Failed to get host.hastate synced to expected state:" + state)
        response = self.getHost(hostId=self.getHost().id).hostha
        self.assertEqual(response.hastate, state)

    def get_non_configured_ha_host(self):

    def getNonConfiguredHaHost(self):
        response = list_hosts(
            self.apiclient,
            type='Routing'

@@ -168,12 +194,13 @@ class TestHostHA(cloudstackTestCase):
        else:
            return None


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_hostha_enable_feature_without_setting_provider(self):
        """
            Tests Enable HA without setting the provider, Exception is thrown
        """
        host = self.get_non_configured_ha_host()
        host = self.getNonConfiguredHaHost()

        if host is None:
            cloudstackTestCase.skipTest(self, "There are no unconfigured hosts. Skipping test.")

@@ -188,6 +215,7 @@ class TestHostHA(cloudstackTestCase):
        else:
            self.fail("Expected an exception to be thrown, failing")


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_list_providers(self):
        """

@@ -203,6 +231,7 @@ class TestHostHA(cloudstackTestCase):
        response = self.apiclient.listHostHAProviders(cmd)[0]
        self.assertEqual(response.haprovider, 'KVMHAProvider')


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_hostha_configure_invalid_provider(self):
        """

@@ -217,6 +246,7 @@ class TestHostHA(cloudstackTestCase):
        else:
            self.fail("Expected an exception to be thrown, failing")


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_hostha_configure_default_driver(self):
        """

@@ -227,6 +257,7 @@ class TestHostHA(cloudstackTestCase):
        self.assertEqual(response.hostid, cmd.hostid)
        self.assertEqual(response.haprovider, cmd.provider.lower())


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_enable_feature_invalid(self):
        """

@@ -255,6 +286,7 @@ class TestHostHA(cloudstackTestCase):
        else:
            self.fail("Expected an exception to be thrown, failing")


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_disable_feature_invalid(self):
        """

@@ -284,6 +316,7 @@ class TestHostHA(cloudstackTestCase):
        else:
            self.fail("Expected an exception to be thrown, failing")


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_hostha_enable_feature_valid(self):
        """

@@ -295,6 +328,7 @@ class TestHostHA(cloudstackTestCase):
        self.assertEqual(response.hostid, cmd.hostid)
        self.assertEqual(response.haenable, True)


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_hostha_disable_feature_valid(self):
        """

@@ -309,15 +343,16 @@ class TestHostHA(cloudstackTestCase):
        response = self.getHost(hostId=cmd.hostid).hostha
        self.assertEqual(response.hastate, 'Disabled')


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_enabledisable_across_clusterzones(self):
    def test_ha_configure_enabledisable_across_clusterzones(self):
        """
            Tests ha enable/disable feature at cluster and zone level
            Zone > Cluster > Host
        """
        host = self.getHost()
        self.configureAndEnableHostHa()

        host = self.getHost()
        self.checkSyncToState('Available')
        response = self.getHost(hostId=host.id).hostha
        self.assertTrue(response.hastate == 'Available')

@@ -363,12 +398,16 @@ class TestHostHA(cloudstackTestCase):
        # Check state sync
        self.checkSyncToState('Available')


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_multiple_mgmt_server_ownership(self):
        """
            Tests ha resource ownership expiry across multi-mgmt server
        """
        self.configureAndEnableHostHa()
        host = self.getHost()
        self.configureAndDisableHostHa(host.id)
        self.configureSimulatorHAProviderState(True, True, True, False)
        self.configureAndEnableHostHa(False)

        cloudstackVersion = Configurations.listCapabilities(self.apiclient).cloudstackversion


@@ -416,7 +455,7 @@ class TestHostHA(cloudstackTestCase):

        retry_interval = 1 + (pingInterval * pingTimeout / 10)

        res, _ = wait_until(retry_interval, 10, removeFakeMgmtServer, self.getFakeMsRunId())
        res, _ = wait_until(retry_interval, 20, removeFakeMgmtServer, self.getFakeMsRunId())
        if not res:
            self.fail("Management server failed to shut down or remove the fake management server entry")


@@ -432,23 +471,32 @@ class TestHostHA(cloudstackTestCase):
            newOwnerId = result[0][0]
            self.assertTrue(newOwnerId in currentMsHosts)


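    # Asserts that a recorded FSM transition matches the expected event and states;
    # passing None for a counter flag skips that counter's assertion entirely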
    def checkFSMTransition(self, transition, event, haState, prevHaState, hasActivityCounter, hasRecoveryCounter):
        self.assertEqual(transition.event, event)
        self.assertEqual(transition.hastate, haState)
        self.assertEqual(transition.prevhastate, prevHaState)
        if hasActivityCounter:

        if hasActivityCounter is None:
            pass
        elif hasActivityCounter:
            self.assertTrue(transition.activitycounter > 0)
        else:
            self.assertEqual(transition.activitycounter, 0)
        if hasRecoveryCounter:

        if hasRecoveryCounter is None:
            pass
        elif hasRecoveryCounter:
            self.assertTrue(transition.recoverycounter > 0)
        else:
            self.assertEqual(transition.recoverycounter, 0)


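    # Scans the recorded transitions for the first one entering 'state' and returns
    # (found, (previousTransition, matchingTransition, nextTransition))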
    def findFSMTransitionToState(self, state, host):
        transitions = self.getSimulatorHAStateTransitions(host.id)
        if not transitions:
            return False, (None, None, None)

        previousTransition = None
        stateTransition = None
        nextTransition = None

@@ -460,10 +508,12 @@ class TestHostHA(cloudstackTestCase):
                stateTransition = transition
            if not stateTransition:
                previousTransition = transition

        if stateTransition:
            return True, (previousTransition, stateTransition, nextTransition,)
        return False, (previousTransition, stateTransition, nextTransition,)


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_verify_fsm_available(self):
        """

@@ -472,15 +522,17 @@ class TestHostHA(cloudstackTestCase):
        """

        host = self.getHost()
        self.configureAndDisableHostHa(host.id)
        self.configureSimulatorHAProviderState(True, True, True, False)
        self.configureAndEnableHostHa(False)

        res, (_, T, _) = wait_until(2, 20, self.findFSMTransitionToState, 'available', host)
        res, (_, T, _) = wait_until(3, 20, self.findFSMTransitionToState, 'available', host)
        if not res:
            self.fail("FSM did not transition to available state")

        self.checkFSMTransition(T, 'enabled', 'available', 'disabled', False, False)


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_verify_fsm_degraded(self):
        """

@@ -490,26 +542,26 @@ class TestHostHA(cloudstackTestCase):
        Available->Suspect<->Checking->Degraded->Available
        """
        host = self.getHost()
        self.configureAndDisableHostHa(host.id)
        self.configureSimulatorHAProviderState(False, True, True, False)
        self.configureAndEnableHostHa(False)

        # Initial health check failure
        res, (_, T, _) = wait_until(2, 20, self.findFSMTransitionToState, 'suspect', host)
        res, (_, T, _) = wait_until(3, 50, self.findFSMTransitionToState, 'suspect', host)
        if not res:
            self.fail("FSM did not transition to suspect state")

        self.checkFSMTransition(T, 'healthcheckfailed', 'suspect', 'available', False, False)

        # Check transition to Degraded
        res, (prevT, T, nextT) = wait_until(2, 20, self.findFSMTransitionToState, 'degraded', host)
        res, (prevT, T, _) = wait_until(3, 100, self.findFSMTransitionToState, 'degraded', host)
        if not res:
            self.fail("FSM did not transition to degraded state")

        if prevT:
            self.checkFSMTransition(prevT, 'performactivitycheck', 'checking', 'suspect', True, False)
        self.checkFSMTransition(T, 'activitycheckfailureunderthresholdratio', 'degraded', 'checking', True, False)
        if nextT:
            self.checkFSMTransition(nextT, 'periodicrecheckresourceactivity', 'suspect', 'degraded', False, False)


    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_verify_fsm_recovering(self):

@ -520,36 +572,33 @@ class TestHostHA(cloudstackTestCase):
|
|||
Available->Suspect<->Checking->Recovering->Recovered<-retry-loop->->Fencing
|
||||
"""
|
||||
host = self.getHost()
|
||||
self.configureAndDisableHostHa(host.id)
|
||||
self.configureSimulatorHAProviderState(False, False, True, False)
|
||||
self.configureAndEnableHostHa(False)
|
||||
|
||||
# Initial health check failure
|
||||
res, (_, T, _) = wait_until(2, 30, self.findFSMTransitionToState, 'suspect', host)
|
||||
res, (_, T, _) = wait_until(3, 50, self.findFSMTransitionToState, 'suspect', host)
|
||||
if not res:
|
||||
self.fail("FSM did not transition to suspect state")
|
||||
|
||||
self.checkFSMTransition(T, 'healthcheckfailed', 'suspect', 'available', False, False)
|
||||
|
||||
# Check transition to recovering
|
||||
res, (prevT, T, nextT) = wait_until(2, 60, self.findFSMTransitionToState, 'recovering', host)
|
||||
res, (prevT, T, _) = wait_until(3, 100, self.findFSMTransitionToState, 'recovering', host)
|
||||
if not res:
|
||||
self.fail("FSM did not transition to recovering state")
|
||||
|
||||
if prevT:
|
||||
self.checkFSMTransition(prevT, 'performactivitycheck', 'checking', 'suspect', True, False)
|
||||
self.checkFSMTransition(T, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True, False)
|
||||
if nextT:
|
||||
self.checkFSMTransition(nextT, 'recovered', 'recovered', 'recovering', False, True)
|
||||
|
||||
# Check transition to fencing due to recovery attempts exceeded
|
||||
res, (prevT, T, nextT) = wait_until(2, 60, self.findFSMTransitionToState, 'fencing', host)
|
||||
res, (_, T, _) = wait_until(3, 100, self.findFSMTransitionToState, 'fencing', host)
|
||||
if not res:
|
||||
self.fail("FSM did not transition to fencing state")
|
||||
|
||||
if prevT:
|
||||
self.checkFSMTransition(prevT, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True,
|
||||
True)
|
||||
self.checkFSMTransition(T, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True)
|
||||
self.checkFSMTransition(T, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', None, True)
|
||||
|
||||
|
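
Reviewer note: the assertions above encode the new retry loop: Recovering may re-enter itself until the recovery-operation threshold pushes the FSM into Fencing. An illustrative model of just the edges this test asserts on (state/event names mirror the lowercase forms in the assertions; the retry event name follows this commit's new FSM events; this is a sketch, not the product FSM):

# Minimal dict-based model of the HA FSM edges exercised by this test.
TRANSITIONS = {
    ('available', 'healthcheckfailed'): 'suspect',
    ('suspect', 'performactivitycheck'): 'checking',
    ('checking', 'activitycheckfailureoverthresholdratio'): 'recovering',
    ('recovering', 'retryrecovery'): 'recovering',  # retry loop
    ('recovering', 'recovered'): 'recovered',
    ('recovering', 'recoveryoperationthresholdexceeded'): 'fencing',
}

def step(state, event):
    # Look up the next state; unknown (state, event) pairs raise KeyError.
    return TRANSITIONS[(state, event)]

assert step('recovering', 'retryrecovery') == 'recovering'
assert step('recovering', 'recoveryoperationthresholdexceeded') == 'fencing'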

    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_ha_verify_fsm_fenced(self):
@@ -559,19 +608,18 @@ class TestHostHA(cloudstackTestCase):
            Available->Suspect<->Checking->Recovering<-fail recovery->->Fencing->Fenced
        """
        host = self.getHost()
        self.configureAndDisableHostHa(host.id)
        self.configureSimulatorHAProviderState(False, False, False, True)
        self.configureAndEnableHostHa(False)

        # Check for transition to fenced
-        res, (prevT, T, _) = wait_until(2, 30, self.findFSMTransitionToState, 'fenced', host)
+        res, (prevT, T, _) = wait_until(3, 100, self.findFSMTransitionToState, 'fenced', host)
        if not res:
            self.fail("FSM did not transition to fenced state")

        self.checkFSMTransition(prevT, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True)
        self.checkFSMTransition(T, 'fenced', 'fenced', 'fencing', False, False)

        # TODO: add test case for HA vm reboot checks

        # Simulate manual recovery of host and cancel maintenance mode
        self.configureSimulatorHAProviderState(True, True, True, False)
        cancelCmd = cancelHostMaintenance.cancelHostMaintenanceCmd()
@@ -579,13 +627,13 @@ class TestHostHA(cloudstackTestCase):
        self.apiclient.cancelHostMaintenance(cancelCmd)

        # Check for transition to available after manual recovery
-        res, (prevT, T, _) = wait_until(2, 20, self.findFSMTransitionToState, 'available', host)
+        res, (prevT, T, _) = wait_until(3, 50, self.findFSMTransitionToState, 'available', host)
        if not res:
            self.fail("FSM did not transition to available state")

        self.checkFSMTransition(prevT, 'healthcheckpassed', 'ineligible', 'fenced', False, False)
        self.checkFSMTransition(T, 'eligible', 'available', 'ineligible', False, False)
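
Reviewer note: checkFSMTransition is a class helper defined earlier in this file, outside these hunks. A plausible shape, given how it is called above, is sketched below — the field names and the None-skips-the-check behaviour are assumptions, used only to explain why the fencing assertion in the recovering test was relaxed from False to None:

def checkFSMTransition(self, transition, event, newState, oldState,
                       hasActivityCounter, hasRecoveryCounter):
    # Assert on one row of the host's HA transition log (field names assumed).
    self.assertEqual(transition.event, event)
    self.assertEqual(transition.hastate, newState)
    self.assertEqual(transition.prevhastate, oldState)
    if hasActivityCounter is not None:  # None would mean: skip this counter check
        self.assertEqual(transition.haactivitycheckcounter > 0, hasActivityCounter)
    if hasRecoveryCounter is not None:
        self.assertEqual(transition.harecoverycounter > 0, hasRecoveryCounter)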

    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_configure_ha_provider_invalid(self):
        """
@@ -618,6 +666,7 @@ class TestHostHA(cloudstackTestCase):
        else:
            self.fail("Expected an exception to be thrown, failing")

    @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
    def test_configure_ha_provider_valid(self):
        """
@@ -649,3 +698,75 @@ class TestHostHA(cloudstackTestCase):
        # Check the response contains the set provider and hostID
        self.assertEqual(response.haprovider, conf_ha_cmd.provider)
        self.assertEqual(response.hostid, conf_ha_cmd.hostid)

    def getHaProvider(self, host):
        cmd = listHostHAProviders.listHostHAProvidersCmd()
        cmd.hypervisor = host.hypervisor
        response = self.apiclient.listHostHAProviders(cmd)
        return response[0].haprovider

    def configureHaProvider(self):
        cmd = self.getHostHaConfigCmd(self.getHaProvider(self.getHost()))
        return self.apiclient.configureHAForHost(cmd)
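
Reviewer note: getHostHaConfigCmd is defined earlier in this class and is not part of these hunks. A minimal sketch of what configureHaProvider composes with it, assuming it simply binds the provider name and the test host's id (field names assumed):

def getHostHaConfigCmd(self, provider):
    # Hypothetical sketch: build a configureHAForHost command for the test host.
    cmd = configureHAForHost.configureHAForHostCmd()
    cmd.provider = provider
    cmd.hostid = self.getHost().id
    return cmd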

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_list_ha_for_host(self):
        """
            Test that verifies the listHostHAResources API
        """
        self.configureHaProvider()
        db_count = self.dbclient.execute("SELECT count(*) FROM cloud.ha_config")

        cmd = self.getListHostHAResources()
        del cmd.hostid
        response = self.apiclient.listHostHAResources(cmd)

        self.assertEqual(db_count[0][0], len(response))
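
Reviewer note: the test deletes hostid from the command so listHostHAResources returns all configured rows, then cross-checks the count against the cloud.ha_config table. The getListHostHAResources helper is defined elsewhere in the class; a minimal sketch, assuming it targets the test host by default:

def getListHostHAResources(self):
    # Hypothetical sketch: build a listHostHAResources command scoped to one host.
    cmd = listHostHAResources.listHostHAResourcesCmd()
    cmd.hostid = self.getHost().id
    return cmd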

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_list_ha_for_host_valid(self):
        """
            Valid test for listing a specific host's HA resources
        """
        self.configureHaProvider()
        cmd = self.getListHostHAResources()
        response = self.apiclient.listHostHAResources(cmd)
        self.assertEqual(response[0].hostid, cmd.hostid)

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_list_ha_for_host_invalid(self):
        """
            Test that listHostHAResources raises an exception when called with invalid data
        """
        self.configureHaProvider()
        cmd = self.getListHostHAResources()
        cmd.hostid = "someinvalidvalue"

        try:
            response = self.apiclient.listHostHAResources(cmd)
        except Exception:
            pass
        else:
            self.fail("Expected an exception to be thrown, failing")