Merge remote-tracking branch 'origin/4.11'

Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
This commit is contained in:
Rohit Yadav 2018-12-28 15:20:23 +05:30
commit 92cc4514ea
4 changed files with 84 additions and 4 deletions

View File

@ -38,12 +38,20 @@ import com.cloud.host.Status;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.resource.ResourceState.Event;
import com.cloud.utils.fsm.NoTransitionException;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
/**
* ResourceManager manages how physical resources are organized within the
* CloudStack. It also manages the life cycle of the physical resources.
*/
public interface ResourceManager extends ResourceService {
public interface ResourceManager extends ResourceService, Configurable {
/**
 * Cluster-scoped setting {@code host.maintenance.retries} (category "Advanced", default "20"):
 * the number of retries allowed while a host is being prepared for Maintenance Mode before
 * the attempt is treated as failed.
 */
ConfigKey<Integer> HostMaintenanceRetries = new ConfigKey<>("Advanced", Integer.class,
"host.maintenance.retries","20",
"Number of retries when preparing a host into Maintenance Mode is faulty before failing",
true, ConfigKey.Scope.Cluster);
/**
* Register a listener for different types of resource life cycle events.
* There can only be one type of listener per type of host.

View File

@ -26,11 +26,17 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.vm.dao.UserVmDetailsDao;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.commons.lang.ObjectUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.command.admin.cluster.AddClusterCmd;
import org.apache.cloudstack.api.command.admin.cluster.DeleteClusterCmd;
@ -47,9 +53,6 @@ import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
@ -270,6 +273,8 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
private SearchBuilder<HostGpuGroupsVO> _gpuAvailability;
private Map<Long,Integer> retryHostMaintenance = new ConcurrentHashMap<>();
private void insertListener(final Integer event, final ResourceListener listener) {
List<ResourceListener> lst = _lifeCycleListeners.get(event);
if (lst == null) {
@ -1222,6 +1227,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
ActionEventUtils.onStartedActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(), EventTypes.EVENT_MAINTENANCE_PREPARE, "starting maintenance for host " + hostId, true, 0);
_agentMgr.pullAgentToMaintenance(hostId);
setHostMaintenanceRetries(host);
/* TODO: move below to listener */
if (host.getType() == Host.Type.Routing) {
@ -1249,6 +1255,16 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
return true;
}
/**
 * Stores the retry budget used while the given host is transitioning into Maintenance.
 * The budget is read from the {@code HostMaintenanceRetries} setting of the host's cluster
 * and recorded in {@code retryHostMaintenance} under the host id.
 *
 * @param host host that is being prepared for Maintenance
 */
protected void setHostMaintenanceRetries(HostVO host) {
    final Integer retryBudget = HostMaintenanceRetries.valueIn(host.getClusterId());
    final long id = host.getId();
    retryHostMaintenance.put(id, retryBudget);

    final String message = String.format("Setting the host %s (%s) retries for Maintenance mode: %s",
            id, host.getName(), retryBudget);
    s_logger.debug(message);
}
@Override
public boolean maintain(final long hostId) throws AgentUnavailableException {
final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.AdminAskMaintenace);
@ -1348,7 +1364,23 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
return CollectionUtils.isEmpty(failedMigrations) ?
setHostIntoMaintenance(host) :
setHostIntoErrorInMaintenance(host, failedMigrations);
} else if (retryHostMaintenance.containsKey(host.getId())) {
Integer retriesLeft = retryHostMaintenance.get(host.getId());
if (retriesLeft != null) {
if (retriesLeft <= 0) {
retryHostMaintenance.remove(host.getId());
s_logger.debug(String.format("No retries left while preparing KVM host %s (%s) for Maintenance, " +
"please investigate this connection.",
host.getId(), host.getName()));
return setHostIntoErrorInMaintenance(host, failedMigrations);
}
retriesLeft--;
retryHostMaintenance.put(host.getId(), retriesLeft);
s_logger.debug(String.format("Retries left preparing KVM host %s (%s) for Maintenance: %s",
host.getId(), host.getName(), retriesLeft));
}
}
return false;
}
@ -2314,6 +2346,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
try {
resourceStateTransitTo(host, ResourceState.Event.AdminCancelMaintenance, _nodeId);
_agentMgr.pullAgentOutMaintenance(hostId);
retryHostMaintenance.remove(hostId);
// for kvm, need to log into kvm host, restart cloudstack-agent
if ((host.getHypervisorType() == HypervisorType.KVM && !vms_migrating) || host.getHypervisorType() == HypervisorType.LXC) {
@ -2908,4 +2941,14 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
return false;
}
}
/**
 * Reports the simple class name of {@code ResourceManagerImpl} as the name of the
 * component that owns this manager's configuration keys.
 *
 * @return the simple name of {@code ResourceManagerImpl}
 */
@Override
public String getConfigComponentName() {
    final Class<?> component = ResourceManagerImpl.class;
    return component.getSimpleName();
}
/**
 * Lists the configuration keys contributed by this manager; currently only
 * {@code HostMaintenanceRetries}.
 *
 * @return a new single-element array holding {@code HostMaintenanceRetries}
 */
@Override
public ConfigKey<?>[] getConfigKeys() {
    final ConfigKey<?>[] keys = new ConfigKey<?>[1];
    keys[0] = HostMaintenanceRetries;
    return keys;
}
}

View File

@ -56,6 +56,7 @@ import com.cloud.org.Cluster;
import com.cloud.resource.ResourceState.Event;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.fsm.NoTransitionException;
import org.apache.cloudstack.framework.config.ConfigKey;
public class MockResourceManagerImpl extends ManagerBase implements ResourceManager {
@ -625,4 +626,14 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
// TODO Auto-generated method stub
return false;
}
// Mock stub: no component name is reported (returns null); this mock's getConfigKeys()
// likewise contributes no configuration keys.
@Override
public String getConfigComponentName() {
return null;
}
/**
 * Mock stub: this mock contributes no configuration keys.
 *
 * @return an empty, non-null array
 */
@Override
public ConfigKey<?>[] getConfigKeys() {
    // Wildcard-parameterized array creation avoids the raw ConfigKey type while producing
    // the same runtime array as before.
    return new ConfigKey<?>[0];
}
}

View File

@ -118,6 +118,7 @@ public class ResourceManagerImplTest {
when(host.getId()).thenReturn(hostId);
when(host.getResourceState()).thenReturn(ResourceState.Enabled);
when(host.getHypervisorType()).thenReturn(Hypervisor.HypervisorType.VMware);
when(host.getClusterId()).thenReturn(1L);
when(hostDao.findById(hostId)).thenReturn(host);
when(vm1.getId()).thenReturn(vm1Id);
when(vm2.getId()).thenReturn(vm2Id);
@ -188,4 +189,21 @@ public class ResourceManagerImplTest {
verify(userVmDetailsDao).addDetail(eq(vm2Id), eq("kvm.vnc.port"), eq(String.valueOf(vm2VncPort)), anyBoolean());
verify(agentManager).pullAgentToMaintenance(hostId);
}
/**
 * With the same two VMs reported both as present on the host and as non-migrating,
 * calls checkAndMaintain once more than the configured number of retries and verifies that
 * isHostInMaintenance was evaluated on every call and that setHostIntoErrorInMaintenance
 * was invoked once.
 */
@Test
public void testCheckAndMaintainErrorInMaintenanceRetries() throws NoTransitionException {
    resourceManager.setHostMaintenanceRetries(host);

    final long id = host.getId();
    final List<VMInstanceVO> stuckVms = Arrays.asList(vm1, vm2);
    when(vmInstanceDao.listByHostId(id)).thenReturn(stuckVms);
    when(vmInstanceDao.listNonMigratingVmsByHostEqualsLastHost(id)).thenReturn(stuckVms);

    final Integer configuredRetries = ResourceManager.HostMaintenanceRetries.valueIn(host.getClusterId());
    // One check per configured retry plus the final one that exhausts the budget.
    final int expectedChecks = configuredRetries + 1;
    for (int remaining = expectedChecks; remaining > 0; remaining--) {
        resourceManager.checkAndMaintain(id);
    }

    verify(resourceManager, times(expectedChecks)).isHostInMaintenance(host, stuckVms, new ArrayList<>(), stuckVms);
    verify(resourceManager).setHostIntoErrorInMaintenance(host, stuckVms);
}
}