mirror of https://github.com/apache/cloudstack.git
VM start error handling improvements and config to expose error to users (#12894)
* VM start error handling improvements, and config to expose error to user * refactor
This commit is contained in:
parent
6ca6aa1c3f
commit
68030df10b
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
package com.cloud.vm;
|
||||
|
||||
import static com.cloud.configuration.ConfigurationManagerImpl.EXPOSE_ERRORS_TO_USER;
|
||||
import static com.cloud.configuration.ConfigurationManagerImpl.MIGRATE_VM_ACROSS_CLUSTERS;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
|
|
@ -931,10 +932,22 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
|
|||
public void start(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner) {
|
||||
try {
|
||||
advanceStart(vmUuid, params, planToDeploy, planner);
|
||||
} catch (ConcurrentOperationException | InsufficientCapacityException e) {
|
||||
throw new CloudRuntimeException(String.format("Unable to start a VM [%s] due to [%s].", vmUuid, e.getMessage()), e).add(VirtualMachine.class, vmUuid);
|
||||
} catch (ConcurrentOperationException e) {
|
||||
final CallContext cctxt = CallContext.current();
|
||||
final Account account = cctxt.getCallingAccount();
|
||||
if (canExposeError(account)) {
|
||||
throw new CloudRuntimeException(String.format("Unable to start a VM [%s] due to [%s].", vmUuid, e.getMessage()), e).add(VirtualMachine.class, vmUuid);
|
||||
}
|
||||
throw new CloudRuntimeException(String.format("Unable to start a VM [%s] due to concurrent operation.", vmUuid), e).add(VirtualMachine.class, vmUuid);
|
||||
} catch (final InsufficientCapacityException e) {
|
||||
final CallContext cctxt = CallContext.current();
|
||||
final Account account = cctxt.getCallingAccount();
|
||||
if (canExposeError(account)) {
|
||||
throw new CloudRuntimeException(String.format("Unable to start a VM [%s] due to [%s].", vmUuid, e.getMessage()), e).add(VirtualMachine.class, vmUuid);
|
||||
}
|
||||
throw new CloudRuntimeException(String.format("Unable to start a VM [%s] due to insufficient capacity.", vmUuid), e).add(VirtualMachine.class, vmUuid);
|
||||
} catch (final ResourceUnavailableException e) {
|
||||
if (e.getScope() != null && e.getScope().equals(VirtualRouter.class)){
|
||||
if (e.getScope() != null && e.getScope().equals(VirtualRouter.class)) {
|
||||
throw new CloudRuntimeException("Network is unavailable. Please contact administrator", e).add(VirtualMachine.class, vmUuid);
|
||||
}
|
||||
throw new CloudRuntimeException(String.format("Unable to start a VM [%s] due to [%s].", vmUuid, e.getMessage()), e).add(VirtualMachine.class, vmUuid);
|
||||
|
|
@ -1361,6 +1374,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
|
|||
|
||||
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
|
||||
|
||||
Throwable lastKnownError = null;
|
||||
boolean canRetry = true;
|
||||
ExcludeList avoids = null;
|
||||
try {
|
||||
|
|
@ -1384,7 +1398,8 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
|
|||
|
||||
int retry = StartRetry.value();
|
||||
while (retry-- != 0) {
|
||||
logger.debug("Instance start attempt #{}", (StartRetry.value() - retry));
|
||||
int attemptNumber = StartRetry.value() - retry;
|
||||
logger.debug("Instance start attempt #{}", attemptNumber);
|
||||
|
||||
if (reuseVolume) {
|
||||
final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId());
|
||||
|
|
@ -1450,8 +1465,13 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
|
|||
reuseVolume = false;
|
||||
continue;
|
||||
}
|
||||
throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId(),
|
||||
areAffinityGroupsAssociated(vmProfile));
|
||||
String message = String.format("Unable to create a deployment for %s after %s attempts", vmProfile, attemptNumber);
|
||||
if (canExposeError(account) && lastKnownError != null) {
|
||||
message += String.format(" Last known error: %s", lastKnownError.getMessage());
|
||||
throw new CloudRuntimeException(message, lastKnownError);
|
||||
} else {
|
||||
throw new InsufficientServerCapacityException(message, DataCenter.class, plan.getDataCenterId(), areAffinityGroupsAssociated(vmProfile));
|
||||
}
|
||||
}
|
||||
|
||||
avoids.addHost(dest.getHost().getId());
|
||||
|
|
@ -1619,11 +1639,15 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
|
|||
throw new ExecutionException("Unable to start VM:" + vm.getUuid() + " due to error in finalizeStart, not retrying");
|
||||
}
|
||||
}
|
||||
logger.info("Unable to start VM on {} due to {}", dest.getHost(), (startAnswer == null ? " no start answer" : startAnswer.getDetails()));
|
||||
String msg = String.format("Unable to start VM on %s due to %s", dest.getHost(), startAnswer == null ? "no start command answer" : startAnswer.getDetails());
|
||||
lastKnownError = new ExecutionException(msg);
|
||||
|
||||
if (startAnswer != null && startAnswer.getContextParam("stopRetry") != null) {
|
||||
logger.error(msg, lastKnownError);
|
||||
break;
|
||||
}
|
||||
|
||||
logger.debug(msg, lastKnownError);
|
||||
} catch (OperationTimedoutException e) {
|
||||
logger.debug("Unable to send the start command to host {} failed to start VM: {}", dest.getHost(), vm);
|
||||
if (e.isActive()) {
|
||||
|
|
@ -1633,6 +1657,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
|
|||
throw new AgentUnavailableException("Unable to start " + vm.getHostName(), destHostId, e);
|
||||
} catch (final ResourceUnavailableException e) {
|
||||
logger.warn("Unable to contact resource.", e);
|
||||
lastKnownError = e;
|
||||
if (!avoids.add(e)) {
|
||||
if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
|
||||
throw e;
|
||||
|
|
@ -1689,10 +1714,22 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
|
|||
}
|
||||
|
||||
if (startedVm == null) {
|
||||
throw new CloudRuntimeException("Unable to start Instance '" + vm.getHostName() + "' (" + vm.getUuid() + "), see management server log for details");
|
||||
String messageTmpl = "Unable to start Instance '%s' (%s)%s";
|
||||
String details;
|
||||
if (canExposeError(account) && lastKnownError != null) {
|
||||
details = ": " + lastKnownError.getMessage();
|
||||
} else {
|
||||
details = ", see management server log for details";
|
||||
}
|
||||
String message = String.format(messageTmpl, vm.getHostName(), vm.getUuid(), details);
|
||||
throw new CloudRuntimeException(message, lastKnownError);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean canExposeError(Account account) {
|
||||
return (account != null && account.getType() == Account.Type.ADMIN) || Boolean.TRUE.equals(EXPOSE_ERRORS_TO_USER.value());
|
||||
}
|
||||
|
||||
protected void updateStartCommandWithExternalDetails(Host host, VirtualMachineTO vmTO, StartCommand command) {
|
||||
if (!HypervisorType.External.equals(host.getHypervisorType())) {
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -536,6 +536,9 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
|
|||
public static final ConfigKey<Boolean> ALLOW_DOMAIN_ADMINS_TO_CREATE_TAGGED_OFFERINGS = new ConfigKey<>(Boolean.class, "allow.domain.admins.to.create.tagged.offerings", "Advanced",
|
||||
"false", "Allow domain admins to create offerings with tags.", true, ConfigKey.Scope.Account, null);
|
||||
|
||||
/**
 * Setting {@code expose.errors.to.user} (declared with {@code ConfigKey.Scope.Global},
 * default {@code "false"}).
 *
 * <p>Per its description: when {@code true}, detailed error messages are returned to
 * all user roles; when {@code false}, detailed errors are shown to admin users only.
 * It is statically imported by {@code com.cloud.vm.VirtualMachineManagerImpl}, whose
 * {@code canExposeError} helper reads it when building VM start failure messages.
 */
public static final ConfigKey<Boolean> EXPOSE_ERRORS_TO_USER = new ConfigKey<>(Boolean.class, "expose.errors.to.user", ConfigKey.CATEGORY_ADVANCED,
|
||||
"false", "If set to true, detailed error messages will be returned to all user roles. If false, detailed errors are only shown to admin users", true, ConfigKey.Scope.Global, null);
|
||||
|
||||
public static final ConfigKey<Long> DELETE_QUERY_BATCH_SIZE = new ConfigKey<>("Advanced", Long.class, "delete.query.batch.size", "0",
|
||||
"Indicates the limit applied while deleting entries in bulk. With this, the delete query will apply the limit as many times as necessary," +
|
||||
" to delete all the entries. This is advised when retaining several days of records, which can lead to slowness. <= 0 means that no limit will " +
|
||||
|
|
@ -8494,11 +8497,10 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
|
|||
BYTES_MAX_READ_LENGTH, BYTES_MAX_WRITE_LENGTH, ADD_HOST_ON_SERVICE_RESTART_KVM, SET_HOST_DOWN_TO_MAINTENANCE,
|
||||
VM_SERVICE_OFFERING_MAX_CPU_CORES, VM_SERVICE_OFFERING_MAX_RAM_SIZE, MIGRATE_VM_ACROSS_CLUSTERS,
|
||||
ENABLE_ACCOUNT_SETTINGS_FOR_DOMAIN, ENABLE_DOMAIN_SETTINGS_FOR_CHILD_DOMAIN,
|
||||
ALLOW_DOMAIN_ADMINS_TO_CREATE_TAGGED_OFFERINGS, DELETE_QUERY_BATCH_SIZE, AllowNonRFC1918CompliantIPs, HostCapacityTypeCpuMemoryWeight
|
||||
ALLOW_DOMAIN_ADMINS_TO_CREATE_TAGGED_OFFERINGS, EXPOSE_ERRORS_TO_USER, DELETE_QUERY_BATCH_SIZE, AllowNonRFC1918CompliantIPs, HostCapacityTypeCpuMemoryWeight
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a string representing the specified configuration's type.
|
||||
* @param configName name of the configuration.
|
||||
|
|
|
|||
Loading…
Reference in New Issue