mirror of https://github.com/apache/cloudstack.git
Merge 21c6110b71 into 5893ba5a8c
This commit is contained in:
commit
744f95c43a
|
|
@ -514,6 +514,17 @@ public class LinstorStorageAdaptor implements StorageAdaptor {
|
|||
ApiCallRcList answers = api.resourceDefinitionDelete(rd.getName());
|
||||
checkLinstorAnswersThrow(answers);
|
||||
deleted = true;
|
||||
|
||||
// LINSTOR can return success here while the resource lingers in DELETING state
|
||||
// on the controller (down peer, lost quorum, etc.). Confirm it's actually gone
|
||||
// — if not, log a WARN so operators can clear it manually. Don't throw: the
|
||||
// CloudStack-side accounting has already moved on.
|
||||
if (!LinstorUtil.waitForResourceDefinitionDeleted(api, rd.getName(),
|
||||
LinstorUtil.DEFAULT_RD_DELETE_VERIFY_TIMEOUT_MILLIS)) {
|
||||
logger.warn("Linstor: resource {} still present {}ms after delete returned success — " +
|
||||
"may be stuck in DELETING. Check the LINSTOR controller (linstor resource list).",
|
||||
rd.getName(), LinstorUtil.DEFAULT_RD_DELETE_VERIFY_TIMEOUT_MILLIS);
|
||||
}
|
||||
}
|
||||
}
|
||||
return deleted;
|
||||
|
|
|
|||
|
|
@ -232,6 +232,20 @@ public class LinstorPrimaryDataStoreDriverImpl implements PrimaryDataStoreDriver
|
|||
throw new CloudRuntimeException("Linstor: Unable to delete resource definition: " + rscDefName);
|
||||
}
|
||||
logger.info("Linstor: Deleted resource {}", rscDefName);
|
||||
|
||||
// LINSTOR can return success on the delete API call while the resource lingers in
|
||||
// DELETING state (peer issues, lost quorum, satellite down). Verify the resource is
|
||||
// actually gone — if not, log a WARN so operators see it. We deliberately do NOT
|
||||
// throw here: the volume is already considered gone on the CloudStack side, and
|
||||
// throwing would leave the CS DB and LINSTOR in different states.
|
||||
if (!LinstorUtil.waitForResourceDefinitionDeleted(linstorApi, rscDefName,
|
||||
LinstorUtil.DEFAULT_RD_DELETE_VERIFY_TIMEOUT_MILLIS))
|
||||
{
|
||||
logger.warn("Linstor: resource {} still present {}ms after delete returned success — " +
|
||||
"may be stuck in DELETING. Check the LINSTOR controller (linstor resource list) " +
|
||||
"and clear manually if the resource has no live peers.",
|
||||
rscDefName, LinstorUtil.DEFAULT_RD_DELETE_VERIFY_TIMEOUT_MILLIS);
|
||||
}
|
||||
} catch (ApiException apiEx)
|
||||
{
|
||||
logger.error("Linstor: ApiEx - " + apiEx.getMessage());
|
||||
|
|
|
|||
|
|
@ -401,6 +401,57 @@ public class LinstorUtil {
|
|||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Default per-call timeout for {@link #waitForResourceDefinitionDeleted}. Long enough for a
|
||||
* healthy LINSTOR controller to finish a normal delete; short enough not to block the calling
|
||||
* agent thread for too long if the delete is genuinely stuck.
|
||||
*/
|
||||
public static final long DEFAULT_RD_DELETE_VERIFY_TIMEOUT_MILLIS = 30_000L; // 30 seconds, expressed in milliseconds
|
||||
|
||||
/**
|
||||
* Returns {@code true} if the named resource definition is no longer present on the LINSTOR
|
||||
* controller. Used after a {@code resourceDefinitionDelete} to verify the delete actually
|
||||
* completed (LINSTOR can return success on the API call while the resource lingers in
|
||||
* DELETING state due to peer issues, lost quorum, or down satellites).
|
||||
*/
|
||||
public static boolean isResourceDefinitionGone(DevelopersApi api, String rscName) throws ApiException {
|
||||
List<ResourceDefinition> all = api.resourceDefinitionList(null, false, null, null, null);
|
||||
if (all == null) {
|
||||
return true;
|
||||
}
|
||||
return all.stream().noneMatch(rd -> rscName.equalsIgnoreCase(rd.getName()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Polls the controller until the named resource definition is gone or the timeout elapses.
|
||||
* Returns {@code true} if the resource was confirmed gone, {@code false} if it was still
|
||||
* present (or the controller kept erroring) at the deadline. Callers should NOT throw on a
|
||||
* {@code false} return — the upstream API call already reported success and the operator
|
||||
* may need to investigate manually. Log a WARN with the resource name instead.
|
||||
*/
|
||||
public static boolean waitForResourceDefinitionDeleted(DevelopersApi api, String rscName, long timeoutMillis) {
|
||||
final long deadline = System.currentTimeMillis() + timeoutMillis;
|
||||
while (true) {
|
||||
try {
|
||||
if (isResourceDefinitionGone(api, rscName)) {
|
||||
return true;
|
||||
}
|
||||
} catch (ApiException e) {
|
||||
LOGGER.debug("LINSTOR delete-verify poll failed for {}: {}", rscName, e.getMessage());
|
||||
// Keep polling — controller may be transiently unavailable.
|
||||
}
|
||||
if (System.currentTimeMillis() >= deadline) {
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(1_000L);
|
||||
} catch (InterruptedException ie) {
|
||||
Thread.currentThread().interrupt();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a pair list of resource-definitions with their 1:1 mapped resource-group objects that start with the
|
||||
* resource name `startWith`
|
||||
|
|
|
|||
Loading…
Reference in New Issue