Compare commits

...

2 Commits

Author SHA1 Message Date
Pearl Dsilva 5a0c1d22d4
Merge bfbb8bb1ef into 6e5d78a8a7 2026-01-22 07:16:28 +00:00
Pearl Dsilva bfbb8bb1ef Allow cleaning up of networks stuck in Implementing state 2026-01-12 11:53:26 -05:00
1 changed files with 75 additions and 0 deletions

View File

@ -969,6 +969,13 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
}
logger.trace("End cleanup expired async-jobs");
// 3) Cleanup orphaned networks stuck in Implementing state without async jobs
try {
cleanupOrphanedNetworks();
} catch (Throwable e) {
logger.error("Unexpected exception when trying to cleanup orphaned networks", e);
}
} catch (Throwable e) {
logger.error("Unexpected exception when trying to execute queue item, ", e);
}
@ -1284,6 +1291,74 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
}
}
/**
* Cleanup networks that are stuck in Implementing state without associated async jobs.
* This only processes networks that have been stuck for longer than the job expiration threshold.
*/
private void cleanupOrphanedNetworks() {
try {
SearchCriteria<NetworkVO> sc = networkDao.createSearchCriteria();
sc.addAnd("state", SearchCriteria.Op.EQ, Network.State.Implementing);
sc.addAnd("removed", SearchCriteria.Op.NULL);
List<NetworkVO> implementingNetworks = networkDao.search(sc, null);
if (implementingNetworks == null || implementingNetworks.isEmpty()) {
return;
}
logger.debug("Found {} networks in Implementing state, checking for orphaned networks", implementingNetworks.size());
final long expireMinutes = JobExpireMinutes.value();
final Date cutoffTime = new Date(System.currentTimeMillis() - (expireMinutes * 60 * 1000));
for (NetworkVO network : implementingNetworks) {
if (network.getCreated().after(cutoffTime)) {
logger.trace("Network {} in Implementing state is only {} minutes old (threshold: {} minutes), skipping cleanup",
network.getId(),
(System.currentTimeMillis() - network.getCreated().getTime()) / 60000,
expireMinutes);
continue;
}
List<AsyncJobVO> jobs = _jobDao.findInstancePendingAsyncJobs("Network", network.getAccountId());
boolean hasActiveJob = false;
for (AsyncJobVO job : jobs) {
if (job.getInstanceId() != null && job.getInstanceId().equals(network.getId())) {
hasActiveJob = true;
break;
}
}
if (hasActiveJob) {
logger.debug("Network {} in Implementing state has active async job, skipping cleanup", network.getId());
continue;
}
logger.warn("Found orphaned network {} in Implementing state without async job. " +
"Network created: {}, age: {} minutes, expiration threshold: {} minutes. Transitioning to Shutdown state.",
network.getId(), network.getCreated(),
(System.currentTimeMillis() - network.getCreated().getTime()) / 60000,
expireMinutes);
updateNetworkState(network);
}
} catch (Exception e) {
logger.error("Error while cleaning up orphaned networks", e);
}
}
private void updateNetworkState(NetworkVO network) {
try {
networkOrchestrationService.stateTransitTo(network, Network.Event.OperationFailed);
logger.info("Successfully transitioned orphaned network {} to Shutdown state using state machine", network.getId());
} catch (final NoTransitionException e) {
logger.debug("State transition failed for orphaned network {}, forcing state update", network.getId());
network.setState(Network.State.Shutdown);
networkDao.update(network.getId(), network);
logger.info("Successfully forced orphaned network {} to Shutdown state", network.getId());
}
}
@Override
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
}