From 3123c30f23ee0f089a554ec5ea50fac1c4731329 Mon Sep 17 00:00:00 2001
From: Kelven Yang
Date: Fri, 7 Mar 2014 17:15:04 -0800
Subject: [PATCH] BUG-ID: CS-19196: Relax HA work termination rule on
 exceptions. It could cause premature HA termination due to unhandled
 exceptions.

Reviewed-By:Anthony
---
 .../src/com/cloud/ha/HighAvailabilityManagerImpl.java | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
index 52100c849ac..48d998a53d8 100755
--- a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
+++ b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
@@ -913,8 +913,14 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
                         work.setDateTaken(null);
                     }
                 } catch (Exception e) {
-                    s_logger.error("Terminating " + work, e);
-                    work.setStep(Step.Error);
+                    s_logger.warn("Encountered unhandled exception during HA process, reschedule retry", e);
+
+                    long nextTime = (System.currentTimeMillis() >> 10) + _restartRetryInterval;
+
+                    s_logger.info("Rescheduling " + work + " to try again at " + new Date(nextTime << 10));
+                    work.setTimeToTry(nextTime);
+                    work.setServerId(null);
+                    work.setDateTaken(null);
                 }
                 _haDao.update(work.getId(), work);
             } catch (final Throwable th) {