From 9847fb6bf6fb946771c6d1cd5b2803dd7dee8b0d Mon Sep 17 00:00:00 2001 From: alena Date: Fri, 3 Sep 2010 13:55:21 -0700 Subject: [PATCH] Use Transaction logic instead of Global Lock during capacity calculation. --- .../impl/UserConcentratedAllocator.java | 57 ++++++------ .../src/com/cloud/alert/AlertManagerImpl.java | 84 ++++++++++++------ .../src/com/cloud/server/StatsCollector.java | 88 ++++++++++++------- 3 files changed, 144 insertions(+), 85 deletions(-) diff --git a/server/src/com/cloud/agent/manager/allocator/impl/UserConcentratedAllocator.java b/server/src/com/cloud/agent/manager/allocator/impl/UserConcentratedAllocator.java index 2ceac8a3392..ffbb9e9ae8d 100755 --- a/server/src/com/cloud/agent/manager/allocator/impl/UserConcentratedAllocator.java +++ b/server/src/com/cloud/agent/manager/allocator/impl/UserConcentratedAllocator.java @@ -155,38 +155,41 @@ public class UserConcentratedAllocator implements PodAllocator { private boolean dataCenterAndPodHasEnoughCapacity(long dataCenterId, long podId, long capacityNeeded, short capacityType, long[] hostCandidate) { List capacities = null; - long start = System.currentTimeMillis(); - if (m_capacityCheckLock.lock(120)) { // 2 minutes - long lockTime = System.currentTimeMillis(); - try { +// long start = System.currentTimeMillis(); +// if (m_capacityCheckLock.lock(120)) { // 2 minutes +// long lockTime = System.currentTimeMillis(); +// try { SearchCriteria sc = _capacityDao.createSearchCriteria(); sc.addAnd("capacityType", SearchCriteria.Op.EQ, capacityType); sc.addAnd("dataCenterId", SearchCriteria.Op.EQ, dataCenterId); sc.addAnd("podId", SearchCriteria.Op.EQ, podId); + s_logger.trace("Executing search"); capacities = _capacityDao.search(sc, null); - } finally { - m_capacityCheckLock.unlock(); - long end = System.currentTimeMillis(); - if (s_logger.isTraceEnabled()) - s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms"); - } - } else { - s_logger.error("Unable to acquire synchronization lock for pod allocation"); - long end = System.currentTimeMillis(); - if (s_logger.isTraceEnabled()) - s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms"); - - // we now try to enforce reservation-style allocation, waiting time has been adjusted - // to 2 minutes - return false; - -/* - // If we can't lock the table, just return that there is enough capacity and allow instance creation to fail on the agent - // if there is not enough capacity. All that does is skip the optimization of checking for capacity before sending the - // command to the agent. - return true; -*/ - } + s_logger.trace("Done with search"); + +// } finally { +// m_capacityCheckLock.unlock(); +// long end = System.currentTimeMillis(); +// if (s_logger.isTraceEnabled()) +// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms"); +// } +// } else { +// s_logger.error("Unable to acquire synchronization lock for pod allocation"); +// long end = System.currentTimeMillis(); +// if (s_logger.isTraceEnabled()) +// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms"); +// +// // we now try to enforce reservation-style allocation, waiting time has been adjusted +// // to 2 minutes +// return false; +// +///* +// // If we can't lock the table, just return that there is enough capacity and allow instance creation to fail on the agent +// // if there is not enough capacity. All that does is skip the optimization of checking for capacity before sending the +// // command to the agent. +// return true; +//*/ +// } boolean enoughCapacity = false; if (capacities != null) { diff --git a/server/src/com/cloud/alert/AlertManagerImpl.java b/server/src/com/cloud/alert/AlertManagerImpl.java index 80391e313dd..112c96dd2d3 100644 --- a/server/src/com/cloud/alert/AlertManagerImpl.java +++ b/server/src/com/cloud/alert/AlertManagerImpl.java @@ -68,6 +68,7 @@ import com.cloud.utils.Pair; import com.cloud.utils.component.ComponentLocator; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.db.SearchCriteria; +import com.cloud.utils.db.Transaction; import com.cloud.vm.ConsoleProxyVO; import com.cloud.vm.DomainRouterVO; import com.cloud.vm.SecondaryStorageVmVO; @@ -337,6 +338,7 @@ public class AlertManagerImpl implements AlertManager { s_logger.trace("recalculating system capacity"); } List newCapacities = new ArrayList(); + // get all hosts.. SearchCriteria sc = _hostDao.createSearchCriteria(); @@ -444,34 +446,59 @@ public class AlertManagerImpl implements AlertManager { // _capacityDao.persist(newPrivateIPCapacity); } - long start = System.currentTimeMillis(); - if (m_capacityCheckLock.lock(5)) { // 5 second timeout - long lockTime = System.currentTimeMillis(); - try { - // delete the old records - _capacityDao.clearNonStorageCapacities(); +// long start = System.currentTimeMillis(); + + Transaction txn = Transaction.currentTxn(); + try { + txn.start(); + // delete the old records + _capacityDao.clearNonStorageCapacities(); - for (CapacityVO newCapacity : newCapacities) { - _capacityDao.persist(newCapacity); - } - } finally { - m_capacityCheckLock.unlock(); - long end = System.currentTimeMillis(); - if (s_logger.isTraceEnabled()) - s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms"); - } - - if (s_logger.isTraceEnabled()) { - s_logger.trace("done recalculating system capacity"); - } - } else { - if (s_logger.isTraceEnabled()) { - s_logger.trace("Skipping capacity check, unable to lock the capacity table for recalculation."); + for (CapacityVO newCapacity : newCapacities) { + s_logger.trace("Executing capacity update"); + _capacityDao.persist(newCapacity); + s_logger.trace("Done with capacity update"); } - long end = System.currentTimeMillis(); - if (s_logger.isTraceEnabled()) - s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms"); - } + txn.commit(); + s_logger.trace(""); + } catch (Exception ex) { + txn.rollback(); + s_logger.error("Unable to start transaction for capacity update"); + }finally { + txn.close(); + } + + + +// if (m_capacityCheckLock.lock(5)) { // 5 second timeout +// long lockTime = System.currentTimeMillis(); +// try { +// // delete the old records +// _capacityDao.clearNonStorageCapacities(); +// +// for (CapacityVO newCapacity : newCapacities) { +// _capacityDao.persist(newCapacity); +// } +// txn.commit(); +// } finally { +// m_capacityCheckLock.unlock(); +// long end = System.currentTimeMillis(); +// if (s_logger.isTraceEnabled()) +// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms"); +// } +// +// if (s_logger.isTraceEnabled()) { +// s_logger.trace("done recalculating system capacity"); +// } +// } else { +// txn.rollback(); +// if (s_logger.isTraceEnabled()) { +// s_logger.trace("Skipping capacity check, unable to lock the capacity table for recalculation."); +// } +// long end = System.currentTimeMillis(); +// if (s_logger.isTraceEnabled()) +// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms"); +// } } class CapacityChecker extends TimerTask { @@ -644,7 +671,8 @@ public class AlertManagerImpl implements AlertManager { // TODO: make sure this handles SSL transport (useAuth is true) and regular public void sendAlert(short alertType, long dataCenterId, Long podId, String subject, String content) throws MessagingException, UnsupportedEncodingException { - AlertVO alert = null; + AlertVO alert = null; + if ((alertType != AlertManager.ALERT_TYPE_HOST) && (alertType != AlertManager.ALERT_TYPE_USERVM) && (alertType != AlertManager.ALERT_TYPE_DOMAIN_ROUTER) && @@ -653,7 +681,7 @@ public class AlertManagerImpl implements AlertManager { (alertType != AlertManager.ALERT_TYPE_MANAGMENT_NODE)) { alert = _alertDao.getLastAlert(alertType, dataCenterId, podId); } - + if (alert == null) { // set up a new alert AlertVO newAlert = new AlertVO(); diff --git a/server/src/com/cloud/server/StatsCollector.java b/server/src/com/cloud/server/StatsCollector.java index 5242bb82a45..2aa578396f8 100644 --- a/server/src/com/cloud/server/StatsCollector.java +++ b/server/src/com/cloud/server/StatsCollector.java @@ -61,6 +61,7 @@ import com.cloud.utils.component.ComponentLocator; import com.cloud.utils.concurrency.NamedThreadFactory; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.db.SearchCriteria; +import com.cloud.utils.db.Transaction; import com.cloud.vm.UserVmManager; import com.cloud.vm.UserVmVO; import com.cloud.vm.VmStats; @@ -338,41 +339,68 @@ public class StatsCollector { // _capacityDao.persist(capacity); } - long start = System.currentTimeMillis(); - if (m_capacityCheckLock.lock(5)) { // 5 second timeout - long lockTime = System.currentTimeMillis(); - if (s_logger.isTraceEnabled()) { - s_logger.trace("recalculating system storage capacity"); - } - try { - // now update the capacity table with the new stats - // FIXME: the right way to do this is to register a listener (see RouterStatsListener) - // for the host stats, send the WatchCommand at a regular interval - // to collect the stats from an agent and update the database as needed. The - // listener model has connects/disconnects to keep things in sync much better - // than this model right now - _capacityDao.clearStorageCapacities(); + Transaction txn = Transaction.currentTxn(); + try { + if (s_logger.isTraceEnabled()) { + s_logger.trace("recalculating system storage capacity"); + } + txn.start(); + _capacityDao.clearStorageCapacities(); for (CapacityVO newCapacity : newCapacities) { + s_logger.trace("Executing capacity update"); _capacityDao.persist(newCapacity); + s_logger.trace("Done with capacity update"); } - } finally { - m_capacityCheckLock.unlock(); - long end = System.currentTimeMillis(); - if (s_logger.isTraceEnabled()) - s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms"); - } - if (s_logger.isTraceEnabled()) { - s_logger.trace("done recalculating system storage capacity"); - } - } else { - if (s_logger.isTraceEnabled()) { - s_logger.trace("not recalculating system storage capacity, unable to lock capacity table"); - } - long end = System.currentTimeMillis(); - if (s_logger.isTraceEnabled()) - s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms"); + txn.commit(); + } catch (Exception ex) { + txn.rollback(); + s_logger.error("Unable to start transaction for storage capacity update"); + }finally { + txn.close(); } + +// +// +// +// long start = System.currentTimeMillis(); +// if (m_capacityCheckLock.lock(5)) { // 5 second timeout +// long lockTime = System.currentTimeMillis(); +// if (s_logger.isTraceEnabled()) { +// s_logger.trace("recalculating system storage capacity"); +// } +// try { +// // now update the capacity table with the new stats +// // FIXME: the right way to do this is to register a listener (see RouterStatsListener) +// // for the host stats, send the WatchCommand at a regular interval +// // to collect the stats from an agent and update the database as needed. The +// // listener model has connects/disconnects to keep things in sync much better +// // than this model right now +// _capacityDao.clearStorageCapacities(); +// +// for (CapacityVO newCapacity : newCapacities) { +// s_logger.trace("Executing capacity update"); +// _capacityDao.persist(newCapacity); +// s_logger.trace("Done with capacity update"); +// } +// txn.commit(); +// } finally { +// m_capacityCheckLock.unlock(); +// long end = System.currentTimeMillis(); +// if (s_logger.isTraceEnabled()) +// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms"); +// } +// if (s_logger.isTraceEnabled()) { +// s_logger.trace("done recalculating system storage capacity"); +// } +// } else { +// if (s_logger.isTraceEnabled()) { +// s_logger.trace("not recalculating system storage capacity, unable to lock capacity table"); +// } +// long end = System.currentTimeMillis(); +// if (s_logger.isTraceEnabled()) +// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms"); +// } } catch (Throwable t) { s_logger.error("Error trying to retrieve storage stats", t); }