Use Transaction logic instead of GlobalLock during capacity calculation.

This commit is contained in:
alena 2010-09-03 13:55:21 -07:00
parent 8eaeb5111c
commit 9847fb6bf6
3 changed files with 144 additions and 85 deletions
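Before the per-file diffs, here is a minimal sketch (not the committed code) of the pattern this commit moves to: the capacity rewrite, clearing the old rows and persisting the recalculated ones, runs inside a single database transaction instead of a GlobalLock-guarded critical section. It assumes the com.cloud.utils.db.Transaction calls that appear in the diffs below (currentTxn, start, commit, rollback, close); the wrapper class, method name, and import paths for CapacityDao/CapacityVO are illustrative assumptions, not part of the commit.

import java.util.List;

import org.apache.log4j.Logger;

import com.cloud.capacity.CapacityVO;            // assumed package path
import com.cloud.capacity.dao.CapacityDao;       // assumed package path
import com.cloud.utils.db.Transaction;

public class CapacityUpdateSketch {              // hypothetical holder class, for illustration only
    private static final Logger s_logger = Logger.getLogger(CapacityUpdateSketch.class);
    private final CapacityDao _capacityDao;

    public CapacityUpdateSketch(CapacityDao capacityDao) {
        _capacityDao = capacityDao;
    }

    // Before this commit the whole rewrite sat inside
    //   if (m_capacityCheckLock.lock(5)) { ... } finally { m_capacityCheckLock.unlock(); }
    // After it, the same work runs in one transaction, so a failed or interrupted
    // recalculation rolls back instead of leaving the capacity table half-cleared.
    public void updateCapacityRecords(List<CapacityVO> newCapacities) {
        Transaction txn = Transaction.currentTxn();
        try {
            txn.start();
            _capacityDao.clearNonStorageCapacities();      // drop the stale rows
            for (CapacityVO newCapacity : newCapacities) {
                _capacityDao.persist(newCapacity);         // write the recalculated rows
            }
            txn.commit();
        } catch (Exception e) {
            txn.rollback();                                // undo the partial delete/insert
            s_logger.error("Unable to complete capacity update transaction", e);
        } finally {
            txn.close();
        }
    }
}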


@@ -155,38 +155,41 @@ public class UserConcentratedAllocator implements PodAllocator {
private boolean dataCenterAndPodHasEnoughCapacity(long dataCenterId, long podId, long capacityNeeded, short capacityType, long[] hostCandidate) {
List<CapacityVO> capacities = null;
long start = System.currentTimeMillis();
if (m_capacityCheckLock.lock(120)) { // 2 minutes
long lockTime = System.currentTimeMillis();
try {
// long start = System.currentTimeMillis();
// if (m_capacityCheckLock.lock(120)) { // 2 minutes
// long lockTime = System.currentTimeMillis();
// try {
SearchCriteria sc = _capacityDao.createSearchCriteria();
sc.addAnd("capacityType", SearchCriteria.Op.EQ, capacityType);
sc.addAnd("dataCenterId", SearchCriteria.Op.EQ, dataCenterId);
sc.addAnd("podId", SearchCriteria.Op.EQ, podId);
s_logger.trace("Executing search");
capacities = _capacityDao.search(sc, null);
} finally {
m_capacityCheckLock.unlock();
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
}
} else {
s_logger.error("Unable to acquire synchronization lock for pod allocation");
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
// we now try to enforce reservation-style allocation, waiting time has been adjusted
// to 2 minutes
return false;
/*
// If we can't lock the table, just return that there is enough capacity and allow instance creation to fail on the agent
// if there is not enough capacity. All that does is skip the optimization of checking for capacity before sending the
// command to the agent.
return true;
*/
}
s_logger.trace("Done with search");
// } finally {
// m_capacityCheckLock.unlock();
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
// }
// } else {
// s_logger.error("Unable to acquire synchronization lock for pod allocation");
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
//
// // we now try to enforce reservation-style allocation, waiting time has been adjusted
// // to 2 minutes
// return false;
//
///*
// // If we can't lock the table, just return that there is enough capacity and allow instance creation to fail on the agent
// // if there is not enough capacity. All that does is skip the optimization of checking for capacity before sending the
// // command to the agent.
// return true;
//*/
// }
boolean enoughCapacity = false;
if (capacities != null) {


@@ -68,6 +68,7 @@ import com.cloud.utils.Pair;
import com.cloud.utils.component.ComponentLocator;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.vm.ConsoleProxyVO;
import com.cloud.vm.DomainRouterVO;
import com.cloud.vm.SecondaryStorageVmVO;
@@ -337,6 +338,7 @@ public class AlertManagerImpl implements AlertManager {
s_logger.trace("recalculating system capacity");
}
List<CapacityVO> newCapacities = new ArrayList<CapacityVO>();
// get all hosts..
SearchCriteria sc = _hostDao.createSearchCriteria();
@@ -444,34 +446,59 @@
// _capacityDao.persist(newPrivateIPCapacity);
}
long start = System.currentTimeMillis();
if (m_capacityCheckLock.lock(5)) { // 5 second timeout
long lockTime = System.currentTimeMillis();
try {
// delete the old records
_capacityDao.clearNonStorageCapacities();
// long start = System.currentTimeMillis();
Transaction txn = Transaction.currentTxn();
try {
txn.start();
// delete the old records
_capacityDao.clearNonStorageCapacities();
for (CapacityVO newCapacity : newCapacities) {
_capacityDao.persist(newCapacity);
}
} finally {
m_capacityCheckLock.unlock();
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
}
if (s_logger.isTraceEnabled()) {
s_logger.trace("done recalculating system capacity");
}
} else {
if (s_logger.isTraceEnabled()) {
s_logger.trace("Skipping capacity check, unable to lock the capacity table for recalculation.");
for (CapacityVO newCapacity : newCapacities) {
s_logger.trace("Executing capacity update");
_capacityDao.persist(newCapacity);
s_logger.trace("Done with capacity update");
}
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
}
txn.commit();
s_logger.trace("");
} catch (Exception ex) {
txn.rollback();
s_logger.error("Unable to start transaction for capacity update");
}finally {
txn.close();
}
// if (m_capacityCheckLock.lock(5)) { // 5 second timeout
// long lockTime = System.currentTimeMillis();
// try {
// // delete the old records
// _capacityDao.clearNonStorageCapacities();
//
// for (CapacityVO newCapacity : newCapacities) {
// _capacityDao.persist(newCapacity);
// }
// txn.commit();
// } finally {
// m_capacityCheckLock.unlock();
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
// }
//
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("done recalculating system capacity");
// }
// } else {
// txn.rollback();
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("Skipping capacity check, unable to lock the capacity table for recalculation.");
// }
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
// }
}
class CapacityChecker extends TimerTask {
@@ -644,7 +671,8 @@ public class AlertManagerImpl implements AlertManager {
// TODO: make sure this handles SSL transport (useAuth is true) and regular
public void sendAlert(short alertType, long dataCenterId, Long podId, String subject, String content) throws MessagingException, UnsupportedEncodingException {
AlertVO alert = null;
AlertVO alert = null;
if ((alertType != AlertManager.ALERT_TYPE_HOST) &&
(alertType != AlertManager.ALERT_TYPE_USERVM) &&
(alertType != AlertManager.ALERT_TYPE_DOMAIN_ROUTER) &&
@@ -653,7 +681,7 @@
(alertType != AlertManager.ALERT_TYPE_MANAGMENT_NODE)) {
alert = _alertDao.getLastAlert(alertType, dataCenterId, podId);
}
if (alert == null) {
// set up a new alert
AlertVO newAlert = new AlertVO();


@@ -61,6 +61,7 @@ import com.cloud.utils.component.ComponentLocator;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.vm.UserVmManager;
import com.cloud.vm.UserVmVO;
import com.cloud.vm.VmStats;
@@ -338,41 +339,68 @@ public class StatsCollector {
// _capacityDao.persist(capacity);
}
long start = System.currentTimeMillis();
if (m_capacityCheckLock.lock(5)) { // 5 second timeout
long lockTime = System.currentTimeMillis();
if (s_logger.isTraceEnabled()) {
s_logger.trace("recalculating system storage capacity");
}
try {
// now update the capacity table with the new stats
// FIXME: the right way to do this is to register a listener (see RouterStatsListener)
// for the host stats, send the Watch<something>Command at a regular interval
// to collect the stats from an agent and update the database as needed. The
// listener model has connects/disconnects to keep things in sync much better
// than this model right now
_capacityDao.clearStorageCapacities();
Transaction txn = Transaction.currentTxn();
try {
if (s_logger.isTraceEnabled()) {
s_logger.trace("recalculating system storage capacity");
}
txn.start();
_capacityDao.clearStorageCapacities();
for (CapacityVO newCapacity : newCapacities) {
s_logger.trace("Executing capacity update");
_capacityDao.persist(newCapacity);
s_logger.trace("Done with capacity update");
}
} finally {
m_capacityCheckLock.unlock();
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
}
if (s_logger.isTraceEnabled()) {
s_logger.trace("done recalculating system storage capacity");
}
} else {
if (s_logger.isTraceEnabled()) {
s_logger.trace("not recalculating system storage capacity, unable to lock capacity table");
}
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
txn.commit();
} catch (Exception ex) {
txn.rollback();
s_logger.error("Unable to start transaction for storage capacity update");
}finally {
txn.close();
}
//
//
//
// long start = System.currentTimeMillis();
// if (m_capacityCheckLock.lock(5)) { // 5 second timeout
// long lockTime = System.currentTimeMillis();
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("recalculating system storage capacity");
// }
// try {
// // now update the capacity table with the new stats
// // FIXME: the right way to do this is to register a listener (see RouterStatsListener)
// // for the host stats, send the Watch<something>Command at a regular interval
// // to collect the stats from an agent and update the database as needed. The
// // listener model has connects/disconnects to keep things in sync much better
// // than this model right now
// _capacityDao.clearStorageCapacities();
//
// for (CapacityVO newCapacity : newCapacities) {
// s_logger.trace("Executing capacity update");
// _capacityDao.persist(newCapacity);
// s_logger.trace("Done with capacity update");
// }
// txn.commit();
// } finally {
// m_capacityCheckLock.unlock();
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
// }
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("done recalculating system storage capacity");
// }
// } else {
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("not recalculating system storage capacity, unable to lock capacity table");
// }
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
// }
} catch (Throwable t) {
s_logger.error("Error trying to retrieve storage stats", t);
}