Use Transaction logic instead of GlobalLock during capacity calculation.

This commit is contained in:
alena 2010-09-03 13:55:21 -07:00
parent 8eaeb5111c
commit 9847fb6bf6
3 changed files with 144 additions and 85 deletions
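Before the per-file diffs, here is a minimal sketch (not the committed code) of the pattern this commit moves to: the capacity rewrite, clearing the old rows and persisting the recalculated ones, runs inside a single database transaction instead of a GlobalLock-guarded critical section. It assumes the com.cloud.utils.db.Transaction calls that appear in the diffs below (currentTxn, start, commit, rollback, close); the wrapper class, method name, and import paths for CapacityDao/CapacityVO are illustrative assumptions, not part of the commit.

import java.util.List;

import org.apache.log4j.Logger;

import com.cloud.capacity.CapacityVO;            // assumed package path
import com.cloud.capacity.dao.CapacityDao;       // assumed package path
import com.cloud.utils.db.Transaction;

public class CapacityUpdateSketch {              // hypothetical holder class, for illustration only
    private static final Logger s_logger = Logger.getLogger(CapacityUpdateSketch.class);
    private final CapacityDao _capacityDao;

    public CapacityUpdateSketch(CapacityDao capacityDao) {
        _capacityDao = capacityDao;
    }

    // Before this commit the whole rewrite sat inside
    //   if (m_capacityCheckLock.lock(5)) { ... } finally { m_capacityCheckLock.unlock(); }
    // After it, the same work runs in one transaction, so a failed or interrupted
    // recalculation rolls back instead of leaving the capacity table half-cleared.
    public void updateCapacityRecords(List<CapacityVO> newCapacities) {
        Transaction txn = Transaction.currentTxn();
        try {
            txn.start();
            _capacityDao.clearNonStorageCapacities();      // drop the stale rows
            for (CapacityVO newCapacity : newCapacities) {
                _capacityDao.persist(newCapacity);         // write the recalculated rows
            }
            txn.commit();
        } catch (Exception e) {
            txn.rollback();                                // undo the partial delete/insert
            s_logger.error("Unable to complete capacity update transaction", e);
        } finally {
            txn.close();
        }
    }
}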


@@ -155,38 +155,41 @@ public class UserConcentratedAllocator implements PodAllocator {
private boolean dataCenterAndPodHasEnoughCapacity(long dataCenterId, long podId, long capacityNeeded, short capacityType, long[] hostCandidate) {
List<CapacityVO> capacities = null;
long start = System.currentTimeMillis();
if (m_capacityCheckLock.lock(120)) { // 2 minutes
long lockTime = System.currentTimeMillis();
try {
// long start = System.currentTimeMillis();
// if (m_capacityCheckLock.lock(120)) { // 2 minutes
// long lockTime = System.currentTimeMillis();
// try {
SearchCriteria sc = _capacityDao.createSearchCriteria();
sc.addAnd("capacityType", SearchCriteria.Op.EQ, capacityType);
sc.addAnd("dataCenterId", SearchCriteria.Op.EQ, dataCenterId);
sc.addAnd("podId", SearchCriteria.Op.EQ, podId);
s_logger.trace("Executing search");
capacities = _capacityDao.search(sc, null);
} finally {
m_capacityCheckLock.unlock();
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
}
} else {
s_logger.error("Unable to acquire synchronization lock for pod allocation");
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
// we now try to enforce reservation-style allocation, waiting time has been adjusted
// to 2 minutes
return false;
/*
// If we can't lock the table, just return that there is enough capacity and allow instance creation to fail on the agent
// if there is not enough capacity. All that does is skip the optimization of checking for capacity before sending the
// command to the agent.
return true;
*/
}
s_logger.trace("Done with search");
// } finally {
// m_capacityCheckLock.unlock();
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
// }
// } else {
// s_logger.error("Unable to acquire synchronization lock for pod allocation");
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
//
// // we now try to enforce reservation-style allocation, waiting time has been adjusted
// // to 2 minutes
// return false;
//
///*
// // If we can't lock the table, just return that there is enough capacity and allow instance creation to fail on the agent
// // if there is not enough capacity. All that does is skip the optimization of checking for capacity before sending the
// // command to the agent.
// return true;
//*/
// }
boolean enoughCapacity = false;
if (capacities != null) {


@@ -68,6 +68,7 @@ import com.cloud.utils.Pair;
import com.cloud.utils.component.ComponentLocator;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.vm.ConsoleProxyVO;
import com.cloud.vm.DomainRouterVO;
import com.cloud.vm.SecondaryStorageVmVO;
@@ -337,6 +338,7 @@ public class AlertManagerImpl implements AlertManager {
s_logger.trace("recalculating system capacity");
}
List<CapacityVO> newCapacities = new ArrayList<CapacityVO>();
// get all hosts..
SearchCriteria sc = _hostDao.createSearchCriteria();
@@ -444,34 +446,59 @@
// _capacityDao.persist(newPrivateIPCapacity);
}
long start = System.currentTimeMillis();
if (m_capacityCheckLock.lock(5)) { // 5 second timeout
long lockTime = System.currentTimeMillis();
try {
// delete the old records
_capacityDao.clearNonStorageCapacities();
// long start = System.currentTimeMillis();
Transaction txn = Transaction.currentTxn();
try {
txn.start();
// delete the old records
_capacityDao.clearNonStorageCapacities();
for (CapacityVO newCapacity : newCapacities) {
_capacityDao.persist(newCapacity);
}
} finally {
m_capacityCheckLock.unlock();
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
}
if (s_logger.isTraceEnabled()) {
s_logger.trace("done recalculating system capacity");
}
} else {
if (s_logger.isTraceEnabled()) {
s_logger.trace("Skipping capacity check, unable to lock the capacity table for recalculation.");
for (CapacityVO newCapacity : newCapacities) {
s_logger.trace("Executing capacity update");
_capacityDao.persist(newCapacity);
s_logger.trace("Done with capacity update");
}
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
}
txn.commit();
s_logger.trace("");
} catch (Exception ex) {
txn.rollback();
s_logger.error("Unable to start transaction for capacity update");
}finally {
txn.close();
}
// if (m_capacityCheckLock.lock(5)) { // 5 second timeout
// long lockTime = System.currentTimeMillis();
// try {
// // delete the old records
// _capacityDao.clearNonStorageCapacities();
//
// for (CapacityVO newCapacity : newCapacities) {
// _capacityDao.persist(newCapacity);
// }
// txn.commit();
// } finally {
// m_capacityCheckLock.unlock();
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
// }
//
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("done recalculating system capacity");
// }
// } else {
// txn.rollback();
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("Skipping capacity check, unable to lock the capacity table for recalculation.");
// }
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
// }
}
class CapacityChecker extends TimerTask {
@@ -644,7 +671,8 @@ public class AlertManagerImpl implements AlertManager {
// TODO: make sure this handles SSL transport (useAuth is true) and regular
public void sendAlert(short alertType, long dataCenterId, Long podId, String subject, String content) throws MessagingException, UnsupportedEncodingException {
AlertVO alert = null;
AlertVO alert = null;
if ((alertType != AlertManager.ALERT_TYPE_HOST) &&
(alertType != AlertManager.ALERT_TYPE_USERVM) &&
(alertType != AlertManager.ALERT_TYPE_DOMAIN_ROUTER) &&
@@ -653,7 +681,7 @@
(alertType != AlertManager.ALERT_TYPE_MANAGMENT_NODE)) {
alert = _alertDao.getLastAlert(alertType, dataCenterId, podId);
}
if (alert == null) {
// set up a new alert
AlertVO newAlert = new AlertVO();


@@ -61,6 +61,7 @@ import com.cloud.utils.component.ComponentLocator;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.vm.UserVmManager;
import com.cloud.vm.UserVmVO;
import com.cloud.vm.VmStats;
@@ -338,41 +339,68 @@ public class StatsCollector {
// _capacityDao.persist(capacity);
}
long start = System.currentTimeMillis();
if (m_capacityCheckLock.lock(5)) { // 5 second timeout
long lockTime = System.currentTimeMillis();
if (s_logger.isTraceEnabled()) {
s_logger.trace("recalculating system storage capacity");
}
try {
// now update the capacity table with the new stats
// FIXME: the right way to do this is to register a listener (see RouterStatsListener)
// for the host stats, send the Watch<something>Command at a regular interval
// to collect the stats from an agent and update the database as needed. The
// listener model has connects/disconnects to keep things in sync much better
// than this model right now
_capacityDao.clearStorageCapacities();
Transaction txn = Transaction.currentTxn();
try {
if (s_logger.isTraceEnabled()) {
s_logger.trace("recalculating system storage capacity");
}
txn.start();
_capacityDao.clearStorageCapacities();
for (CapacityVO newCapacity : newCapacities) {
s_logger.trace("Executing capacity update");
_capacityDao.persist(newCapacity);
s_logger.trace("Done with capacity update");
}
} finally {
m_capacityCheckLock.unlock();
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
}
if (s_logger.isTraceEnabled()) {
s_logger.trace("done recalculating system storage capacity");
}
} else {
if (s_logger.isTraceEnabled()) {
s_logger.trace("not recalculating system storage capacity, unable to lock capacity table");
}
long end = System.currentTimeMillis();
if (s_logger.isTraceEnabled())
s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
txn.commit();
} catch (Exception ex) {
txn.rollback();
s_logger.error("Unable to start transaction for storage capacity update");
}finally {
txn.close();
}
//
//
//
// long start = System.currentTimeMillis();
// if (m_capacityCheckLock.lock(5)) { // 5 second timeout
// long lockTime = System.currentTimeMillis();
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("recalculating system storage capacity");
// }
// try {
// // now update the capacity table with the new stats
// // FIXME: the right way to do this is to register a listener (see RouterStatsListener)
// // for the host stats, send the Watch<something>Command at a regular interval
// // to collect the stats from an agent and update the database as needed. The
// // listener model has connects/disconnects to keep things in sync much better
// // than this model right now
// _capacityDao.clearStorageCapacities();
//
// for (CapacityVO newCapacity : newCapacities) {
// s_logger.trace("Executing capacity update");
// _capacityDao.persist(newCapacity);
// s_logger.trace("Done with capacity update");
// }
// txn.commit();
// } finally {
// m_capacityCheckLock.unlock();
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckLock was held for " + (end - lockTime) + " ms; lock was acquired in " + (lockTime - start) + " ms");
// }
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("done recalculating system storage capacity");
// }
// } else {
// if (s_logger.isTraceEnabled()) {
// s_logger.trace("not recalculating system storage capacity, unable to lock capacity table");
// }
// long end = System.currentTimeMillis();
// if (s_logger.isTraceEnabled())
// s_logger.trace("CapacityCheckerLock got timed out after " + (end - start) + " ms");
// }
} catch (Throwable t) {
s_logger.error("Error trying to retrieve storage stats", t);
}