bug 10920: avoid deadlocks by not using order by random

This commit is contained in:
Chiradeep Vittal 2011-07-30 12:54:34 -07:00
parent 03fbe13f58
commit f49469270e
2 changed files with 34 additions and 21 deletions

View File

@ -190,7 +190,7 @@ public class SecurityGroupManagerImpl implements SecurityGroupManager, SecurityG
try {
cleanupFinishedWork();
cleanupUnfinishedWork();
processScheduledWork();
//processScheduledWork();
} finally {
txn.close("SG Cleanup");
}
@ -829,9 +829,20 @@ public class SecurityGroupManagerImpl implements SecurityGroupManager, SecurityG
}
final SecurityGroupWorkVO work = _workDao.take(_serverId);
if (work == null) {
if (s_logger.isTraceEnabled()) {
s_logger.trace("Security Group work: no work found");
}
return;
}
Long userVmId = work.getInstanceId();
if (work.getStep() == Step.Done) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Security Group work: found a job in done state, rescheduling for vm: " + userVmId);
}
Set<Long> affectedVms = new HashSet<Long>();
affectedVms.add(userVmId);
scheduleRulesetUpdateToHosts(affectedVms, true, _timeBetweenCleanups*1000l);
}
UserVm vm = null;
Long seqnum = null;
s_logger.debug("Working on " + work);

View File

@ -26,7 +26,6 @@ import javax.ejb.Local;
import org.apache.log4j.Logger;
import com.cloud.ha.HaWorkVO;
import com.cloud.ha.dao.HighAvailabilityDaoImpl;
import com.cloud.network.security.SecurityGroupWorkVO;
import com.cloud.network.security.SecurityGroupWorkVO.Step;
import com.cloud.utils.db.DB;
@ -106,32 +105,35 @@ public class SecurityGroupWorkDaoImpl extends GenericDaoBase<SecurityGroupWorkVO
final SearchCriteria<SecurityGroupWorkVO> sc = UntakenWorkSearch.create();
sc.setParameters("step", Step.Scheduled);
final Filter filter = new Filter(SecurityGroupWorkVO.class, null, true, 0l, 1l);//FIXME: order desc by update time?
txn.start();
final SecurityGroupWorkVO vo = this.lockOneRandomRow(sc, true);
if (vo == null) {
if (s_logger.isTraceEnabled()) {
s_logger.trace("No security group work items found");
}
final List<SecurityGroupWorkVO> vos = lockRows(sc, filter, true);
if (vos.size() == 0) {
txn.commit();
if (s_logger.isTraceEnabled()) {
s_logger.trace("Security Group take: no work found");
}
return null;
}
SecurityGroupWorkVO work = null;
//ensure that there is no job in Processing state for the same VM
if ( findByVmIdStep(vo.getInstanceId(), Step.Processing) == null) {
work = vo;
}
if (work == null) {
SecurityGroupWorkVO work = vos.get(0);
boolean processing = false;
if ( findByVmIdStep(work.getInstanceId(), Step.Processing) != null) {
//ensure that there is no job in Processing state for the same VM
processing = true;
if (s_logger.isTraceEnabled()) {
s_logger.trace("Found a security group work item in Scheduled and Processing, exiting vm="+vo.getInstanceId());
s_logger.trace("Security Group work take: found a job in Scheduled and Processing vmid=" + work.getInstanceId());
}
txn.commit();
return null;
}
work.setServerId(serverId);
work.setDateTaken(new Date());
work.setStep(SecurityGroupWorkVO.Step.Processing);
if (processing) {
//the caller to take() should check the step and schedule another work item to come back
//and take a look.
work.setStep(SecurityGroupWorkVO.Step.Done);
} else {
work.setStep(SecurityGroupWorkVO.Step.Processing);
}
update(work.getId(), work);
@ -214,8 +216,8 @@ public class SecurityGroupWorkDaoImpl extends GenericDaoBase<SecurityGroupWorkVO
return result;
}
@Override
@Override
public List<SecurityGroupWorkVO> findScheduledWork() {
final SearchCriteria<SecurityGroupWorkVO> sc = UntakenWorkSearch.create();
sc.setParameters("step", Step.Scheduled);