bug 12709: incremental fix - profiling management server clustering heartbeat activities

This commit is contained in:
Kelven Yang 2012-01-03 17:51:25 -08:00
parent 277fdc229b
commit 3750c7055b
2 changed files with 87 additions and 43 deletions

View File

@ -593,43 +593,66 @@ public class ClusterManagerImpl implements ClusterManager {
public void run() {
Transaction txn = Transaction.open("ClusterHeartBeat");
try {
txn.transitToUserManagedConnection(getHeartbeatConnection());
if(s_logger.isTraceEnabled()) {
s_logger.trace("Cluster manager heartbeat update, id:" + _mshostId);
}
_mshostDao.update(_mshostId, getCurrentRunId(), DateUtil.currentGMTTime());
if (s_logger.isTraceEnabled()) {
s_logger.trace("Cluster manager peer-scan, id:" + _mshostId);
}
if (!_peerScanInited) {
_peerScanInited = true;
initPeerScan();
}
peerScan();
Profiler profiler = new Profiler();
Profiler profilerHeartbeatUpdate = new Profiler();
Profiler profilerPeerScan = new Profiler();
Profiler profilerAgentLB = new Profiler();
//initiate agent lb task will be scheduled and executed only once, and only when number of agents loaded exceeds _connectedAgentsThreshold
if (_agentLBEnabled && !_agentLbHappened) {
List<HostVO> allManagedRoutingAgents = _hostDao.listManagedRoutingAgents();
List<HostVO> allAgents = _hostDao.listAllRoutingAgents();
double allHostsCount = allAgents.size();
double managedHostsCount = allManagedRoutingAgents.size();
if (allHostsCount > 0.0) {
double load = managedHostsCount/allHostsCount;
if (load >= _connectedAgentsThreshold) {
s_logger.debug("Scheduling agent rebalancing task as the average agent load " + load + " is more than the threshold " + _connectedAgentsThreshold);
_rebalanceService.scheduleRebalanceAgents();
_agentLbHappened = true;
} else {
s_logger.trace("Not scheduling agent rebalancing task as the averages load " + load + " is less than the threshold " + _connectedAgentsThreshold);
}
}
try {
profiler.start();
profilerHeartbeatUpdate.start();
txn.transitToUserManagedConnection(getHeartbeatConnection());
if(s_logger.isTraceEnabled()) {
s_logger.trace("Cluster manager heartbeat update, id:" + _mshostId);
}
_mshostDao.update(_mshostId, getCurrentRunId(), DateUtil.currentGMTTime());
profilerHeartbeatUpdate.stop();
profilerPeerScan.start();
if (s_logger.isTraceEnabled()) {
s_logger.trace("Cluster manager peer-scan, id:" + _mshostId);
}
if (!_peerScanInited) {
_peerScanInited = true;
initPeerScan();
}
peerScan();
profilerPeerScan.stop();
profilerAgentLB.start();
// The initial agent LB task is scheduled and executed only once, and only after the ratio of managed routing agents to all routing agents reaches _connectedAgentsThreshold
if (_agentLBEnabled && !_agentLbHappened) {
List<HostVO> allManagedRoutingAgents = _hostDao.listManagedRoutingAgents();
List<HostVO> allAgents = _hostDao.listAllRoutingAgents();
double allHostsCount = allAgents.size();
double managedHostsCount = allManagedRoutingAgents.size();
if (allHostsCount > 0.0) {
double load = managedHostsCount/allHostsCount;
if (load >= _connectedAgentsThreshold) {
s_logger.debug("Scheduling agent rebalancing task as the average agent load " + load + " is more than the threshold " + _connectedAgentsThreshold);
_rebalanceService.scheduleRebalanceAgents();
_agentLbHappened = true;
} else {
s_logger.trace("Not scheduling agent rebalancing task as the averages load " + load + " is less than the threshold " + _connectedAgentsThreshold);
}
}
}
profilerAgentLB.stop();
} finally {
profiler.stop();
if(profiler.getDuration() >= _heartbeatInterval) {
s_logger.warn("Management server heartbeat takes too long to finish. profiler: " + profiler.toString() +
", profilerHeartbeatUpdate: " + profilerHeartbeatUpdate.toString() +
", profilerPeerScan: " + profilerPeerScan.toString() +
", profilerAgentLB: " + profilerAgentLB.toString());
}
}
} catch(CloudRuntimeException e) {
s_logger.error("Runtime DB exception ", e.getCause());

View File

@ -19,25 +19,46 @@
package com.cloud.utils;
/**
 * Simple wall-clock stopwatch based on {@link System#currentTimeMillis()}.
 *
 * <p>The start and stop ticks are kept as nullable {@code Long}s so that a
 * profiler that was never started, or was started but never stopped, can be
 * told apart from one that completed (see {@link #toString()}).
 */
public class Profiler {
    /** Tick recorded by {@link #start()}, or {@code null} if never started. */
    private Long startTickInMs;
    /** Tick recorded by {@link #stop()}, or {@code null} if never stopped. */
    private Long stopTickInMs;

    /** Creates a profiler that is neither started nor stopped. */
    public Profiler() {
        startTickInMs = null;
        stopTickInMs = null;
    }

    /**
     * Records the current time as the start tick.
     *
     * @return the recorded start tick, in milliseconds
     */
    public long start() {
        startTickInMs = System.currentTimeMillis();
        return startTickInMs.longValue();
    }

    /**
     * Records the current time as the stop tick.
     *
     * @return the recorded stop tick, in milliseconds
     */
    public long stop() {
        stopTickInMs = System.currentTimeMillis();
        return stopTickInMs.longValue();
    }

    /**
     * Returns the elapsed time between the recorded start and stop ticks.
     *
     * @return stop tick minus start tick in milliseconds, or {@code -1} when
     *         either tick has not been recorded yet
     */
    public long getDuration() {
        if (startTickInMs != null && stopTickInMs != null) {
            return stopTickInMs.longValue() - startTickInMs.longValue();
        }
        // Sentinel: the measurement is incomplete.
        return -1;
    }

    /** @return {@code true} once {@link #start()} has been called */
    public boolean isStarted() {
        return startTickInMs != null;
    }

    /** @return {@code true} once {@link #stop()} has been called */
    public boolean isStopped() {
        return stopTickInMs != null;
    }

    /**
     * Describes the profiler state: not started, started but not stopped, or
     * done together with the measured duration.
     */
    @Override
    public String toString() {
        if (startTickInMs == null) {
            return "Not Started";
        }
        if (stopTickInMs == null) {
            return "Started but not stopped";
        }
        return "Done. Duration: " + getDuration() + "ms";
    }
}