Profile the management-server cluster heartbeat.

ClusterManagerImpl: the heartbeat body of run() is wrapped in an inner
try/finally and timed with four Profiler instances (whole run, heartbeat
update, peer scan, agent load-balancing check). The finally block stops the
overall profiler and, when its duration is >= _heartbeatInterval, logs a
warning containing all four profilers.

Profiler: the start/stop ticks become nullable Long so that "never started"
and "never stopped" are distinguishable. getDuration() returns -1 unless both
ticks are set. isStarted(), isStopped() and toString() are added.

Review notes (observations only; the hunks below are unchanged):
  * Profiler.toString() overrides Object.toString() without @Override.
  * Durations come from System.currentTimeMillis() (wall clock). A clock
    adjustment between start() and stop() can yield a negative duration,
    which then overlaps with the -1 "not finished" sentinel.
  * If a step throws, its profiler is never stopped and is reported as
    "Started but not stopped"; later steps are reported as "Not Started".

NOTE: indentation and the placement of blank context lines were reconstructed
to match the hunk header counts. Apply with whitespace-insensitive matching
(e.g. `git apply --ignore-whitespace`).

diff --git a/server/src/com/cloud/cluster/ClusterManagerImpl.java b/server/src/com/cloud/cluster/ClusterManagerImpl.java
index 11b5149fd4e..9cdb4cef8d0 100644
--- a/server/src/com/cloud/cluster/ClusterManagerImpl.java
+++ b/server/src/com/cloud/cluster/ClusterManagerImpl.java
@@ -593,43 +593,66 @@ public class ClusterManagerImpl implements ClusterManager {
             public void run() {
                 Transaction txn = Transaction.open("ClusterHeartBeat");
                 try {
-                    txn.transitToUserManagedConnection(getHeartbeatConnection());
-                    if(s_logger.isTraceEnabled()) {
-                        s_logger.trace("Cluster manager heartbeat update, id:" + _mshostId);
-                    }
-
-                    _mshostDao.update(_mshostId, getCurrentRunId(), DateUtil.currentGMTTime());
-
-                    if (s_logger.isTraceEnabled()) {
-                        s_logger.trace("Cluster manager peer-scan, id:" + _mshostId);
-                    }
-
-                    if (!_peerScanInited) {
-                        _peerScanInited = true;
-                        initPeerScan();
-                    }
-
-                    peerScan();
+                    Profiler profiler = new Profiler();
+                    Profiler profilerHeartbeatUpdate = new Profiler();
+                    Profiler profilerPeerScan = new Profiler();
+                    Profiler profilerAgentLB = new Profiler();
 
-                    //initiate agent lb task will be scheduled and executed only once, and only when number of agents loaded exceeds _connectedAgentsThreshold
-                    if (_agentLBEnabled && !_agentLbHappened) {
-                        List allManagedRoutingAgents = _hostDao.listManagedRoutingAgents();
-                        List allAgents = _hostDao.listAllRoutingAgents();
-                        double allHostsCount = allAgents.size();
-                        double managedHostsCount = allManagedRoutingAgents.size();
-                        if (allHostsCount > 0.0) {
-                            double load = managedHostsCount/allHostsCount;
-                            if (load >= _connectedAgentsThreshold) {
-                                s_logger.debug("Scheduling agent rebalancing task as the average agent load " + load + " is more than the threshold " + _connectedAgentsThreshold);
-                                _rebalanceService.scheduleRebalanceAgents();
-                                _agentLbHappened = true;
-                            } else {
-                                s_logger.trace("Not scheduling agent rebalancing task as the averages load " + load + " is less than the threshold " + _connectedAgentsThreshold);
-                            }
-                        }
+                    try {
+                        profiler.start();
+
+                        profilerHeartbeatUpdate.start();
+                        txn.transitToUserManagedConnection(getHeartbeatConnection());
+                        if(s_logger.isTraceEnabled()) {
+                            s_logger.trace("Cluster manager heartbeat update, id:" + _mshostId);
+                        }
+
+                        _mshostDao.update(_mshostId, getCurrentRunId(), DateUtil.currentGMTTime());
+                        profilerHeartbeatUpdate.stop();
+
+                        profilerPeerScan.start();
+                        if (s_logger.isTraceEnabled()) {
+                            s_logger.trace("Cluster manager peer-scan, id:" + _mshostId);
+                        }
+
+                        if (!_peerScanInited) {
+                            _peerScanInited = true;
+                            initPeerScan();
+                        }
+
+                        peerScan();
+                        profilerPeerScan.stop();
+
+                        profilerAgentLB.start();
+                        //initiate agent lb task will be scheduled and executed only once, and only when number of agents loaded exceeds _connectedAgentsThreshold
+                        if (_agentLBEnabled && !_agentLbHappened) {
+                            List allManagedRoutingAgents = _hostDao.listManagedRoutingAgents();
+                            List allAgents = _hostDao.listAllRoutingAgents();
+                            double allHostsCount = allAgents.size();
+                            double managedHostsCount = allManagedRoutingAgents.size();
+                            if (allHostsCount > 0.0) {
+                                double load = managedHostsCount/allHostsCount;
+                                if (load >= _connectedAgentsThreshold) {
+                                    s_logger.debug("Scheduling agent rebalancing task as the average agent load " + load + " is more than the threshold " + _connectedAgentsThreshold);
+                                    _rebalanceService.scheduleRebalanceAgents();
+                                    _agentLbHappened = true;
+                                } else {
+                                    s_logger.trace("Not scheduling agent rebalancing task as the averages load " + load + " is less than the threshold " + _connectedAgentsThreshold);
+                                }
+                            }
+                        }
+                        profilerAgentLB.stop();
+                    } finally {
+                        profiler.stop();
+
+                        if(profiler.getDuration() >= _heartbeatInterval) {
+                            s_logger.warn("Management server heartbeat takes too long to finish. profiler: " + profiler.toString() +
+                                ", profilerHeartbeatUpdate: " + profilerHeartbeatUpdate.toString() +
+                                ", profilerPeerScan: " + profilerPeerScan.toString() +
+                                ", profilerAgentLB: " + profilerAgentLB.toString());
+                        }
                     }
 
-
                 } catch(CloudRuntimeException e) {
                     s_logger.error("Runtime DB exception ", e.getCause());
 
diff --git a/utils/src/com/cloud/utils/Profiler.java b/utils/src/com/cloud/utils/Profiler.java
index 82ec2cb1f89..2f7d8477098 100644
--- a/utils/src/com/cloud/utils/Profiler.java
+++ b/utils/src/com/cloud/utils/Profiler.java
@@ -19,25 +19,46 @@
 package com.cloud.utils;
 
 public class Profiler {
-    private long startTickInMs;
-    private long stopTickInMs;
+    private Long startTickInMs;
+    private Long stopTickInMs;
 
     public Profiler() {
-        startTickInMs = 0;
-        stopTickInMs = 0;
+        startTickInMs = null;
+        stopTickInMs = null;
     }
 
     public long start() {
         startTickInMs = System.currentTimeMillis();
-        return startTickInMs;
+        return startTickInMs.longValue();
     }
 
     public long stop() {
         stopTickInMs = System.currentTimeMillis();
-        return stopTickInMs;
+        return stopTickInMs.longValue();
     }
 
-    public long getDuration() {
-        return stopTickInMs - startTickInMs;
-    }
+    public long getDuration() {
+        if(startTickInMs != null && stopTickInMs != null)
+            return stopTickInMs.longValue() - startTickInMs.longValue();
+
+        return -1;
+    }
+
+    public boolean isStarted() {
+        return startTickInMs != null;
+    }
+
+    public boolean isStopped() {
+        return stopTickInMs != null;
+    }
+
+    public String toString() {
+        if(startTickInMs == null)
+            return "Not Started";
+
+        if(stopTickInMs == null)
+            return "Started but not stopped";
+
+        return "Done. Duration: " + getDuration() + "ms";
+    }
 }