From 4d065b9a3a336d59902c266202c1094509c007d2 Mon Sep 17 00:00:00 2001 From: Daan Hoogland Date: Tue, 9 Sep 2014 22:19:54 +0200 Subject: [PATCH] CLOUDSTACK-7184: xenheartbeat gets passed timeout and interval --- .../xen/discoverer/XcpServerDiscoverer.java | 1 + .../hypervisor/xen/resource/CitrixResourceBase.java | 8 ++++++-- scripts/vm/hypervisor/xenserver/launch_hb.sh | 7 ++++++- scripts/vm/hypervisor/xenserver/vmopspremium | 3 ++- scripts/vm/hypervisor/xenserver/xenheartbeat.sh | 12 ++++++++---- server/src/com/cloud/configuration/Config.java | 10 +++++++++- .../configuration/ConfigurationManagerImpl.java | 1 + server/src/com/cloud/resource/DiscovererBase.java | 1 + .../root/redundant_router/check_heartbeat.sh.templ | 4 ++-- 9 files changed, 36 insertions(+), 11 deletions(-) diff --git a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/discoverer/XcpServerDiscoverer.java b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/discoverer/XcpServerDiscoverer.java index 0e7cf74ae43..eda28f3eb8a 100755 --- a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/discoverer/XcpServerDiscoverer.java +++ b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/discoverer/XcpServerDiscoverer.java @@ -347,6 +347,7 @@ public class XcpServerDiscoverer extends DiscovererBase implements Discoverer, L details.put("wait", Integer.toString(_wait)); params.put("migratewait", _configDao.getValue(Config.MigrateWait.toString())); params.put(Config.XenMaxNics.toString().toLowerCase(), _configDao.getValue(Config.XenMaxNics.toString())); + params.put(Config.XenHeartBeatTimeout.toString().toLowerCase(), _configDao.getValue(Config.XenHeartBeatTimeout.toString())); params.put(Config.XenHeartBeatInterval.toString().toLowerCase(), _configDao.getValue(Config.XenHeartBeatInterval.toString())); params.put(Config.InstanceName.toString().toLowerCase(), _instance); details.put(Config.InstanceName.toString().toLowerCase(), _instance); diff --git a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java index c1c97955f1d..644ef65dce6 100644 --- a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java +++ b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java @@ -289,6 +289,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe protected IAgentControl _agentControl; final int _maxWeight = 256; + protected int _heartbeatTimeout = 120; protected int _heartbeatInterval = 60; protected final XsHost _host = new XsHost(); @@ -4603,8 +4604,10 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe } protected boolean launchHeartBeat(Connection conn) { - String result = callHostPluginPremium(conn, "heartbeat", "host", _host.uuid, "interval", Integer - .toString(_heartbeatInterval)); + String result = callHostPluginPremium(conn, "heartbeat", + "host", _host.uuid, + "timeout", Integer.toString(_heartbeatTimeout), + "interval", Integer.toString(_heartbeatInterval)); if (result == null || !result.contains("> DONE <")) { s_logger.warn("Unable to launch the heartbeat process on " + _host.ip); return false; @@ -5674,6 +5677,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe _storageNetworkName1 = (String)params.get("storage.network.device1"); _storageNetworkName2 = (String)params.get("storage.network.device2"); + _heartbeatTimeout = NumbersUtil.parseInt((String)params.get("xen.heartbeat.timeout"), 120); _heartbeatInterval = NumbersUtil.parseInt((String)params.get("xen.heartbeat.interval"), 60); String value = (String)params.get("wait"); diff --git a/scripts/vm/hypervisor/xenserver/launch_hb.sh b/scripts/vm/hypervisor/xenserver/launch_hb.sh index 289eb5f8de6..0a12b7a9da8 100755 --- a/scripts/vm/hypervisor/xenserver/launch_hb.sh +++ b/scripts/vm/hypervisor/xenserver/launch_hb.sh @@ -33,6 +33,11 @@ if [ -z $2 ]; then exit 3 fi +if [ -z $3 ]; then + usage + exit 3 +fi + if [ ! -f /opt/cloud/bin/xenheartbeat.sh ]; then printf "Error: Unable to find xenheartbeat.sh to launch\n" exit 4 @@ -42,5 +47,5 @@ for psid in `ps -ef | grep xenheartbeat | grep -v grep | awk '{print $2}'`; do kill $psid done -nohup /opt/cloud/bin/xenheartbeat.sh $1 $2 >/dev/null 2>/dev/null & +nohup /opt/cloud/bin/xenheartbeat.sh $1 $2 $3 >/dev/null 2>/dev/null & echo "======> DONE <======" diff --git a/scripts/vm/hypervisor/xenserver/vmopspremium b/scripts/vm/hypervisor/xenserver/vmopspremium index 2887436670f..f21864415aa 100755 --- a/scripts/vm/hypervisor/xenserver/vmopspremium +++ b/scripts/vm/hypervisor/xenserver/vmopspremium @@ -127,9 +127,10 @@ def setup_heartbeat_file(session, args): @echo def heartbeat(session, args): host = args['host'] + timeout = args['timeout'] interval = args['interval'] try: - cmd = ["/bin/bash", "/opt/cloud/bin/launch_hb.sh", host, interval] + cmd = ["/bin/bash", "/opt/cloud/bin/launch_hb.sh", host, timeout, interval] txt = util.pread2(cmd) except: txt='fail' diff --git a/scripts/vm/hypervisor/xenserver/xenheartbeat.sh b/scripts/vm/hypervisor/xenserver/xenheartbeat.sh index d5a5d862c71..88f484184c4 100755 --- a/scripts/vm/hypervisor/xenserver/xenheartbeat.sh +++ b/scripts/vm/hypervisor/xenserver/xenheartbeat.sh @@ -26,11 +26,15 @@ usage() { if [ -z $1 ]; then usage exit 2 +else + host=$1 fi if [ -z $2 ]; then usage exit 3 +else + timeout=$2 fi if [ ! -z $3 ]; then @@ -39,7 +43,7 @@ else interval=5 fi -if [ $interval -gt $2 ]; then +if [ $interval -gt $timeout ]; then usage exit 3 fi @@ -47,7 +51,7 @@ fi file=/opt/cloud/bin/heartbeat lastdate=$(($(date +%s) + $interval)) -while [ $(date +%s) -lt $(($lastdate + $2)) ] +while [ $(date +%s) -lt $(($lastdate + $timeout)) ] do sleep $interval @@ -69,7 +73,7 @@ do for dir in $dirs do if [ -d $dir ]; then - hb=$dir/hb-$1 + hb=$dir/hb-$host date +%s | dd of=$hb count=100 bs=1 2>/dev/null if [ $? -ne 0 ]; then /usr/bin/logger -t heartbeat "Potential problem with $hb: not reachable since $(($(date +%s) - $lastdate)) seconds" @@ -89,7 +93,7 @@ do do mp=`mount | grep $dir` if [ -n "$mp" ]; then - hb=$dir/hb-$1 + hb=$dir/hb-$host date +%s | dd of=$hb count=100 bs=1 2>/dev/null if [ $? -ne 0 ]; then /usr/bin/logger -t heartbeat "Potential problem with $hb: not reachable since $(($(date +%s) - $lastdate)) seconds" diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java index bf72dc2246b..56ae5f9309f 100755 --- a/server/src/com/cloud/configuration/Config.java +++ b/server/src/com/cloud/configuration/Config.java @@ -1099,13 +1099,21 @@ public enum Config { null), XenSetupMultipath("Advanced", ManagementServer.class, String.class, "xen.setup.multipath", "false", "Setup the host to do multipath", null), XenBondStorageNic("Advanced", ManagementServer.class, String.class, "xen.bond.storage.nics", null, "Attempt to bond the two networks if found", null), + XenHeartBeatTimeout( + "Advanced", + ManagementServer.class, + Integer.class, + "xen.heartbeat.timeout", + "120", + "heartbeat timeout to use when implementing XenServer Self Fencing", + null), XenHeartBeatInterval( "Advanced", ManagementServer.class, Integer.class, "xen.heartbeat.interval", "60", - "heartbeat to use when implementing XenServer Self Fencing", + "heartbeat interval to use when checking before XenServer Self Fencing", null), XenGuestNetwork("Hidden", ManagementServer.class, String.class, "xen.guest.network.device", null, "Specify for guest network name label", null), XenMaxNics("Advanced", AgentManager.class, Integer.class, "xen.nics.max", "7", "Maximum allowed nics for Vms created on Xen", null), diff --git a/server/src/com/cloud/configuration/ConfigurationManagerImpl.java b/server/src/com/cloud/configuration/ConfigurationManagerImpl.java index 010ca7a085d..48b3cab8214 100755 --- a/server/src/com/cloud/configuration/ConfigurationManagerImpl.java +++ b/server/src/com/cloud/configuration/ConfigurationManagerImpl.java @@ -363,6 +363,7 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati configValuesForValidation.add("storage.cleanup.interval"); configValuesForValidation.add("wait"); configValuesForValidation.add("xen.heartbeat.interval"); + configValuesForValidation.add("xen.heartbeat.timeout"); configValuesForValidation.add("incorrect.login.attempts.allowed"); } diff --git a/server/src/com/cloud/resource/DiscovererBase.java b/server/src/com/cloud/resource/DiscovererBase.java index 871dc15cf65..63f2ab58dcc 100644 --- a/server/src/com/cloud/resource/DiscovererBase.java +++ b/server/src/com/cloud/resource/DiscovererBase.java @@ -137,6 +137,7 @@ public abstract class DiscovererBase extends AdapterBase implements Discoverer { params.put("migratewait", _configDao.getValue(Config.MigrateWait.toString())); params.put(Config.XenMaxNics.toString().toLowerCase(), _configDao.getValue(Config.XenMaxNics.toString())); params.put(Config.XenHeartBeatInterval.toString().toLowerCase(), _configDao.getValue(Config.XenHeartBeatInterval.toString())); + params.put(Config.XenHeartBeatTimeout.toString().toLowerCase(), _configDao.getValue(Config.XenHeartBeatTimeout.toString())); params.put("router.aggregation.command.each.timeout", _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); return params; diff --git a/systemvm/patches/debian/config/root/redundant_router/check_heartbeat.sh.templ b/systemvm/patches/debian/config/root/redundant_router/check_heartbeat.sh.templ index d6bdc5d63e3..d7d211ee57f 100755 --- a/systemvm/patches/debian/config/root/redundant_router/check_heartbeat.sh.templ +++ b/systemvm/patches/debian/config/root/redundant_router/check_heartbeat.sh.templ @@ -45,8 +45,8 @@ then [RROUTER_BIN_PATH]/primary-backup.sh fault >> [RROUTER_LOG] 2>&1 service keepalived stop >> [RROUTER_LOG] 2>&1 service conntrackd stop >> [RROUTER_LOG] 2>&1 - pkill -9 keepalived >> [RROUTER_LOG] 2>&1 - pkill -9 conntrackd >> [RROUTER_LOG] 2>&1 + pkill -9 keepalived >> [RROUTER_LOG] 2>&1 + pkill -9 conntrackd >> [RROUTER_LOG] 2>&1 echo Status: FAULT \(keepalived process is dead\) >> [RROUTER_LOG] exit fi