diff --git a/plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/discoverer/XcpServerDiscoverer.java b/plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/discoverer/XcpServerDiscoverer.java index 26b443add9e..45cdaf946a5 100755 --- a/plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/discoverer/XcpServerDiscoverer.java +++ b/plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/discoverer/XcpServerDiscoverer.java @@ -346,6 +346,7 @@ public class XcpServerDiscoverer extends DiscovererBase implements Discoverer, L details.put("wait", Integer.toString(_wait)); params.put("migratewait", _configDao.getValue(Config.MigrateWait.toString())); params.put(Config.XenServerMaxNics.toString().toLowerCase(), _configDao.getValue(Config.XenServerMaxNics.toString())); + params.put(Config.XenServerHeartBeatTimeout.toString().toLowerCase(), _configDao.getValue(Config.XenServerHeartBeatTimeout.toString())); params.put(Config.XenServerHeartBeatInterval.toString().toLowerCase(), _configDao.getValue(Config.XenServerHeartBeatInterval.toString())); params.put(Config.InstanceName.toString().toLowerCase(), _instance); details.put(Config.InstanceName.toString().toLowerCase(), _instance); diff --git a/plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java b/plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java index b12dc244c0f..f5a2ea43802 100644 --- a/plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java +++ b/plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java @@ -285,6 +285,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe protected IAgentControl _agentControl; final int _maxWeight = 256; + protected int _heartbeatTimeout = 120; protected int _heartbeatInterval = 60; protected final XsHost _host = new XsHost(); @@ -4494,8 +4495,10 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe } protected boolean launchHeartBeat(Connection conn) { - String result = callHostPluginPremium(conn, "heartbeat", "host", _host.uuid, "interval", Integer - .toString(_heartbeatInterval)); + String result = callHostPluginPremium(conn, "heartbeat", + "host", _host.uuid, + "timeout", Integer.toString(_heartbeatTimeout), + "interval", Integer.toString(_heartbeatInterval)); if (result == null || !result.contains("> DONE <")) { s_logger.warn("Unable to launch the heartbeat process on " + _host.ip); return false; @@ -5573,6 +5576,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe _storageNetworkName1 = (String)params.get("storage.network.device1"); _storageNetworkName2 = (String)params.get("storage.network.device2"); + _heartbeatTimeout = NumbersUtil.parseInt((String)params.get("xenserver.heartbeat.timeout"), 120); _heartbeatInterval = NumbersUtil.parseInt((String)params.get("xenserver.heartbeat.interval"), 60); String value = (String)params.get("wait"); diff --git a/scripts/vm/hypervisor/xenserver/launch_hb.sh b/scripts/vm/hypervisor/xenserver/launch_hb.sh index 289eb5f8de6..0a12b7a9da8 100755 --- a/scripts/vm/hypervisor/xenserver/launch_hb.sh +++ b/scripts/vm/hypervisor/xenserver/launch_hb.sh @@ -33,6 +33,11 @@ if [ -z $2 ]; then exit 3 fi +if [ -z $3 ]; then + usage + exit 3 +fi + if [ ! -f /opt/cloud/bin/xenheartbeat.sh ]; then printf "Error: Unable to find xenheartbeat.sh to launch\n" exit 4 @@ -42,5 +47,5 @@ for psid in `ps -ef | grep xenheartbeat | grep -v grep | awk '{print $2}'`; do kill $psid done -nohup /opt/cloud/bin/xenheartbeat.sh $1 $2 >/dev/null 2>/dev/null & +nohup /opt/cloud/bin/xenheartbeat.sh $1 $2 $3 >/dev/null 2>/dev/null & echo "======> DONE <======" diff --git a/scripts/vm/hypervisor/xenserver/vmopspremium b/scripts/vm/hypervisor/xenserver/vmopspremium index 461e4059b27..5b9c34fc281 100755 --- a/scripts/vm/hypervisor/xenserver/vmopspremium +++ b/scripts/vm/hypervisor/xenserver/vmopspremium @@ -127,9 +127,10 @@ def setup_heartbeat_file(session, args): @echo def heartbeat(session, args): host = args['host'] + timeout = args['timeout'] interval = args['interval'] try: - cmd = ["/bin/bash", "/opt/cloud/bin/launch_hb.sh", host, interval] + cmd = ["/bin/bash", "/opt/cloud/bin/launch_hb.sh", host, timeout, interval] txt = util.pread2(cmd) except: txt='fail' diff --git a/scripts/vm/hypervisor/xenserver/xenheartbeat.sh b/scripts/vm/hypervisor/xenserver/xenheartbeat.sh index d5a5d862c71..88f484184c4 100755 --- a/scripts/vm/hypervisor/xenserver/xenheartbeat.sh +++ b/scripts/vm/hypervisor/xenserver/xenheartbeat.sh @@ -26,11 +26,15 @@ usage() { if [ -z $1 ]; then usage exit 2 +else + host=$1 fi if [ -z $2 ]; then usage exit 3 +else + timeout=$2 fi if [ ! -z $3 ]; then @@ -39,7 +43,7 @@ else interval=5 fi -if [ $interval -gt $2 ]; then +if [ $interval -gt $timeout ]; then usage exit 3 fi @@ -47,7 +51,7 @@ fi file=/opt/cloud/bin/heartbeat lastdate=$(($(date +%s) + $interval)) -while [ $(date +%s) -lt $(($lastdate + $2)) ] +while [ $(date +%s) -lt $(($lastdate + $timeout)) ] do sleep $interval @@ -69,7 +73,7 @@ do for dir in $dirs do if [ -d $dir ]; then - hb=$dir/hb-$1 + hb=$dir/hb-$host date +%s | dd of=$hb count=100 bs=1 2>/dev/null if [ $? -ne 0 ]; then /usr/bin/logger -t heartbeat "Potential problem with $hb: not reachable since $(($(date +%s) - $lastdate)) seconds" @@ -89,7 +93,7 @@ do do mp=`mount | grep $dir` if [ -n "$mp" ]; then - hb=$dir/hb-$1 + hb=$dir/hb-$host date +%s | dd of=$hb count=100 bs=1 2>/dev/null if [ $? -ne 0 ]; then /usr/bin/logger -t heartbeat "Potential problem with $hb: not reachable since $(($(date +%s) - $lastdate)) seconds" diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java index 1012c4d6c46..1f15d39f81e 100755 --- a/server/src/com/cloud/configuration/Config.java +++ b/server/src/com/cloud/configuration/Config.java @@ -1099,13 +1099,21 @@ public enum Config { null), XenServerSetupMultipath("Advanced", ManagementServer.class, String.class, "xenserver.setup.multipath", "false", "Setup the host to do multipath", null), XenServerBondStorageNic("Advanced", ManagementServer.class, String.class, "xenserver.bond.storage.nics", null, "Attempt to bond the two networks if found", null), + XenServerHeartBeatTimeout( + "Advanced", + ManagementServer.class, + Integer.class, + "xenserver.heartbeat.timeout", + "120", + "heartbeat timeout to use when implementing XenServer Self Fencing", + null), XenServerHeartBeatInterval( "Advanced", ManagementServer.class, Integer.class, "xenserver.heartbeat.interval", "60", - "heartbeat to use when implementing XenServer Self Fencing", + "heartbeat interval to use when checking before XenServer Self Fencing", null), XenServerGuestNetwork("Hidden", ManagementServer.class, String.class, "xenserver.guest.network.device", null, "Specify for guest network name label", null), XenServerMaxNics("Advanced", AgentManager.class, Integer.class, "xenserver.nics.max", "7", "Maximum allowed nics for Vms created on XenServer", null), diff --git a/server/src/com/cloud/configuration/ConfigurationManagerImpl.java b/server/src/com/cloud/configuration/ConfigurationManagerImpl.java index ad81ccd9a7f..df6ce454335 100755 --- a/server/src/com/cloud/configuration/ConfigurationManagerImpl.java +++ b/server/src/com/cloud/configuration/ConfigurationManagerImpl.java @@ -363,6 +363,7 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati configValuesForValidation.add("storage.cleanup.interval"); configValuesForValidation.add("wait"); configValuesForValidation.add("xenserver.heartbeat.interval"); + configValuesForValidation.add("xenserver.heartbeat.timeout"); configValuesForValidation.add("incorrect.login.attempts.allowed"); } diff --git a/server/src/com/cloud/resource/DiscovererBase.java b/server/src/com/cloud/resource/DiscovererBase.java index 29871450545..ad32b9fda56 100644 --- a/server/src/com/cloud/resource/DiscovererBase.java +++ b/server/src/com/cloud/resource/DiscovererBase.java @@ -137,6 +137,7 @@ public abstract class DiscovererBase extends AdapterBase implements Discoverer { params.put("migratewait", _configDao.getValue(Config.MigrateWait.toString())); params.put(Config.XenServerMaxNics.toString().toLowerCase(), _configDao.getValue(Config.XenServerMaxNics.toString())); params.put(Config.XenServerHeartBeatInterval.toString().toLowerCase(), _configDao.getValue(Config.XenServerHeartBeatInterval.toString())); + params.put(Config.XenServerHeartBeatTimeout.toString().toLowerCase(), _configDao.getValue(Config.XenServerHeartBeatTimeout.toString())); params.put("router.aggregation.command.each.timeout", _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); return params; diff --git a/systemvm/patches/debian/config/root/redundant_router/check_heartbeat.sh.templ b/systemvm/patches/debian/config/root/redundant_router/check_heartbeat.sh.templ index d6bdc5d63e3..d7d211ee57f 100755 --- a/systemvm/patches/debian/config/root/redundant_router/check_heartbeat.sh.templ +++ b/systemvm/patches/debian/config/root/redundant_router/check_heartbeat.sh.templ @@ -45,8 +45,8 @@ then [RROUTER_BIN_PATH]/primary-backup.sh fault >> [RROUTER_LOG] 2>&1 service keepalived stop >> [RROUTER_LOG] 2>&1 service conntrackd stop >> [RROUTER_LOG] 2>&1 - pkill -9 keepalived >> [RROUTER_LOG] 2>&1 - pkill -9 conntrackd >> [RROUTER_LOG] 2>&1 + pkill -9 keepalived >> [RROUTER_LOG] 2>&1 + pkill -9 conntrackd >> [RROUTER_LOG] 2>&1 echo Status: FAULT \(keepalived process is dead\) >> [RROUTER_LOG] exit fi