From 88c1da679cb29aaaa2c2254b0a229807af641966 Mon Sep 17 00:00:00 2001 From: Anthony Xu Date: Tue, 25 Mar 2014 10:42:31 -0700 Subject: [PATCH] check_heartbeat and pingtest execute through ssh, not XAPI, because XAPI may hang when master host is down --- .../xen/resource/CitrixResourceBase.java | 43 ++++++++++--- .../xen/resource/XenServer56FP1Resource.java | 4 +- .../xen/resource/XenServer56Resource.java | 62 ++++++++++++------- .../hypervisor/xenserver/check_heartbeat.sh | 1 + scripts/vm/hypervisor/xenserver/vmopspremium | 16 +---- 5 files changed, 78 insertions(+), 48 deletions(-) diff --git a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java index 35bbcd1ebd4..d3fdb193bc0 100644 --- a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java +++ b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/CitrixResourceBase.java @@ -406,17 +406,28 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe } - protected boolean pingXenServer() { + protected boolean pingXAPI() { Connection conn = getConnection(); + try { + Host host = Host.getByUuid(conn, _host.uuid); + if( !host.getEnabled(conn) ) { + s_logger.debug("Host " + _host.ip + " is not enabled!"); + return false; + } + } catch (Exception e) { + s_logger.debug("cannot get host enabled status, host " + _host.ip + " due to " + e.toString(), e); + return false; + } try { callHostPlugin(conn, "echo", "main"); - return true; } catch (Exception e) { s_logger.debug("cannot ping host " + _host.ip + " due to " + e.toString(), e); + return false; } - return false; + return true; } + protected String logX(XenAPIObject obj, String msg) { return new StringBuilder("Host ").append(_host.ip).append(" ").append(obj.toWireString()).append(": ").append(msg).toString(); } @@ -2006,12 +2017,24 @@ public abstract class CitrixResourceBase implements 
ServerResource, HypervisorRe } private boolean doPingTest(Connection conn, final String computingHostIp) { - String args = "-h " + computingHostIp; - String result = callHostPlugin(conn, "vmops", "pingtest", "args", args); - if (result == null || result.isEmpty()) { + com.trilead.ssh2.Connection sshConnection = new com.trilead.ssh2.Connection(_host.ip, 22); + try { + sshConnection.connect(null, 60000, 60000); + if (!sshConnection.authenticateWithPassword(_username, _password.peek())) { + throw new CloudRuntimeException("Unable to authenticate"); + } + + String cmd = "ping -c 2 " + computingHostIp; + if (!SSHCmdHelper.sshExecuteCmd(sshConnection, cmd)) { + throw new CloudRuntimeException("Cannot ping host " + computingHostIp + " from host " + _host.ip); + } + return true; + } catch (Exception e) { + s_logger.warn("Catch exception " + e.toString(), e); return false; + } finally { + sshConnection.close(); } - return true; } protected CheckOnHostAnswer execute(CheckOnHostCommand cmd) { @@ -2238,7 +2261,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe } protected CheckHealthAnswer execute(CheckHealthCommand cmd) { - boolean result = pingXenServer(); + boolean result = pingXAPI(); return new CheckHealthAnswer(cmd, result); } @@ -4341,9 +4364,9 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe @Override public PingCommand getCurrentStatus(long id) { try { - if (!pingXenServer()) { + if (!pingXAPI()) { Thread.sleep(1000); - if (!pingXenServer()) { + if (!pingXAPI()) { s_logger.warn(" can not ping xenserver " + _host.uuid); return null; } diff --git a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer56FP1Resource.java b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer56FP1Resource.java index 71436277f41..ee9b05d9344 100644 --- a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer56FP1Resource.java +++ 
b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer56FP1Resource.java @@ -73,12 +73,10 @@ public class XenServer56FP1Resource extends XenServer56Resource { protected FenceAnswer execute(FenceCommand cmd) { Connection conn = getConnection(); try { - String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHostGuid(), "interval", Integer.toString(_heartbeatInterval * 2)); - if (!result.contains("> DEAD <")) { + if (check_heartbeat(cmd.getHostGuid())) { s_logger.debug("Heart beat is still going so unable to fence"); return new FenceAnswer(cmd, false, "Heartbeat is still going on unable to fence"); } - Set vms = VM.getByNameLabel(conn, cmd.getVmName()); for (VM vm : vms) { Set vdis = new HashSet(); diff --git a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer56Resource.java b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer56Resource.java index 7e26a5cdb22..9952ded4cc7 100644 --- a/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer56Resource.java +++ b/plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer56Resource.java @@ -28,6 +28,7 @@ import com.cloud.agent.api.StartupCommand; import com.cloud.resource.ServerResource; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.utils.script.Script; +import com.cloud.utils.ssh.SSHCmdHelper; import com.xensource.xenapi.Connection; import com.xensource.xenapi.Host; import com.xensource.xenapi.Network; @@ -208,15 +209,37 @@ public class XenServer56Resource extends CitrixResourceBase { } } + protected Boolean check_heartbeat(String hostuuid) { + com.trilead.ssh2.Connection sshConnection = new com.trilead.ssh2.Connection(_host.ip, 22); + try { + sshConnection.connect(null, 60000, 60000); + if (!sshConnection.authenticateWithPassword(_username, _password.peek())) { + throw new CloudRuntimeException("Unable to authenticate"); + } + + String shcmd = "/opt/cloud/bin/check_heartbeat.sh " + 
hostuuid + " " + + Integer.toString(_heartbeatInterval * 2); + if (!SSHCmdHelper.sshExecuteCmd(sshConnection, shcmd)) { + s_logger.debug("Heart beat is gone so dead."); + return false; + } + s_logger.debug("Heart beat is still going"); + return true; + } catch (Exception e) { + s_logger.debug("health check failed due to catch exception " + e.toString()); + return null; + } finally { + sshConnection.close(); + } + } + protected FenceAnswer execute(FenceCommand cmd) { Connection conn = getConnection(); try { - String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHostGuid(), "interval", Integer.toString(_heartbeatInterval * 2)); - if (!result.contains("> DEAD <")) { + if (check_heartbeat(cmd.getHostGuid())) { s_logger.debug("Heart beat is still going so unable to fence"); return new FenceAnswer(cmd, false, "Heartbeat is still going on unable to fence"); } - Set vms = VM.getByNameLabel(conn, cmd.getVmName()); for (VM vm : vms) { synchronized (_cluster.intern()) { @@ -236,6 +259,7 @@ public class XenServer56Resource extends CitrixResourceBase { } } + @Override protected boolean transferManagementNetwork(Connection conn, Host host, PIF src, PIF.Record spr, PIF dest) throws XmlRpcException, XenAPIException { dest.reconfigureIp(conn, spr.ipConfigurationMode, spr.IP, spr.netmask, spr.gateway, spr.DNS); @@ -269,33 +293,29 @@ public class XenServer56Resource extends CitrixResourceBase { @Override public StartupCommand[] initialize() { - pingXenServer(); + pingXAPI(); StartupCommand[] cmds = super.initialize(); return cmds; } + @Override protected CheckOnHostAnswer execute(CheckOnHostCommand cmd) { - try { - Connection conn = getConnection(); - String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHost().getGuid(), "interval", Integer.toString(_heartbeatInterval * 2)); - if (result == null) { - return new CheckOnHostAnswer(cmd, "Unable to call plugin"); - } - if (result.contains("> DEAD <")) { - s_logger.debug("Heart beat is 
gone so dead."); - return new CheckOnHostAnswer(cmd, false, "Heart Beat is done"); - } else if (result.contains("> ALIVE <")) { - s_logger.debug("Heart beat is still going"); - return new CheckOnHostAnswer(cmd, true, "Heartbeat is still going"); - } - return new CheckOnHostAnswer(cmd, null, "Unable to determine"); - } catch (Exception e) { - s_logger.warn("Unable to fence", e); - return new CheckOnHostAnswer(cmd, e.getMessage()); + Boolean alive = check_heartbeat(cmd.getHost().getGuid()); + String msg = ""; + if (alive == null) { + msg = " cannot determine "; + } else if ( alive == true) { + msg = "Heart beat is still going"; + } else { + msg = "Heart beat is gone so dead."; } + s_logger.debug(msg); + return new CheckOnHostAnswer(cmd, alive, msg); + } + public XenServer56Resource() { super(); } diff --git a/scripts/vm/hypervisor/xenserver/check_heartbeat.sh b/scripts/vm/hypervisor/xenserver/check_heartbeat.sh index 22befe91652..e1b18278dd5 100755 --- a/scripts/vm/hypervisor/xenserver/check_heartbeat.sh +++ b/scripts/vm/hypervisor/xenserver/check_heartbeat.sh @@ -72,3 +72,4 @@ do done echo "=====> DEAD <======" +exit 1 diff --git a/scripts/vm/hypervisor/xenserver/vmopspremium b/scripts/vm/hypervisor/xenserver/vmopspremium index 06b0a51f7ed..2887436670f 100755 --- a/scripts/vm/hypervisor/xenserver/vmopspremium +++ b/scripts/vm/hypervisor/xenserver/vmopspremium @@ -123,18 +123,7 @@ def setup_heartbeat_file(session, args): txt = '' return txt -@echo -def check_heartbeat(session, args): - host = args['host'] - interval = args['interval'] - try: - cmd = ["bash", "/opt/cloud/bin/check_heartbeat.sh", host, interval] - txt = util.pread2(cmd) - except: - txt='' - return txt - - + @echo def heartbeat(session, args): host = args['host'] @@ -156,5 +145,4 @@ def asmonitor(session, args): return 'fail' if __name__ == "__main__": - XenAPIPlugin.dispatch({"forceShutdownVM":forceShutdownVM, "upgrade_snapshot":upgrade_snapshot, 
"create_privatetemplate_from_snapshot":create_privatetemplate_from_snapshot, "copy_vhd_to_secondarystorage":copy_vhd_to_secondarystorage, "copy_vhd_from_secondarystorage":copy_vhd_from_secondarystorage, "setup_heartbeat_sr":setup_heartbeat_sr, "setup_heartbeat_file":setup_heartbeat_file, "check_heartbeat":check_heartbeat, "heartbeat": heartbeat, "asmonitor": asmonitor}) - + XenAPIPlugin.dispatch({"forceShutdownVM":forceShutdownVM, "upgrade_snapshot":upgrade_snapshot, "create_privatetemplate_from_snapshot":create_privatetemplate_from_snapshot, "copy_vhd_to_secondarystorage":copy_vhd_to_secondarystorage, "copy_vhd_from_secondarystorage":copy_vhd_from_secondarystorage, "setup_heartbeat_sr":setup_heartbeat_sr, "setup_heartbeat_file":setup_heartbeat_file, "heartbeat": heartbeat, "asmonitor": asmonitor})